sphinx

package

v0.0.0-...-c89f87b Latest Latest Go to latest Published: Apr 23, 2023 License: MIT, MIT Imports: 5 Imported by: 6

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/xlab/pocketsphinx-go

Links

Open Source Insights

Documentation ¶

Index ¶

Variables
type Config
- func NewConfig(opts ...Option) *Config
- func NewConfigRetain(ln *pocketsphinx.CommandLn) *Config
- func (c *Config) CommandLn() *pocketsphinx.CommandLn
- func (c *Config) Destroy() bool
- func (c *Config) Retain()
type Decoder
- func NewDecoder(cfg *Config) (*Decoder, error)
- func (d *Decoder) AddWord(word, phones String, update bool) (id int32, ok bool)
- func (d *Decoder) AllDuration() (speech, cpu, wall time.Duration)
- func (d *Decoder) Config() *Config
- func (d *Decoder) Decoder() *pocketsphinx.Decoder
- func (d *Decoder) Destroy() bool
- func (d *Decoder) EndUtt() bool
- func (d *Decoder) FramesSearched() int32
- func (d *Decoder) Hypothesis() (hyp string, score int32)
- func (d *Decoder) IsInSpeech() bool
- func (d *Decoder) LogMath() *LogMath
- func (d *Decoder) LookupWord(word String) (string, bool)
- func (d *Decoder) NewLattice(filename String) (*Lattice, error)
- func (d *Decoder) Probability() int32
- func (d *Decoder) ProcessCep(data [][]float32, noSearch, fullUtterance bool) (frames int32, ok bool)
- func (d *Decoder) ProcessRaw(data []int16, noSearch, fullUtterance bool) (frames int32, ok bool)
- func (d *Decoder) RawData() []int16
- func (d *Decoder) ReadDict(dictFile, fillerDictFile String) bool
- func (d *Decoder) Reconfigure(cfg *Config)
- func (d *Decoder) SetKeyphrase(name string, keyphrase string) int32
- func (d *Decoder) SetKws(name string, keyfile string) int32
- func (d *Decoder) SetRawDataSize(frames int32)
- func (d *Decoder) SetSearch(name string) int32
- func (d *Decoder) StartStream() bool
- func (d *Decoder) StartUtt() bool
- func (d *Decoder) UpdateMLLR(mllr *MLLR) *MLLR
- func (d *Decoder) UttDuration() (speech, cpu, wall time.Duration)
- func (d *Decoder) WordLattice() *Lattice
- func (d *Decoder) WriteDict(dictFile String) bool
type JSGF
- func JSGFParseFile(filename String, parent *JSGF) (*JSGF, error)
- func JSGFParseString(data String, parent *JSGF) (*JSGF, error)
- func NewJSGFGrammar(parent *JSGF) (*JSGF, error)
- func (j *JSGF) GrammarName() string
type JSGFRuleIter
type Lattice
- func (l *Lattice) BaseWord(node *LatticeNode) string
- func (l *Lattice) BestPath(model *NGramModel, lwf, ascale float32) *LatticeLink
- func (l *Lattice) Destroy() bool
- func (l *Lattice) Frames() int32
- func (l Lattice) Iter() *LatticeNodeIter
- func (l *Lattice) Lattice() *pocketsphinx.Lattice
- func (l *Lattice) LinkBaseWord(link *LatticeLink) string
- func (l *Lattice) LinkProbability(link *LatticeLink) (score, prob int32)
- func (l *Lattice) LinkWord(link *LatticeLink) string
- func (l *Lattice) LogMath() *LogMath
- func (l *Lattice) NewLink(from, to *LatticeNode, score, endFrame int32)
- func (l *Lattice) Posterior(model *NGramModel, ascale float32) int32
- func (l *Lattice) PosteriorPrune(model *NGramModel, ascale float32) int32
- func (l *Lattice) ProbabilityOf(node *LatticeNode) (*LatticeLink, int32)
- func (l *Lattice) Retain()
- func (l *Lattice) ReverseEdges(start, end *LatticeNode) *LatticeLink
- func (l *Lattice) ReverseNext(start *LatticeNode) *LatticeLink
- func (l *Lattice) TraverseEdges(start, end *LatticeNode) *LatticeLink
- func (l *Lattice) TraverseNext(end *LatticeNode) *LatticeLink
- func (l *Lattice) Word(node *LatticeNode) string
- func (l *Lattice) WriteTo(filename String) bool
- func (l *Lattice) WriteToHTK(filename String) bool
type LatticeLink
- func (l *LatticeLink) Nodes() (source, dest *LatticeNode)
- func (l *LatticeLink) Prev() *LatticeLink
- func (l *LatticeLink) Times() (start int32, end int16)
type LatticeLinkIter
- func (l *LatticeLinkIter) Close()
- func (l *LatticeLinkIter) Link() *LatticeLink
- func (l *LatticeLinkIter) Next() *LatticeLinkIter
type LatticeNode
- func (l *LatticeNode) Entries() *LatticeLinkIter
- func (l *LatticeNode) Exits() *LatticeLinkIter
- func (l *LatticeNode) Times() (start int32, first, last int16)
type LatticeNodeIter
- func (l *LatticeNodeIter) Close()
- func (l *LatticeNodeIter) Next() *LatticeNodeIter
- func (l *LatticeNodeIter) Node() *LatticeNode
type LogMath
- func (l LogMath) Add(p, q int32) int32
- func (l LogMath) AddExact(p, q int32) int32
- func (l *LogMath) Destroy() bool
- func (l LogMath) Exp(p int32) float64
- func (l LogMath) GetBase() float64
- func (l LogMath) GetShift() int32
- func (l LogMath) GetTableShape() (size, width, shift uint32, ok bool)
- func (l LogMath) GetWidth() int32
- func (l LogMath) GetZero() int32
- func (l LogMath) LnToLog(p float64) int32
- func (l LogMath) Log(p float64) int32
- func (l LogMath) Log10ToLog(p float64) int32
- func (l LogMath) Log10ToLogFloat(p float64) float32
- func (l LogMath) LogFloatToLog10(p float32) float64
- func (l *LogMath) LogMath() *pocketsphinx.Logmath
- func (l LogMath) LogToLn(p int32) float64
- func (l LogMath) LogToLog10(p int32) float64
- func (l *LogMath) Retain()
- func (l LogMath) WriteTo(filename String) bool
type MLLR
- func NewMLLR(filename String) (*MLLR, error)
- func (m *MLLR) Destroy() bool
- func (m *MLLR) MLLR() *pocketsphinx.Mllr
- func (m *MLLR) Retain()
type NGramCase
type NGramFileType
type NGramModel
- func NewNGramModel(fileName String, fileType NGramFileType, lmath *LogMath, opt ...NGramOptions) (*NGramModel, error)
- func (n *NGramModel) AddClass(className String, weight float32, words Strings, weights []float32) bool
- func (n *NGramModel) AddClassWord(className, word String, weight float32) int32
- func (n *NGramModel) AddWord(word String, weight float32) int32
- func (n *NGramModel) ApplyWeights(langWeight, insertionPenalty float32) bool
- func (n *NGramModel) BigramScore(w2, w1 int32) (score, nUsed int32)
- func (n *NGramModel) CaseFold(c NGramCase) bool
- func (n *NGramModel) Counts() []uint32
- func (n *NGramModel) Destroy() bool
- func (n *NGramModel) Flush()
- func (n *NGramModel) NGramModel() *pocketsphinx.NgramModel
- func (n *NGramModel) Probability(words Strings) int32
- func (n *NGramModel) QuickProbability(wordID int32, history []int32) (prob, nUsed int32)
- func (n *NGramModel) ReadClassDef(filename String) bool
- func (n *NGramModel) Retain()
- func (n *NGramModel) Score(wordID int32, history []int32) (score, nUsed int32)
- func (n *NGramModel) ScoreToProbability(score int32) int32
- func (n *NGramModel) Size() int32
- func (n *NGramModel) TrigramScore(w3, w2, w1 int32) (score, nUsed int32)
- func (n *NGramModel) UnknownWordID() int32
- func (n *NGramModel) Weights() (langWeight float32, wipLog int32)
- func (n *NGramModel) Word(wordID int32) string
- func (n *NGramModel) WordID(word String) int32
- func (n *NGramModel) WriteTo(filename String, format NGramFileType) bool
- func (n *NGramModel) Zero() int32
type NGramOptions
- func (n *NGramOptions) CommandLn() *pocketsphinx.CommandLn
- func (n *NGramOptions) Destroy() bool
- func (n *NGramOptions) LanguageWeight(v float32)
- func (n *NGramOptions) MMap(v bool)
- func (n *NGramOptions) WordInsertionPenalty(v float32)
type Option
- func AScaleOption(scale float32) Option
- func AWeightOption(weight int) Option
- func AllPhoneCIOption(ciUnitsOnly bool) Option
- func AllPhoneFileOption(filepath string) Option
- func BacktraceOption(backtrace bool) Option
- func BeamOption(width float64) Option
- func BestPathLWeightOption(weight float32) Option
- func BestpathOption(bestpath bool) Option
- func CompAllSenOption(compallsen bool) Option
- func DebugOption(level int) Option
- func DictCaseOption(sens bool) Option
- func DictFileOption(filename string) Option
- func DsRatioOption(ratio int) Option
- func FastFourierTransformOption(points int) Option
- func FeatParamsFileOption(filename string) Option
- func FillProbOption(prob float32) Option
- func FillerDictFileOption(filename string) Option
- func FiniteStateGrammarsOption(filepath string) Option
- func FwdFlatBeamOption(width float64) Option
- func FwdFlatLWeightOption(weight float32) Option
- func FwdFlatOption(fwdflat bool) Option
- func FwdFlatSfWinOption(frames int) Option
- func FwdFlatWBeamOption(width float64) Option
- func FwdFlateFWidOption(frames int) Option
- func FwdTreeOption(fwdtree bool) Option
- func HMMDirOption(dir string) Option
- func InputEndianOption(endian string) Option
- func KeyphraseOption(keyphrase string) Option
- func KeywordsDelayOption(delay int) Option
- func KeywordsFileOption(filename string) Option
- func KeywordsPLPOption(prob float64) Option
- func KeywordsThresholdOption(threshold float64) Option
- func LMFileOption(filename string) Option
- func LMNameOption(name string) Option
- func LMSetOption(set string) Option
- func LPBeamOption(width float64) Option
- func LPOnlyBeamOption(width float64) Option
- func LWeightOption(weight float32) Option
- func LatsizeOption(size int) Option
- func LogBaseOption(base float32) Option
- func LogFileOption(filename string) Option
- func MDefFileOption(filename string) Option
- func MFCLogDirOption(dir string) Option
- func MLLRFileOption(filename string) Option
- func MMapOption(mmap bool) Option
- func MaxHMMPFOption(max int) Option
- func MaxWPFOption(max int) Option
- func MeansFileOption(filename string) Option
- func MinEndFrOption(n int) Option
- func MixWFileOption(filename string) Option
- func MixWFloorOption(floor float32) Option
- func NewWordPenaltyOption(penalty float32) Option
- func PBeamOption(width float64) Option
- func PIPenaltyOption(penalty float32) Option
- func PLBeamOption(width float64) Option
- func PLPBeamOption(width float64) Option
- func PLWeightOption(weight float64) Option
- func PLWindowOption(frames int) Option
- func PipOption(penalty float32) Option
- func RawLogDirOption(dir string) Option
- func SampleRateOption(rate float32) Option
- func SenDumpFileOption(filename string) Option
- func SenLogDirOption(dir string) Option
- func SilProbOption(prob float32) Option
- func TMatFileOption(filename string) Option
- func TMatFloorOption(floor float32) Option
- func TopNBeamOption(width string) Option
- func TopNOption(max int) Option
- func UnigramWeightOption(weight float32) Option
- func UserOption(name string, v interface{}) Option
- func VarFileOption(filename int) Option
- func VarFloorOption(floor float32) Option
- func WBeamOption(width float64) Option
- func WIPenaltyOption(penalty float32) Option
type String
- func (s String) S() string
type Strings
- func (s Strings) B() [][]byte
- func (s Strings) S() []string

Constants ¶

This section is empty.

Variables ¶

View Source

var NGgramInvalidWordID int32 = pocketsphinx.NgramInvalidWid

Functions ¶

This section is empty.

Types ¶

type Config ¶

type Config struct {
	// contains filtered or unexported fields
}

func NewConfig ¶

func NewConfig(opts ...Option) *Config

NewConfig creates a new command-line argument set based on the provided config options.

func NewConfigRetain ¶

func NewConfigRetain(ln *pocketsphinx.CommandLn) *Config

NewConfigRetain gets a new config while retaining ownership of a command-line argument set.

func (*Config) CommandLn ¶

func (c *Config) CommandLn() *pocketsphinx.CommandLn

func (*Config) Destroy ¶

func (c *Config) Destroy() bool

func (*Config) Retain ¶

func (c *Config) Retain()

Retain retains ownership of a command-line argument set.

type Decoder ¶

type Decoder struct {
	// contains filtered or unexported fields
}

func NewDecoder ¶

func NewDecoder(cfg *Config) (*Decoder, error)

NewDecoder initializes the decoder from a configuration object.

func (*Decoder) AddWord ¶

func (d *Decoder) AddWord(word, phones String, update bool) (id int32, ok bool)

AddWord adds a word to the pronunciation dictionary.

This function adds a word to the pronunciation dictionary and the current language model (but not to the current FSG if FSG mode is enabled). If the word is already present in one or the other, it does whatever is necessary to ensure that the word can be recognized.

word is a word string to add, e.g. "hello". phones is a whitespace-separated list of phoneme strings describing pronunciation of the word, e.g. "HH AH L OW". If update is true, updates the search module (whichever one is currently active) to recognize the newly added word. If adding multiple words, it is more efficient to pass false here in all but the last word.

Returns the internal ID (>= 0) of the newly added word.

func (*Decoder) AllDuration ¶

func (d *Decoder) AllDuration() (speech, cpu, wall time.Duration)

AllDuration gets overall performance information.

speech — number of seconds of speech. cpu — number of seconds of CPU time used. wall — number of seconds of wall time used.

func (*Decoder) Config ¶

func (d *Decoder) Config() *Config

Config gets the configuration object for this decoder.

func (*Decoder) Decoder ¶

func (d *Decoder) Decoder() *pocketsphinx.Decoder

Decoder returns a retained copy of underlying reference to pocketsphinx.Decoder.

func (*Decoder) Destroy ¶

func (d *Decoder) Destroy() bool

func (*Decoder) EndUtt ¶

func (d *Decoder) EndUtt() bool

EndUtt ends utterance processing.

func (*Decoder) FramesSearched ¶

func (d *Decoder) FramesSearched() int32

FramesSearched gets the number of frames of data searched.

Note that there is a delay between this and the number of frames of audio which have been input to the system. This is due to the fact that acoustic features are computed using a sliding window of audio, and dynamic features are computed over a sliding window of acoustic features.

Returns number of frames of speech data which have been recognized so far.

func (*Decoder) Hypothesis ¶

func (d *Decoder) Hypothesis() (hyp string, score int32)

Hypothesis gets hypothesis string and path score.

Returns the string containing the best hypothesis at this point in decoding (empty if no hypothesis is available), along with the path score of that string.

func (*Decoder) IsInSpeech ¶

func (d *Decoder) IsInSpeech() bool

IsInSpeech checks if the last fed audio buffer contained speech.

func (*Decoder) LogMath ¶

func (d *Decoder) LogMath() *LogMath

LogMath gets the log-math computation object for this decoder.

The decoder retains ownership of this pointer, so you should not attempt to free it manually. Use LogMath.Retain() if you wish to reuse it elsewhere.

func (*Decoder) LookupWord ¶

func (d *Decoder) LookupWord(word String) (string, bool)

LookupWord looks up the word in the dictionary and returns the phone transcription for it.

Returns a whitespace-separated phone string describing the pronunciation of the word, or an empty string if the word is not present in the dictionary.

func (*Decoder) NewLattice ¶

func (d *Decoder) NewLattice(filename String) (*Lattice, error)

NewLattice reads a lattice from a file on disk.

func (*Decoder) Probability ¶

func (d *Decoder) Probability() int32

Probability gets posterior probability of the best hypothesis.

Unless the BestpathOption option is enabled, this function will always return zero (corresponding to a posterior probability of 1.0). Even if BestpathOption is enabled, it will also return zero when called on a partial result. Ongoing research into effective confidence annotation for partial hypotheses may result in these restrictions being lifted in future versions.

func (*Decoder) ProcessCep ¶

func (d *Decoder) ProcessCep(data [][]float32, noSearch, fullUtterance bool) (frames int32, ok bool)

ProcessCep decodes acoustic feature data.

If noSearch is enabled, performs feature extraction but does not do any recognition yet. This may be necessary if your processor has trouble doing recognition in real-time.

fullUtterance shows that this block of data is a full utterance worth of data. This may allow the recognizer to produce more accurate results.

Returns number of frames of data searched.

func (*Decoder) ProcessRaw ¶

func (d *Decoder) ProcessRaw(data []int16, noSearch, fullUtterance bool) (frames int32, ok bool)

ProcessRaw decodes a raw audio stream.

No headers are recognized in this stream. The configuration parameters SampleRateOption and InputEndianOption are used to determine the sampling rate and endianness of the stream, respectively. Audio is always assumed to be 16-bit signed PCM.

If noSearch is enabled, performs feature extraction but does not do any recognition yet. This may be necessary if your processor has trouble doing recognition in real-time.

fullUtterance shows that this block of data is a full utterance worth of data. This may allow the recognizer to produce more accurate results.

Returns number of frames of data searched.

func (*Decoder) RawData ¶

func (d *Decoder) RawData() []int16

RawData retrieves the raw data collected during utterance decoding.

func (*Decoder) ReadDict ¶

func (d *Decoder) ReadDict(dictFile, fillerDictFile String) bool

ReadDict reloads the pronunciation dictionary from a file.

This function replaces the current pronunciation dictionary with the one stored in dictFile. This also causes the active search module(s) to be reinitialized, in the same manner as calling Decoder.AddWord() with update=true.

dictFile is the path to dictionary file to load. fillerDictFile is the path to filler dictionary to load, or empty string to keep the existing filler dictionary.

func (*Decoder) Reconfigure ¶

func (d *Decoder) Reconfigure(cfg *Config)

Reconfigure reinitializes the decoder with updated configuration.

This function allows you to switch the acoustic model, dictionary, or other configuration without creating an entirely new decoding object.

cfg is an optional new configuration to use. If cfg is nil, the previous configuration will be reloaded, with any changes applied.

func (*Decoder) SetKeyphrase ¶

func (d *Decoder) SetKeyphrase(name string, keyphrase string) int32

SetKeyphrase associates keyword search with the provided name. Activate with Decoder.SetSearch()

func (*Decoder) SetKws ¶

func (d *Decoder) SetKws(name string, keyfile string) int32

SetKws associates keyword search with the provided file. Activate with Decoder.SetSearch()

func (*Decoder) SetRawDataSize ¶

func (d *Decoder) SetRawDataSize(frames int32)

SetRawDataSize sets the limit of the raw audio data to store in decoder to retrieve it later with Decoder.RawData().

func (*Decoder) SetSearch ¶

func (d *Decoder) SetSearch(name string) int32

SetSearch activates the named search. The search must be set beforehand with Decoder.SetKeyphrase() or Decoder.SetKws()

func (*Decoder) StartStream ¶

func (d *Decoder) StartStream() bool

StartStream starts processing of the stream of speech. Channel parameters like noise-level are maintained for the stream and reused among utterances. Times returned in segment iterators are also stream-wide.

func (*Decoder) StartUtt ¶

func (d *Decoder) StartUtt() bool

StartUtt starts utterance processing. This function should be called before any utterance data is passed to the decoder. It marks the start of a new utterance and reinitializes internal data structures.

func (*Decoder) UpdateMLLR ¶

func (d *Decoder) UpdateMLLR(mllr *MLLR) *MLLR

UpdateMLLR adapts current acoustic model using a linear transform (Maximum Likelihood Linear Regression).

mllr is the new transform to use, or nil to update the existing transform. The decoder retains ownership of this pointer, so you should not attempt to free it manually. Use MLLR.Retain() if you wish to reuse it elsewhere.

Returns the updated transform object for this decoder, or nil on failure.

func (*Decoder) UttDuration ¶

func (d *Decoder) UttDuration() (speech, cpu, wall time.Duration)

UttDuration gets performance information for the current utterance.

speech — number of seconds of speech. cpu — number of seconds of CPU time used. wall — number of seconds of wall time used.

func (*Decoder) WordLattice ¶

func (d *Decoder) WordLattice() *Lattice

WordLattice gets the word lattice object containing all hypotheses so far.

The pointer is owned by the decoder and you should not attempt to free it manually. It is only valid until the next utterance, unless you use Lattice.Retain() to retain it.

func (*Decoder) WriteDict ¶

func (d *Decoder) WriteDict(dictFile String) bool

WriteDict writes the current pronunciation dictionary to a file.

type JSGF ¶

type JSGF struct {
	// contains filtered or unexported fields
}

func JSGFParseFile ¶

func JSGFParseFile(filename String, parent *JSGF) (*JSGF, error)

func JSGFParseString ¶

func JSGFParseString(data String, parent *JSGF) (*JSGF, error)

func NewJSGFGrammar ¶

func NewJSGFGrammar(parent *JSGF) (*JSGF, error)

NewJSGFGrammar creates a new JSGF grammar. Parent is an optional parent grammar for this one (nil, usually). Returns a new JSGF grammar object, or nil on failure.

func (*JSGF) GrammarName ¶

func (j *JSGF) GrammarName() string

type JSGFRuleIter ¶

type JSGFRuleIter pocketsphinx.JSGFRuleIter

type Lattice ¶

type Lattice struct {
	// contains filtered or unexported fields
}

Lattice word graph structure used in bestpath/nbest search.

func (*Lattice) BaseWord ¶

func (l *Lattice) BaseWord(node *LatticeNode) string

BaseWord gets base word string for this node.

func (*Lattice) BestPath ¶

func (l *Lattice) BestPath(model *NGramModel, lwf, ascale float32) *LatticeLink

BestPath does N-Gram based best-path search on a word graph using A*. Returns the final link in best path or nil upon error.

This function calculates both the best path as well as the forward probability used in confidence estimation.

func (*Lattice) Destroy ¶

func (l *Lattice) Destroy() bool

func (*Lattice) Frames ¶

func (l *Lattice) Frames() int32

Frames gets the number of frames in the lattice.

func (Lattice) Iter ¶

func (l Lattice) Iter() *LatticeNodeIter

Iter starts iterating over nodes in the lattice.

No particular order of traversal is guaranteed, and you should not depend on this.

func (*Lattice) Lattice ¶

func (l *Lattice) Lattice() *pocketsphinx.Lattice

Lattice returns a retained copy of underlying reference to pocketsphinx.Lattice.

func (*Lattice) LinkBaseWord ¶

func (l *Lattice) LinkBaseWord(link *LatticeLink) string

LinkBaseWord gets base word string from a lattice link.

func (*Lattice) LinkProbability ¶

func (l *Lattice) LinkProbability(link *LatticeLink) (score, prob int32)

LinkProbability gets acoustic score and posterior probability from a lattice link.

Posterior probability for this link. Log is expressed in the log-base used in the decoder. To convert to linear floating-point, use Lattice.LogMath().Exp(prob).

func (*Lattice) LinkWord ¶

func (l *Lattice) LinkWord(link *LatticeLink) string

LinkWord gets word string from a lattice link (possibly a pronunciation variant).

func (*Lattice) LogMath ¶

func (l *Lattice) LogMath() *LogMath

LogMath gets the log-math computation object for this lattice.

The lattice retains ownership of this pointer, so you should not attempt to free it manually. Use LogMath.Retain() if you wish to reuse it elsewhere.

func (*Lattice) NewLink ¶

func (l *Lattice) NewLink(from, to *LatticeNode, score, endFrame int32)

NewLink creates a directed link between from and to nodes, but if a link already exists, chooses one with the best score.

func (*Lattice) Posterior ¶

func (l *Lattice) Posterior(model *NGramModel, ascale float32) int32

Posterior calculates link posterior probabilities on a word graph. Returns the posterior probability of the utterance as a whole.

WARN: This function assumes that Lattice.BestPath() search has already been done.

func (*Lattice) PosteriorPrune ¶

func (l *Lattice) PosteriorPrune(model *NGramModel, ascale float32) int32

PosteriorPrune prunes all links (and associated nodes) below a certain posterior probability, return number of arcs removed.

beam represents the minimum posterior probability for links. This is expressed in the log-base used in the decoder. To convert from linear floating-point, use Lattice.LogMath().Log(prob).

WARN: This function assumes that Lattice.Posterior() has already been called.

func (*Lattice) ProbabilityOf ¶

func (l *Lattice) ProbabilityOf(node *LatticeNode) (*LatticeLink, int32)

ProbabilityOf node gets the best posterior probability and associated acoustic score from a lattice node. Returns exit link with highest posterior probability and the probability of this link.

Log is expressed in the log-base used in the decoder. To convert to linear floating-point, use Lattice.LogMath().Exp(prob).

func (*Lattice) Retain ¶

func (l *Lattice) Retain()

func (*Lattice) ReverseEdges ¶

func (l *Lattice) ReverseEdges(start, end *LatticeNode) *LatticeLink

ReverseEdges starts a reverse traversal of edges in a word graph.

See Lattice.TraverseEdges() for why this API is the way it is.

func (*Lattice) ReverseNext ¶

func (l *Lattice) ReverseNext(start *LatticeNode) *LatticeLink

ReverseNext gets the next link in reverse traversal.

func (*Lattice) TraverseEdges ¶

func (l *Lattice) TraverseEdges(start, end *LatticeNode) *LatticeLink

TraverseEdges starts a forward traversal of edges in a word graph.

A keen eye will notice an inconsistency in this API versus other types of iterators in PocketSphinx. The reason for this is that the traversal algorithm is much more efficient when it is able to modify the lattice structure. Therefore, to avoid giving the impression that multiple traversals are possible at once, no separate iterator structure is provided.

func (*Lattice) TraverseNext ¶

func (l *Lattice) TraverseNext(end *LatticeNode) *LatticeLink

TraverseNext gets the next link in forward traversal.

func (*Lattice) Word ¶

func (l *Lattice) Word(node *LatticeNode) string

Word gets word string for this node.

func (*Lattice) WriteTo ¶

func (l *Lattice) WriteTo(filename String) bool

WriteTo writes a lattice to disk.

func (*Lattice) WriteToHTK ¶

func (l *Lattice) WriteToHTK(filename String) bool

WriteToHTK writes a lattice to disk in HTK format.

type LatticeLink ¶

type LatticeLink pocketsphinx.Latlink

LatticeLink represents links between DAG nodes.

A link corresponds to a single hypothesized instance of a word with a given start and end point.

func (*LatticeLink) Nodes ¶

func (l *LatticeLink) Nodes() (source, dest *LatticeNode)

Nodes gets the source and destination nodes from a lattice link.

func (*LatticeLink) Prev ¶

func (l *LatticeLink) Prev() *LatticeLink

Prev gets predecessor link in best path.

func (*LatticeLink) Times ¶

func (l *LatticeLink) Times() (start int32, end int16)

Times gets start and end times from a lattice link.

start - start frame of this link. end - end frame of this link.

These are inclusive, i.e. the last frame of this word is end, not end-1.

type LatticeLinkIter ¶

type LatticeLinkIter pocketsphinx.LatlinkIter

LatticeLinkIter iterator over DAG links.

func (*LatticeLinkIter) Close ¶

func (l *LatticeLinkIter) Close()

Close stops the iteration over links.

func (*LatticeLinkIter) Link ¶

func (l *LatticeLinkIter) Link() *LatticeLink

Link gets link from iterator.

func (*LatticeLinkIter) Next ¶

func (l *LatticeLinkIter) Next() *LatticeLinkIter

Next gets next link from a lattice link iterator.

type LatticeNode ¶

type LatticeNode pocketsphinx.Latnode

LatticeNode represents DAG nodes.

A node corresponds to a number of hypothesized instances of a word which all share the same starting point.

func (*LatticeNode) Entries ¶

func (l *LatticeNode) Entries() *LatticeLinkIter

Entries returns an iterator over entries to this node.

func (*LatticeNode) Exits ¶

func (l *LatticeNode) Exits() *LatticeLinkIter

Exits returns an iterator over exits from this node.

func (*LatticeNode) Times ¶

func (l *LatticeNode) Times() (start int32, first, last int16)

Times gets start and end time range for a node.

first — end frame of first exit from this node. last — end frame of last exit from this node. start — start frame for all edges exiting this node.

type LatticeNodeIter ¶

type LatticeNodeIter pocketsphinx.LatnodeIter

LatticeNodeIter iterator over DAG nodes.

func (*LatticeNodeIter) Close ¶

func (l *LatticeNodeIter) Close()

Close stops iterating over nodes.

func (*LatticeNodeIter) Next ¶

func (l *LatticeNodeIter) Next() *LatticeNodeIter

Next moves to next node in iteration.

func (*LatticeNodeIter) Node ¶

func (l *LatticeNodeIter) Node() *LatticeNode

Node gets node from iterator.

type LogMath ¶

type LogMath struct {
	// contains filtered or unexported fields
}

LogMath integer log math computation class.

func (LogMath) Add ¶

func (l LogMath) Add(p, q int32) int32

Add adds two values in log space (i.e. returns log(exp(p)+exp(q))).

func (LogMath) AddExact ¶

func (l LogMath) AddExact(p, q int32) int32

AddExact adds two values in log space exactly and slowly (without using add table).

func (*LogMath) Destroy ¶

func (l *LogMath) Destroy() bool

func (LogMath) Exp ¶

func (l LogMath) Exp(p int32) float64

Exp converts integer log in base B to linear floating point.

func (LogMath) GetBase ¶

func (l LogMath) GetBase() float64

GetBase gets the log base.

func (LogMath) GetShift ¶

func (l LogMath) GetShift() int32

GetShift gets the shift of the values in a log table.

func (LogMath) GetTableShape ¶

func (l LogMath) GetTableShape() (size, width, shift uint32, ok bool)

GetTableShape gets the log table size and dimensions.

func (LogMath) GetWidth ¶

func (l LogMath) GetWidth() int32

GetWidth gets the width of the values in a log table.

func (LogMath) GetZero ¶

func (l LogMath) GetZero() int32

GetZero gets the smallest possible value represented in this base.

func (LogMath) LnToLog ¶

func (l LogMath) LnToLog(p float64) int32

LnToLog converts natural log (in floating point) to integer log in base B.

func (LogMath) Log ¶

func (l LogMath) Log(p float64) int32

Log converts linear floating point number to integer log in base B.

func (LogMath) Log10ToLog ¶

func (l LogMath) Log10ToLog(p float64) int32

Log10ToLog converts base 10 log (in floating point) to integer log in base B.

func (LogMath) Log10ToLogFloat ¶

func (l LogMath) Log10ToLogFloat(p float64) float32

Log10ToLogFloat converts base 10 log (in floating point) to float log in base B.

func (LogMath) LogFloatToLog10 ¶

func (l LogMath) LogFloatToLog10(p float32) float64

LogFloatToLog10 converts float log in base B to base 10 log.

func (*LogMath) LogMath ¶

func (l *LogMath) LogMath() *pocketsphinx.Logmath

LogMath returns a retained copy of underlying reference to pocketsphinx.Logmath.

func (LogMath) LogToLn ¶

func (l LogMath) LogToLn(p int32) float64

LogToLn converts integer log in base B to natural log (in floating point).

func (LogMath) LogToLog10 ¶

func (l LogMath) LogToLog10(p int32) float64

LogToLog10 converts integer log in base B to base 10 log (in floating point).

func (*LogMath) Retain ¶

func (l *LogMath) Retain()

func (LogMath) WriteTo ¶

func (l LogMath) WriteTo(filename String) bool

WriteTo writes a log table to a file.

type MLLR ¶

type MLLR struct {
	// contains filtered or unexported fields
}

func NewMLLR ¶

func NewMLLR(filename String) (*MLLR, error)

NewMLLR reads a speaker-adaptive linear transform from a file (mllr_matrix). See http://cmusphinx.sourceforge.net/wiki/tutorialadapt for details.

func (*MLLR) Destroy ¶

func (m *MLLR) Destroy() bool

func (*MLLR) MLLR ¶

func (m *MLLR) MLLR() *pocketsphinx.Mllr

MLLR returns a retained copy of underlying reference to pocketsphinx.Mllr.

func (*MLLR) Retain ¶

func (m *MLLR) Retain()

type NGramCase ¶

type NGramCase int32

NgramCase as declared in sphinxbase/ngram_model.h:166

const (
	NGramUpper NGramCase = NGramCase(pocketsphinx.NgramUpper)
	NGramLower NGramCase = NGramCase(pocketsphinx.NgramLower)
)

Constants for case folding.

type NGramFileType ¶

type NGramFileType int32

NgramFileType as declared in sphinxbase/ngram_model.h:81

const (
	// NGramInvalid is not a valid file type.
	NGramInvalid NGramFileType = NGramFileType(pocketsphinx.NgramInvalid)
	// NGramAuto to determine file type automatically.
	NGramAuto NGramFileType = NGramFileType(pocketsphinx.NgramAuto)
	// NGramArpa is for ARPABO text format (the standard).
	NGramArpa NGramFileType = NGramFileType(pocketsphinx.NgramArpa)
	// NGramBin is for sphinx .DMP format.
	NGramBin NGramFileType = NGramFileType(pocketsphinx.NgramBin)
)

File types for N-Gram files.

type NGramModel ¶

type NGramModel struct {
	// contains filtered or unexported fields
}

NGramModel is a type representing an N-Gram based language model.

func NewNGramModel ¶

func NewNGramModel(fileName String, fileType NGramFileType,
	lmath *LogMath, opt ...NGramOptions) (*NGramModel, error)

NewNGramModel reads an N-Gram model from a file on disk.

lmath carries log-math parameters to use for probability calculations. Ownership of this object is assumed by the newly created NGramModel, and you should not attempt to free it manually. If you wish to reuse it elsewhere, you must retain it with LogMath.Retain().

func (*NGramModel) AddClass ¶

func (n *NGramModel) AddClass(className String, weight float32, words Strings, weights []float32) bool

AddClass adds a new class to a language model.

If className already exists in the unigram set for NGramModel, then it will be converted to a class tag, and weight will be ignored. Otherwise, a new unigram will be created as in NGramModel.AddWord().

func (*NGramModel) AddClassWord ¶

func (n *NGramModel) AddClassWord(className, word String, weight float32) int32

AddClassWord adds a word to a class in a language model with a weight of this word relative to the within-class uniform distribution. Returns word ID.

func (*NGramModel) AddWord ¶

func (n *NGramModel) AddWord(word String, weight float32) int32

AddWord adds a word (unigram) to the language model and returns the word ID for the new word.

The semantics of this are not particularly well-defined for model sets, and may be subject to change. Currently this will add the word to all of the submodels.

func (*NGramModel) ApplyWeights ¶

func (n *NGramModel) ApplyWeights(langWeight, insertionPenalty float32) bool

ApplyWeights applies a language weight and insertion penalty weight to a language model.

This will change the values output by NGramModel.Score() and friends. This is done for efficiency since in decoding, these are the only values we actually need. Call NGramModel.Probability() if you want the "raw" N-Gram probability estimate.

To remove all weighting, call NGramModel.ApplyWeights(1.0, 1.0).

func (*NGramModel) BigramScore ¶

func (n *NGramModel) BigramScore(w2, w1 int32) (score, nUsed int32)

BigramScore does quick bigram score lookup.

func (*NGramModel) CaseFold ¶

func (n *NGramModel) CaseFold(c NGramCase) bool

CaseFold word strings in an N-Gram model.

WARNING: This is not Unicode aware, so any non-ASCII characters will not be converted.

func (*NGramModel) Counts ¶

func (n *NGramModel) Counts() []uint32

Counts gets the counts of the various N-grams in the model.

func (*NGramModel) Destroy ¶

func (n *NGramModel) Destroy() bool

func (*NGramModel) Flush ¶

func (n *NGramModel) Flush()

Flush any cached N-Gram information.

func (*NGramModel) NGramModel ¶

func (n *NGramModel) NGramModel() *pocketsphinx.NgramModel

NGramModel returns a retained copy of underlying reference to pocketsphinx.NgramModel.

func (*NGramModel) Probability ¶

func (n *NGramModel) Probability(words Strings) int32

Probability gets the "raw" log-probability for a general N-Gram.

This returns the log-probability of an N-Gram, as defined in the language model file, before any language weighting, interpolation, or insertion penalty has been applied.

When backing off to a unigram from a bigram or trigram, the unigram weight (interpolation with uniform) is not removed.

func (*NGramModel) QuickProbability ¶

func (n *NGramModel) QuickProbability(wordID int32, history []int32) (prob, nUsed int32)

Quick "raw" probability lookup for a general N-Gram.

See documentation for NGramModel.Score() and NGramModel.ApplyWeights() for an explanation of this.

func (*NGramModel) ReadClassDef ¶

func (n *NGramModel) ReadClassDef(filename String) bool

ReadClassDef reads a class definition file and adds classes to a language model.

This function assumes that the class tags have already been defined as unigrams in the language model. All words in the class definition will be added to the vocabulary as special in-class words. For this reason it is necessary that they not have the same names as any words in the general unigram distribution. The convention is to suffix them with ":class_tag", where class_tag is the class tag minus the enclosing square brackets.

func (*NGramModel) Retain ¶

func (n *NGramModel) Retain()

func (*NGramModel) Score ¶

func (n *NGramModel) Score(wordID int32, history []int32) (score, nUsed int32)

Score gets the score (scaled, interpolated log-probability) for a general N-Gram. See TrigramScore and BigramScore for particular cases.

If one of the words is not in the LM's vocabulary, the result will depend on whether this is an open or closed vocabulary language model. For an open-vocabulary model, unknown words are all mapped to the unigram "UNK" which has a non-zero probability and also participates in higher-order N-Grams. Therefore, you will get a score of some sort in this case.

For a closed-vocabulary model, unknown words are impossible and thus have zero probability. Therefore, if wordID is unknown, this function will return a "zero" log-probability, i.e. a large negative number. To obtain this number for comparison, call NGramModel.Zero().

func (*NGramModel) ScoreToProbability ¶

func (n *NGramModel) ScoreToProbability(score int32) int32

ScoreToProbability converts score to "raw" log-probability.

The unigram weight (interpolation with uniform) is not removed, since there is no way to know which order of N-Gram generated score.

func (*NGramModel) Size ¶

func (n *NGramModel) Size() int32

Size gets the order of the N-gram model (i.e. the "N" in "N-gram")

func (*NGramModel) TrigramScore ¶

func (n *NGramModel) TrigramScore(w3, w2, w1 int32) (score, nUsed int32)

TrigramScore does quick trigram score lookup.

func (*NGramModel) UnknownWordID ¶

func (n *NGramModel) UnknownWordID() int32

UnknownWordID gets the unknown word ID for a language model.

Language models can be either "open vocabulary" or "closed vocabulary". The difference is that the former assigns a fixed non-zero unigram probability to unknown words, while the latter does not allow unknown words (or, equivalently, it assigns them zero probability). If this is a closed vocabulary model, this function will return NGgramInvalidWordID.

Returns the ID for the unknown word, or NGgramInvalidWordID if none exists.

func (*NGramModel) Weights ¶

func (n *NGramModel) Weights() (langWeight float32, wipLog int32)

Weights gets the current language weight from a language model and logarithm of word insertion penalty.

func (*NGramModel) Word ¶

func (n *NGramModel) Word(wordID int32) string

Word looks up the word string for a numerical wordID.

func (*NGramModel) WordID ¶

func (n *NGramModel) WordID(word String) int32

WordID looks up the numerical word ID for a word.

func (*NGramModel) WriteTo ¶

func (n *NGramModel) WriteTo(filename String, format NGramFileType) bool

WriteTo writes an N-Gram model to disk.

func (*NGramModel) Zero ¶

func (n *NGramModel) Zero() int32

Zero gets the "zero" log-probability value for a language model.

type NGramOptions ¶

type NGramOptions struct {
	// contains filtered or unexported fields
}

NGramOptions is a utility to construct CommandLn for NewNGramModel.

func (*NGramOptions) CommandLn ¶

func (n *NGramOptions) CommandLn() *pocketsphinx.CommandLn

func (*NGramOptions) Destroy ¶

func (n *NGramOptions) Destroy() bool

func (*NGramOptions) LanguageWeight ¶

func (n *NGramOptions) LanguageWeight(v float32)

LanguageWeight to apply to the model.

func (*NGramOptions) MMap ¶

func (n *NGramOptions) MMap(v bool)

MMap sets whether to use memory-mapped I/O.

func (*NGramOptions) WordInsertionPenalty ¶

func (n *NGramOptions) WordInsertionPenalty(v float32)

WordInsertionPenalty to apply to the model.

type Option ¶

type Option func(c *Config)

func AScaleOption ¶

func AScaleOption(scale float32) Option

AScaleOption sets inverse of acoustic model scale for confidence score calculation.

Default: 20.0

func AWeightOption ¶

func AWeightOption(weight int) Option

AWeightOption sets inverse weight applied to acoustic scores.

Default: 1

func AllPhoneCIOption ¶

func AllPhoneCIOption(ciUnitsOnly bool) Option

AllPhoneCIOption enables phoneme decoding with a phonetic LM using context-independent units only.

Default: false

func AllPhoneFileOption ¶

func AllPhoneFileOption(filepath string) Option

AllPhoneFileOption sets filepath for phoneme decoding with phonetic lm.

func BacktraceOption ¶

func BacktraceOption(backtrace bool) Option

BacktraceOption enables printing results and backtraces to log.

Default: false

func BeamOption ¶

func BeamOption(width float64) Option

BeamOption sets beam width applied to every frame in Viterbi search (smaller values mean wider beam).

Default: 1e-48

func BestPathLWeightOption ¶

func BestPathLWeightOption(weight float32) Option

BestPathLWeightOption sets language model probability weight for bestpath search.

Default: 9.5

func BestpathOption ¶

func BestpathOption(bestpath bool) Option

BestpathOption enables running bestpath (Dijkstra) search over the word lattice (3rd pass).

Default: true

func CompAllSenOption ¶

func CompAllSenOption(compallsen bool) Option

CompAllSenOption enables computing all senone scores in every frame (can be faster when there are many senones).

Default: false

func DebugOption ¶

func DebugOption(level int) Option

DebugOption sets verbosity level for debugging messages.

func DictCaseOption ¶

func DictCaseOption(sens bool) Option

DictCaseOption sets whether the dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only).

Default: false

func DictFileOption ¶

func DictFileOption(filename string) Option

DictFileOption sets main pronunciation dictionary (lexicon) input file.

func DsRatioOption ¶

func DsRatioOption(ratio int) Option

DsRatioOption sets frame GMM computation downsampling ratio.

Default: 1

func FastFourierTransformOption ¶

func FastFourierTransformOption(points int) Option

FastFourierTransformOption sets the size of the fast Fourier transform (FFT), in points.

func FeatParamsFileOption ¶

func FeatParamsFileOption(filename string) Option

FeatParamsFileOption sets file containing feature extraction parameters.

func FillProbOption ¶

func FillProbOption(prob float32) Option

FillProbOption sets filler word transition probability.

Default: 1e-8

func FillerDictFileOption ¶

func FillerDictFileOption(filename string) Option

FillerDictFileOption sets noise word pronunciation dictionary input file.

func FiniteStateGrammarsOption ¶

func FiniteStateGrammarsOption(filepath string) Option

FiniteStateGrammarsOption for finite state grammars.

func FwdFlatBeamOption ¶

func FwdFlatBeamOption(width float64) Option

FwdFlatBeamOption sets beam width applied to every frame in second-pass flat search.

Default: 1e-64

func FwdFlatLWeightOption ¶

func FwdFlatLWeightOption(weight float32) Option

FwdFlatLWeightOption sets language model probability weight for flat lexicon (2nd pass) decoding.

Default: 8.5

func FwdFlatOption ¶

func FwdFlatOption(fwdflat bool) Option

FwdFlatOption enables running forward flat-lexicon search over the word lattice (2nd pass).

Default: true

func FwdFlatSfWinOption ¶

func FwdFlatSfWinOption(frames int) Option

FwdFlatSfWinOption sets window of frames in lattice to search for successor words in fwdflat search.

Default: 25

func FwdFlatWBeamOption ¶

func FwdFlatWBeamOption(width float64) Option

FwdFlatWBeamOption sets beam width applied to word exits in second-pass flat search.

Default: 7e-29

func FwdFlateFWidOption ¶

func FwdFlateFWidOption(frames int) Option

FwdFlateFWidOption sets minimum number of end frames for a word to be searched in fwdflat search.

Default: 4

func FwdTreeOption ¶

func FwdTreeOption(fwdtree bool) Option

FwdTreeOption enables running forward lexicon-tree search (1st pass).

Default: true

func HMMDirOption ¶

func HMMDirOption(dir string) Option

HMMDirOption sets directory containing acoustic model files.

func InputEndianOption ¶

func InputEndianOption(endian string) Option

InputEndianOption sets endianness of the input.

Default: "little"

func KeyphraseOption ¶

func KeyphraseOption(keyphrase string) Option

KeyphraseOption sets keyphrase to spot.

func KeywordsDelayOption ¶

func KeywordsDelayOption(delay int) Option

KeywordsDelayOption sets delay to wait for best detection score.

Default: 10

func KeywordsFileOption ¶

func KeywordsFileOption(filename string) Option

KeywordsFileOption sets a file with keyphrases to spot, one per line.

func KeywordsPLPOption ¶

func KeywordsPLPOption(prob float64) Option

KeywordsPLPOption sets phone loop probability for keyphrase spotting.

Default: 1e-1

func KeywordsThresholdOption ¶

func KeywordsThresholdOption(threshold float64) Option

KeywordsThresholdOption sets the threshold for the p(hyp)/p(alternatives) ratio.

Default: 1.0

func LMFileOption ¶

func LMFileOption(filename string) Option

LMFileOption sets word trigram language model input file.

func LMNameOption ¶

func LMNameOption(name string) Option

LMNameOption sets which language model in LMSetOption to use by default.

func LMSetOption ¶

func LMSetOption(set string) Option

LMSetOption specifies a set of language models.

func LPBeamOption ¶

func LPBeamOption(width float64) Option

LPBeamOption sets beam width applied to last phone in words.

Default: 1e-40

func LPOnlyBeamOption ¶

func LPOnlyBeamOption(width float64) Option

LPOnlyBeamOption sets beam width applied to last phone in single-phone words.

Default: 7e-29

func LWeightOption ¶

func LWeightOption(weight float32) Option

LWeightOption sets language model probability weight.

Default: 6.5

func LatsizeOption ¶

func LatsizeOption(size int) Option

LatsizeOption sets initial backpointer table size.

Default: 5000

func LogBaseOption ¶

func LogBaseOption(base float32) Option

LogBaseOption sets the base in which all log-likelihoods are calculated.

Default: 1.0001

func LogFileOption ¶

func LogFileOption(filename string) Option

LogFileOption sets file to write log messages in.

func MDefFileOption ¶

func MDefFileOption(filename string) Option

MDefFileOption sets model definition input file.

func MFCLogDirOption ¶

func MFCLogDirOption(dir string) Option

MFCLogDirOption sets directory to log feature files to.

func MLLRFileOption ¶

func MLLRFileOption(filename string) Option

MLLRFileOption sets MLLR transformation to apply to means and variances.

func MMapOption ¶

func MMapOption(mmap bool) Option

MMapOption enables use of memory-mapped I/O (if possible) for model files.

Default: true

func MaxHMMPFOption ¶

func MaxHMMPFOption(max int) Option

MaxHMMPFOption sets maximum number of active HMMs to maintain at each frame (or -1 for no pruning).

Default: 30000

func MaxWPFOption ¶

func MaxWPFOption(max int) Option

MaxWPFOption sets maximum number of distinct word exits at each frame (or -1 for no pruning).

Default: -1

func MeansFileOption ¶

func MeansFileOption(filename string) Option

MeansFileOption sets mixture gaussian means input file.

func MinEndFrOption ¶

func MinEndFrOption(n int) Option

MinEndFrOption sets nodes ignored in lattice construction if they persist for fewer than N frames.

Default: 0

func MixWFileOption ¶

func MixWFileOption(filename string) Option

MixWFileOption sets senone mixture weights input file (uncompressed).

func MixWFloorOption ¶

func MixWFloorOption(floor float32) Option

MixWFloorOption sets senone mixture weights floor (applied to data from MixWFileOption file).

Default: 0.0000001

func NewWordPenaltyOption ¶

func NewWordPenaltyOption(penalty float32) Option

NewWordPenaltyOption sets new word transition penalty.

Default: 1.0

func PBeamOption ¶

func PBeamOption(width float64) Option

PBeamOption sets beam width applied to phone transitions.

Default: 1e-48

func PIPenaltyOption ¶

func PIPenaltyOption(penalty float32) Option

PIPenaltyOption sets phone insertion penalty.

Default: 1.0

func PLBeamOption ¶

func PLBeamOption(width float64) Option

PLBeamOption sets beam width applied to phone loop search for lookahead.

Default: 1e-10

func PLPBeamOption ¶

func PLPBeamOption(width float64) Option

PLPBeamOption sets beam width applied to phone loop transitions for lookahead.

Default: 1e-10

func PLWeightOption ¶

func PLWeightOption(weight float64) Option

PLWeightOption sets weight for phoneme lookahead penalties.

Default: 3.0

func PLWindowOption ¶

func PLWindowOption(frames int) Option

PLWindowOption sets phoneme lookahead window size, in frames.

Default: 5

func PipOption ¶

func PipOption(penalty float32) Option

PipOption sets phone insertion penalty for phone loop.

Default: 1.0

func RawLogDirOption ¶

func RawLogDirOption(dir string) Option

RawLogDirOption sets directory to log raw audio files to.

func SampleRateOption ¶

func SampleRateOption(rate float32) Option

SampleRateOption sets sample rate.

Default: 16000.0

func SenDumpFileOption ¶

func SenDumpFileOption(filename string) Option

SenDumpFileOption sets senone dump (compressed mixture weights) input file.

func SenLogDirOption ¶

func SenLogDirOption(dir string) Option

SenLogDirOption sets directory to log senone score files to.

func SilProbOption ¶

func SilProbOption(prob float32) Option

SilProbOption sets silence word transition probability.

Default: 0.005

func TMatFileOption ¶

func TMatFileOption(filename string) Option

TMatFileOption sets HMM state transition matrix input file.

func TMatFloorOption ¶

func TMatFloorOption(floor float32) Option

TMatFloorOption sets HMM state transition probability floor (applied to TMatFileOption file).

func TopNBeamOption ¶

func TopNBeamOption(width string) Option

TopNBeamOption sets beam width used to determine top-N Gaussians (or a list, per-feature).

Default: "0"

func TopNOption ¶

func TopNOption(max int) Option

TopNOption sets maximum number of top Gaussians to use in scoring.

Default: 4

func UnigramWeightOption ¶

func UnigramWeightOption(weight float32) Option

UnigramWeightOption sets unigram weight.

Default: 1.0

func UserOption ¶

func UserOption(name string, v interface{}) Option

UserOption sets a user specified option to a custom value.

func VarFileOption ¶

func VarFileOption(filename int) Option

VarFileOption sets mixture gaussian variances input file.

func VarFloorOption ¶

func VarFloorOption(floor float32) Option

VarFloorOption sets mixture gaussian variance floor (applied to data from VarFileOption file).

Default: 0.0001

func WBeamOption ¶

func WBeamOption(width float64) Option

WBeamOption sets beam width applied to word exits.

Default: 7e-29

func WIPenaltyOption ¶

func WIPenaltyOption(penalty float32) Option

WIPenaltyOption sets word insertion penalty.

Default: 0.65

type String ¶

type String string

func (String) S ¶

func (s String) S() string

type Strings ¶

type Strings []string

func (Strings) B ¶

func (s Strings) B() [][]byte

func (Strings) S ¶

func (s Strings) S() []string

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL