Documentation
¶
Index ¶
- Variables
- type Config
- type Decoder
- func (d *Decoder) AddWord(word, phones String, update bool) (id int32, ok bool)
- func (d *Decoder) AllDuration() (speech, cpu, wall time.Duration)
- func (d *Decoder) Config() *Config
- func (d *Decoder) Decoder() *pocketsphinx.Decoder
- func (d *Decoder) Destroy() bool
- func (d *Decoder) EndUtt() bool
- func (d *Decoder) FramesSearched() int32
- func (d *Decoder) Hypothesis() (hyp string, score int32)
- func (d *Decoder) IsInSpeech() bool
- func (d *Decoder) LogMath() *LogMath
- func (d *Decoder) LookupWord(word String) (string, bool)
- func (d *Decoder) NewLattice(filename String) (*Lattice, error)
- func (d *Decoder) Probability() int32
- func (d *Decoder) ProcessCep(data [][]float32, noSearch, fullUtterance bool) (frames int32, ok bool)
- func (d *Decoder) ProcessRaw(data []int16, noSearch, fullUtterance bool) (frames int32, ok bool)
- func (d *Decoder) RawData() []int16
- func (d *Decoder) ReadDict(dictFile, fillerDictFile String) bool
- func (d *Decoder) Reconfigure(cfg *Config)
- func (d *Decoder) SetKeyphrase(name string, keyphrase string) int32
- func (d *Decoder) SetKws(name string, keyfile string) int32
- func (d *Decoder) SetRawDataSize(frames int32)
- func (d *Decoder) SetSearch(name string) int32
- func (d *Decoder) StartStream() bool
- func (d *Decoder) StartUtt() bool
- func (d *Decoder) UpdateMLLR(mllr *MLLR) *MLLR
- func (d *Decoder) UttDuration() (speech, cpu, wall time.Duration)
- func (d *Decoder) WordLattice() *Lattice
- func (d *Decoder) WriteDict(dictFile String) bool
- type JSGF
- type JSGFRuleIter
- type Lattice
- func (l *Lattice) BaseWord(node *LatticeNode) string
- func (l *Lattice) BestPath(model *NGramModel, lwf, ascale float32) *LatticeLink
- func (l *Lattice) Destroy() bool
- func (l *Lattice) Frames() int32
- func (l Lattice) Iter() *LatticeNodeIter
- func (l *Lattice) Lattice() *pocketsphinx.Lattice
- func (l *Lattice) LinkBaseWord(link *LatticeLink) string
- func (l *Lattice) LinkProbability(link *LatticeLink) (score, prob int32)
- func (l *Lattice) LinkWord(link *LatticeLink) string
- func (l *Lattice) LogMath() *LogMath
- func (l *Lattice) NewLink(from, to *LatticeNode, score, endFrame int32)
- func (l *Lattice) Posterior(model *NGramModel, ascale float32) int32
- func (l *Lattice) PosteriorPrune(model *NGramModel, ascale float32) int32
- func (l *Lattice) ProbabilityOf(node *LatticeNode) (*LatticeLink, int32)
- func (l *Lattice) Retain()
- func (l *Lattice) ReverseEdges(start, end *LatticeNode) *LatticeLink
- func (l *Lattice) ReverseNext(start *LatticeNode) *LatticeLink
- func (l *Lattice) TraverseEdges(start, end *LatticeNode) *LatticeLink
- func (l *Lattice) TraverseNext(end *LatticeNode) *LatticeLink
- func (l *Lattice) Word(node *LatticeNode) string
- func (l *Lattice) WriteTo(filename String) bool
- func (l *Lattice) WriteToHTK(filename String) bool
- type LatticeLink
- type LatticeLinkIter
- type LatticeNode
- type LatticeNodeIter
- type LogMath
- func (l LogMath) Add(p, q int32) int32
- func (l LogMath) AddExact(p, q int32) int32
- func (l *LogMath) Destroy() bool
- func (l LogMath) Exp(p int32) float64
- func (l LogMath) GetBase() float64
- func (l LogMath) GetShift() int32
- func (l LogMath) GetTableShape() (size, width, shift uint32, ok bool)
- func (l LogMath) GetWidth() int32
- func (l LogMath) GetZero() int32
- func (l LogMath) LnToLog(p float64) int32
- func (l LogMath) Log(p float64) int32
- func (l LogMath) Log10ToLog(p float64) int32
- func (l LogMath) Log10ToLogFloat(p float64) float32
- func (l LogMath) LogFloatToLog10(p float32) float64
- func (l *LogMath) LogMath() *pocketsphinx.Logmath
- func (l LogMath) LogToLn(p int32) float64
- func (l LogMath) LogToLog10(p int32) float64
- func (l *LogMath) Retain()
- func (l LogMath) WriteTo(filename String) bool
- type MLLR
- type NGramCase
- type NGramFileType
- type NGramModel
- func (n *NGramModel) AddClass(className String, weight float32, words Strings, weights []float32) bool
- func (n *NGramModel) AddClassWord(className, word String, weight float32) int32
- func (n *NGramModel) AddWord(word String, weight float32) int32
- func (n *NGramModel) ApplyWeights(langWeight, insertionPenalty float32) bool
- func (n *NGramModel) BigramScore(w2, w1 int32) (score, nUsed int32)
- func (n *NGramModel) CaseFold(c NGramCase) bool
- func (n *NGramModel) Counts() []uint32
- func (n *NGramModel) Destroy() bool
- func (n *NGramModel) Flush()
- func (n *NGramModel) NGramModel() *pocketsphinx.NgramModel
- func (n *NGramModel) Probability(words Strings) int32
- func (n *NGramModel) QuickProbability(wordID int32, history []int32) (prob, nUsed int32)
- func (n *NGramModel) ReadClassDef(filename String) bool
- func (n *NGramModel) Retain()
- func (n *NGramModel) Score(wordID int32, history []int32) (score, nUsed int32)
- func (n *NGramModel) ScoreToProbability(score int32) int32
- func (n *NGramModel) Size() int32
- func (n *NGramModel) TrigramScore(w3, w2, w1 int32) (score, nUsed int32)
- func (n *NGramModel) UnknownWordID() int32
- func (n *NGramModel) Weights() (langWeight float32, wipLog int32)
- func (n *NGramModel) Word(wordID int32) string
- func (n *NGramModel) WordID(word String) int32
- func (n *NGramModel) WriteTo(filename String, format NGramFileType) bool
- func (n *NGramModel) Zero() int32
- type NGramOptions
- type Option
- func AScaleOption(scale float32) Option
- func AWeightOption(weight int) Option
- func AllPhoneCIOption(ciUnitsOnly bool) Option
- func AllPhoneFileOption(filepath string) Option
- func BacktraceOption(backtrace bool) Option
- func BeamOption(width float64) Option
- func BestPathLWeightOption(weight float32) Option
- func BestpathOption(bestpath bool) Option
- func CompAllSenOption(compallsen bool) Option
- func DebugOption(level int) Option
- func DictCaseOption(sens bool) Option
- func DictFileOption(filename string) Option
- func DsRatioOption(ratio int) Option
- func FastFourierTransformOption(points int) Option
- func FeatParamsFileOption(filename string) Option
- func FillProbOption(prob float32) Option
- func FillerDictFileOption(filename string) Option
- func FiniteStateGrammarsOption(filepath string) Option
- func FwdFlatBeamOption(width float64) Option
- func FwdFlatLWeightOption(weight float32) Option
- func FwdFlatOption(fwdflat bool) Option
- func FwdFlatSfWinOption(frames int) Option
- func FwdFlatWBeamOption(width float64) Option
- func FwdFlateFWidOption(frames int) Option
- func FwdTreeOption(fwdtree bool) Option
- func HMMDirOption(dir string) Option
- func InputEndianOption(endian string) Option
- func KeyphraseOption(keyphrase string) Option
- func KeywordsDelayOption(delay int) Option
- func KeywordsFileOption(filename string) Option
- func KeywordsPLPOption(prob float64) Option
- func KeywordsThresholdOption(threshold float64) Option
- func LMFileOption(filename string) Option
- func LMNameOption(name string) Option
- func LMSetOption(set string) Option
- func LPBeamOption(width float64) Option
- func LPOnlyBeamOption(width float64) Option
- func LWeightOption(weight float32) Option
- func LatsizeOption(size int) Option
- func LogBaseOption(base float32) Option
- func LogFileOption(filename string) Option
- func MDefFileOption(filename string) Option
- func MFCLogDirOption(dir string) Option
- func MLLRFileOption(filename string) Option
- func MMapOption(mmap bool) Option
- func MaxHMMPFOption(max int) Option
- func MaxWPFOption(max int) Option
- func MeansFileOption(filename string) Option
- func MinEndFrOption(n int) Option
- func MixWFileOption(filename string) Option
- func MixWFloorOption(floor float32) Option
- func NewWordPenaltyOption(penalty float32) Option
- func PBeamOption(width float64) Option
- func PIPenaltyOption(penalty float32) Option
- func PLBeamOption(width float64) Option
- func PLPBeamOption(width float64) Option
- func PLWeightOption(weight float64) Option
- func PLWindowOption(frames int) Option
- func PipOption(penalty float32) Option
- func RawLogDirOption(dir string) Option
- func SampleRateOption(rate float32) Option
- func SenDumpFileOption(filename string) Option
- func SenLogDirOption(dir string) Option
- func SilProbOption(prob float32) Option
- func TMatFileOption(filename string) Option
- func TMatFloorOption(floor float32) Option
- func TopNBeamOption(width string) Option
- func TopNOption(max int) Option
- func UnigramWeightOption(weight float32) Option
- func UserOption(name string, v interface{}) Option
- func VarFileOption(filename int) Option
- func VarFloorOption(floor float32) Option
- func WBeamOption(width float64) Option
- func WIPenaltyOption(penalty float32) Option
- type String
- type Strings
Constants ¶
This section is empty.
Variables ¶
var NGgramInvalidWordID int32 = pocketsphinx.NgramInvalidWid
Functions ¶
This section is empty.
Types ¶
type Config ¶
type Config struct {
// contains filtered or unexported fields
}
func NewConfig ¶
NewConfig creates a new command-line argument set based on the provided config options.
func NewConfigRetain ¶
func NewConfigRetain(ln *pocketsphinx.CommandLn) *Config
NewConfigRetain gets a new config while retaining ownership of a command-line argument set.
func (*Config) CommandLn ¶
func (c *Config) CommandLn() *pocketsphinx.CommandLn
type Decoder ¶
type Decoder struct {
// contains filtered or unexported fields
}
func NewDecoder ¶
NewDecoder initializes the decoder from a configuration object.
func (*Decoder) AddWord ¶
AddWord adds a word to the pronunciation dictionary.
This function adds a word to the pronunciation dictionary and the current language model (but not to the current FSG if FSG mode is enabled). If the word is already present in one or the other, it does whatever is necessary to ensure that the word can be recognized.
word is a word string to add, e.g. "hello". phones is a whitespace-separated list of phoneme strings describing pronunciation of the word, e.g. "HH AH L OW". If update is true, updates the search module (whichever one is currently active) to recognize the newly added word. If adding multiple words, it is more efficient to pass false here in all but the last word.
Returns the internal ID (>= 0) of the newly added word.
func (*Decoder) AllDuration ¶
AllDuration gets overall performance information.
speech — number of seconds of speech. cpu — number of seconds of CPU time used. wall — number of seconds of wall time used.
func (*Decoder) Decoder ¶
func (d *Decoder) Decoder() *pocketsphinx.Decoder
Decoder returns a retained copy of underlying reference to pocketsphinx.Decoder.
func (*Decoder) FramesSearched ¶
FramesSearched gets the number of frames of data searched.
Note that there is a delay between this and the number of frames of audio which have been input to the system. This is due to the fact that acoustic features are computed using a sliding window of audio, and dynamic features are computed over a sliding window of acoustic features.
Returns number of frames of speech data which have been recognized so far.
func (*Decoder) Hypothesis ¶
Hypothesis gets hypothesis string and path score.
Returns a string containing the best hypothesis at this point in decoding (empty if no hypothesis is available) and the path score of that string.
func (*Decoder) IsInSpeech ¶
IsInSpeech checks if the last fed audio buffer contained speech.
func (*Decoder) LogMath ¶
LogMath gets the log-math computation object for this decoder.
The decoder retains ownership of this pointer, so you should not attempt to free it manually. Use LogMath.Retain() if you wish to reuse it elsewhere.
func (*Decoder) LookupWord ¶
LookupWord looks up the word in the dictionary and returns the phone transcription for it.
Returns a whitespace-separated phone string describing the pronunciation of the word, or an empty string if the word is not present in the dictionary.
func (*Decoder) NewLattice ¶
NewLattice reads a lattice from a file on disk.
func (*Decoder) Probability ¶
Probability gets posterior probability of the best hypothesis.
Unless the BestpathOption option is enabled, this function will always return zero (corresponding to a posterior probability of 1.0). Even if BestpathOption is enabled, it will also return zero when called on a partial result. Ongoing research into effective confidence annotation for partial hypotheses may result in these restrictions being lifted in future versions.
func (*Decoder) ProcessCep ¶
func (d *Decoder) ProcessCep(data [][]float32, noSearch, fullUtterance bool) (frames int32, ok bool)
ProcessCep decodes acoustic feature data.
If noSearch is enabled, performs feature extraction but does not do any recognition yet. This may be necessary if your processor has trouble doing recognition in real-time.
fullUtterance shows that this block of data is a full utterance worth of data. This may allow the recognizer to produce more accurate results.
Returns number of frames of data searched.
func (*Decoder) ProcessRaw ¶
ProcessRaw decodes a raw audio stream.
No headers are recognized in this stream. The configuration parameters SampleRateOption and InputEndianOption are used to determine the sampling rate and endianness of the stream, respectively. Audio is always assumed to be 16-bit signed PCM.
If noSearch is enabled, performs feature extraction but does not do any recognition yet. This may be necessary if your processor has trouble doing recognition in real-time.
fullUtterance shows that this block of data is a full utterance worth of data. This may allow the recognizer to produce more accurate results.
Returns number of frames of data searched.
func (*Decoder) ReadDict ¶
ReadDict reloads the pronunciation dictionary from a file.
This function replaces the current pronunciation dictionary with the one stored in dictFile. This also causes the active search module(s) to be reinitialized, in the same manner as calling Decoder.AddWord() with update=true.
dictFile is the path to dictionary file to load. fillerDictFile is the path to filler dictionary to load, or empty string to keep the existing filler dictionary.
func (*Decoder) Reconfigure ¶
Reconfigure reinitializes the decoder with updated configuration.
This function allows you to switch the acoustic model, dictionary, or other configuration without creating an entirely new decoding object.
cfg is an optional new configuration to use. If cfg is nil, the previous configuration will be reloaded, with any changes applied.
func (*Decoder) SetKeyphrase ¶
SetKeyphrase associates keyword search with the provided name. Activate with Decoder.SetSearch()
func (*Decoder) SetKws ¶
SetKws associates keyword search with the provided file. Activate with Decoder.SetSearch()
func (*Decoder) SetRawDataSize ¶
SetRawDataSize sets the limit of the raw audio data to store in decoder to retrieve it later with Decoder.RawData().
func (*Decoder) SetSearch ¶
SetSearch activates the named search. The search must be set beforehand with Decoder.SetKeyphrase() or Decoder.SetKws()
func (*Decoder) StartStream ¶
StartStream starts processing of the stream of speech. Channel parameters like noise-level are maintained for the stream and reused among utterances. Times returned in segment iterators are also stream-wide.
func (*Decoder) StartUtt ¶
StartUtt starts utterance processing. This function should be called before any utterance data is passed to the decoder. It marks the start of a new utterance and reinitializes internal data structures.
func (*Decoder) UpdateMLLR ¶
UpdateMLLR adapts current acoustic model using a linear transform (Maximum Likelihood Linear Regression).
mllr is the new transform to use, or nil to update the existing transform. The decoder retains ownership of this pointer, so you should not attempt to free it manually. Use MLLR.Retain() if you wish to reuse it elsewhere.
Returns the updated transform object for this decoder, or nil on failure.
func (*Decoder) UttDuration ¶
UttDuration gets performance information for the current utterance.
speech — number of seconds of speech. cpu — number of seconds of CPU time used. wall — number of seconds of wall time used.
func (*Decoder) WordLattice ¶
WordLattice gets the word lattice object containing all hypotheses so far.
The pointer is owned by the decoder and you should not attempt to free it manually. It is only valid until the next utterance, unless you use Lattice.Retain() to retain it.
type JSGF ¶
type JSGF struct {
// contains filtered or unexported fields
}
func NewJSGFGrammar ¶
NewJSGFGrammar creates a new JSGF grammar. Parent is an optional parent grammar for this one (nil, usually). Returns a new JSGF grammar object, or nil on failure.
func (*JSGF) GrammarName ¶
type JSGFRuleIter ¶
type JSGFRuleIter pocketsphinx.JSGFRuleIter
type Lattice ¶
type Lattice struct {
// contains filtered or unexported fields
}
Lattice word graph structure used in bestpath/nbest search.
func (*Lattice) BaseWord ¶
func (l *Lattice) BaseWord(node *LatticeNode) string
BaseWord gets base word string for this node.
func (*Lattice) BestPath ¶
func (l *Lattice) BestPath(model *NGramModel, lwf, ascale float32) *LatticeLink
BestPath does N-Gram based best-path search on a word graph using A*. Returns the final link in best path or nil upon error.
This function calculates both the best path as well as the forward probability used in confidence estimation.
func (Lattice) Iter ¶
func (l Lattice) Iter() *LatticeNodeIter
Iter starts iterating over nodes in the lattice.
No particular order of traversal is guaranteed, and you should not depend on this.
func (*Lattice) Lattice ¶
func (l *Lattice) Lattice() *pocketsphinx.Lattice
Lattice returns a retained copy of underlying reference to pocketsphinx.Lattice.
func (*Lattice) LinkBaseWord ¶
func (l *Lattice) LinkBaseWord(link *LatticeLink) string
LinkBaseWord gets base word string from a lattice link.
func (*Lattice) LinkProbability ¶
func (l *Lattice) LinkProbability(link *LatticeLink) (score, prob int32)
LinkProbability gets acoustic score and posterior probability from a lattice link.
Posterior probability for this link. Log is expressed in the log-base used in the decoder. To convert to linear floating-point, use Lattice.LogMath().Exp(prob).
func (*Lattice) LinkWord ¶
func (l *Lattice) LinkWord(link *LatticeLink) string
LinkWord gets word string from a lattice link (possibly a pronunciation variant).
func (*Lattice) LogMath ¶
LogMath gets the log-math computation object for this lattice.
The lattice retains ownership of this pointer, so you should not attempt to free it manually. Use LogMath.Retain() if you wish to reuse it elsewhere.
func (*Lattice) NewLink ¶
func (l *Lattice) NewLink(from, to *LatticeNode, score, endFrame int32)
NewLink creates a directed link between from and to nodes, but if a link already exists, chooses one with the best score.
func (*Lattice) Posterior ¶
func (l *Lattice) Posterior(model *NGramModel, ascale float32) int32
Posterior calculates link posterior probabilities on a word graph. Returns posterior probability of the utterance as a whole.
WARN: This function assumes that Lattice.BestPath() search has already been done.
func (*Lattice) PosteriorPrune ¶
func (l *Lattice) PosteriorPrune(model *NGramModel, ascale float32) int32
PosteriorPrune prunes all links (and associated nodes) below a certain posterior probability, return number of arcs removed.
beam represents the minimum posterior probability for links. This is expressed in the log-base used in the decoder. To convert from linear floating-point, use Lattice.LogMath().Log(prob).
WARN: This function assumes that Lattice.Posterior() has already been called.
func (*Lattice) ProbabilityOf ¶
func (l *Lattice) ProbabilityOf(node *LatticeNode) (*LatticeLink, int32)
ProbabilityOf node gets the best posterior probability and associated acoustic score from a lattice node. Returns exit link with highest posterior probability and the probability of this link.
Log is expressed in the log-base used in the decoder. To convert to linear floating-point, use Lattice.LogMath().Exp(prob).
func (*Lattice) ReverseEdges ¶
func (l *Lattice) ReverseEdges(start, end *LatticeNode) *LatticeLink
ReverseEdges starts a reverse traversal of edges in a word graph.
See Lattice.TraverseEdges() for why this API is the way it is.
func (*Lattice) ReverseNext ¶
func (l *Lattice) ReverseNext(start *LatticeNode) *LatticeLink
ReverseNext gets the next link in reverse traversal.
func (*Lattice) TraverseEdges ¶
func (l *Lattice) TraverseEdges(start, end *LatticeNode) *LatticeLink
TraverseEdges starts a forward traversal of edges in a word graph.
A keen eye will notice an inconsistency in this API versus other types of iterators in PocketSphinx. The reason for this is that the traversal algorithm is much more efficient when it is able to modify the lattice structure. Therefore, to avoid giving the impression that multiple traversals are possible at once, no separate iterator structure is provided.
func (*Lattice) TraverseNext ¶
func (l *Lattice) TraverseNext(end *LatticeNode) *LatticeLink
TraverseNext gets the next link in forward traversal.
func (*Lattice) Word ¶
func (l *Lattice) Word(node *LatticeNode) string
Word gets word string for this node.
func (*Lattice) WriteToHTK ¶
WriteToHTK writes a lattice to disk in HTK format.
type LatticeLink ¶
type LatticeLink pocketsphinx.Latlink
LatticeLink represents links between DAG nodes.
A link corresponds to a single hypothesized instance of a word with a given start and end point.
func (*LatticeLink) Nodes ¶
func (l *LatticeLink) Nodes() (source, dest *LatticeNode)
Nodes gets source and destination nodes from a lattice link.
func (*LatticeLink) Prev ¶
func (l *LatticeLink) Prev() *LatticeLink
Prev gets predecessor link in best path.
func (*LatticeLink) Times ¶
func (l *LatticeLink) Times() (start int32, end int16)
Times gets start and end times from a lattice link.
start - start frame of this link. end - end frame of this link.
These are inclusive, i.e. the last frame of this word is end, not end-1.
type LatticeLinkIter ¶
type LatticeLinkIter pocketsphinx.LatlinkIter
LatticeLinkIter iterator over DAG links.
func (*LatticeLinkIter) Close ¶
func (l *LatticeLinkIter) Close()
Close stops the iteration over links.
func (*LatticeLinkIter) Link ¶
func (l *LatticeLinkIter) Link() *LatticeLink
Link gets link from iterator.
func (*LatticeLinkIter) Next ¶
func (l *LatticeLinkIter) Next() *LatticeLinkIter
Next gets next link from a lattice link iterator.
type LatticeNode ¶
type LatticeNode pocketsphinx.Latnode
LatticeNode represents DAG nodes.
A node corresponds to a number of hypothesized instances of a word which all share the same starting point.
func (*LatticeNode) Entries ¶
func (l *LatticeNode) Entries() *LatticeLinkIter
Entries returns an iterator over entries to this node.
func (*LatticeNode) Exits ¶
func (l *LatticeNode) Exits() *LatticeLinkIter
Exits returns an iterator over exits from this node.
func (*LatticeNode) Times ¶
func (l *LatticeNode) Times() (start int32, first, last int16)
Times gets start and end time range for a node.
first — end frame of first exit from this node. last — end frame of last exit from this node. start — start frame for all edges exiting this node.
type LatticeNodeIter ¶
type LatticeNodeIter pocketsphinx.LatnodeIter
LatticeNodeIter iterator over DAG nodes.
func (*LatticeNodeIter) Next ¶
func (l *LatticeNodeIter) Next() *LatticeNodeIter
Next moves to next node in iteration.
func (*LatticeNodeIter) Node ¶
func (l *LatticeNodeIter) Node() *LatticeNode
Node gets node from iterator.
type LogMath ¶
type LogMath struct {
// contains filtered or unexported fields
}
LogMath integer log math computation class.
func (LogMath) AddExact ¶
AddExact adds two values in log space exactly and slowly (without using add table).
func (LogMath) GetTableShape ¶
GetTableShape gets the log table size and dimensions.
func (LogMath) Log10ToLog ¶
Log10ToLog converts base 10 log (in floating point) to integer log in base B.
func (LogMath) Log10ToLogFloat ¶
Log10ToLogFloat converts base 10 log (in floating point) to float log in base B.
func (LogMath) LogFloatToLog10 ¶
LogFloatToLog10 converts float log in base B to base 10 log.
func (*LogMath) LogMath ¶
func (l *LogMath) LogMath() *pocketsphinx.Logmath
LogMath returns a retained copy of underlying reference to pocketsphinx.Logmath.
func (LogMath) LogToLog10 ¶
LogToLog10 converts integer log in base B to base 10 log (in floating point).
type MLLR ¶
type MLLR struct {
// contains filtered or unexported fields
}
func NewMLLR ¶
NewMLLR reads a speaker-adaptive linear transform from a file (mllr_matrix). See http://cmusphinx.sourceforge.net/wiki/tutorialadapt for details.
func (*MLLR) MLLR ¶
func (m *MLLR) MLLR() *pocketsphinx.Mllr
MLLR returns a retained copy of underlying reference to pocketsphinx.Mllr.
type NGramCase ¶
type NGramCase int32
NgramCase as declared in sphinxbase/ngram_model.h:166
const ( NGramUpper NGramCase = NGramCase(pocketsphinx.NgramUpper) NGramLower NGramCase = NGramCase(pocketsphinx.NgramLower) )
Constants for case folding.
type NGramFileType ¶
type NGramFileType int32
NgramFileType as declared in sphinxbase/ngram_model.h:81
const ( // NGramInvalid is not a valid file type. NGramInvalid NGramFileType = NGramFileType(pocketsphinx.NgramInvalid) // NGramAuto to determine file type automatically. NGramAuto NGramFileType = NGramFileType(pocketsphinx.NgramAuto) // NGramArpa is for ARPABO text format (the standard). NGramArpa NGramFileType = NGramFileType(pocketsphinx.NgramArpa) // NGramBin is for sphinx .DMP format. NGramBin NGramFileType = NGramFileType(pocketsphinx.NgramBin) )
File types for N-Gram files.
type NGramModel ¶
type NGramModel struct {
// contains filtered or unexported fields
}
NGramModel is a type representing an N-Gram based language model.
func NewNGramModel ¶
func NewNGramModel(fileName String, fileType NGramFileType, lmath *LogMath, opt ...NGramOptions) (*NGramModel, error)
NewNGramModel reads an N-Gram model from a file on disk.
lmath carries log-math parameters to use for probability calculations. Ownership of this object is assumed by the newly created NGramModel, and you should not attempt to free it manually. If you wish to reuse it elsewhere, you must retain it with LogMath.Retain().
func (*NGramModel) AddClass ¶
func (n *NGramModel) AddClass(className String, weight float32, words Strings, weights []float32) bool
AddClass adds a new class to a language model.
If className already exists in the unigram set for NGramModel, then it will be converted to a class tag, and weight will be ignored. Otherwise, a new unigram will be created as in NGramModel.AddWord().
func (*NGramModel) AddClassWord ¶
func (n *NGramModel) AddClassWord(className, word String, weight float32) int32
AddClassWord adds a word to a class in a language model with a weight of this word relative to the within-class uniform distribution. Returns word ID.
func (*NGramModel) AddWord ¶
func (n *NGramModel) AddWord(word String, weight float32) int32
AddWord adds a word (unigram) to the language model and returns the word ID for the new word.
The semantics of this are not particularly well-defined for model sets, and may be subject to change. Currently this will add the word to all of the submodels
func (*NGramModel) ApplyWeights ¶
func (n *NGramModel) ApplyWeights(langWeight, insertionPenalty float32) bool
ApplyWeights applies a language weight and insertion penalty weight to a language model.
This will change the values output by NGramModel.Score() and friends. This is done for efficiency since in decoding, these are the only values we actually need. Call NGramModel.Probability() if you want the "raw" N-Gram probability estimate.
To remove all weighting, call NGramModel.ApplyWeights(1.0, 1.0).
func (*NGramModel) BigramScore ¶
func (n *NGramModel) BigramScore(w2, w1 int32) (score, nUsed int32)
BigramScore does quick bigram score lookup.
func (*NGramModel) CaseFold ¶
func (n *NGramModel) CaseFold(c NGramCase) bool
CaseFold word strings in an N-Gram model.
WARNING: This is not Unicode aware, so any non-ASCII characters will not be converted.
func (*NGramModel) Counts ¶
func (n *NGramModel) Counts() []uint32
Counts gets the counts of the various N-grams in the model.
func (*NGramModel) Destroy ¶
func (n *NGramModel) Destroy() bool
func (*NGramModel) NGramModel ¶
func (n *NGramModel) NGramModel() *pocketsphinx.NgramModel
NGramModel returns a retained copy of underlying reference to pocketsphinx.NgramModel.
func (*NGramModel) Probability ¶
func (n *NGramModel) Probability(words Strings) int32
Probability gets the "raw" log-probability for a general N-Gram.
This returns the log-probability of an N-Gram, as defined in the language model file, before any language weighting, interpolation, or insertion penalty has been applied.
When backing off to a unigram from a bigram or trigram, the unigram weight (interpolation with uniform) is not removed.
func (*NGramModel) QuickProbability ¶
func (n *NGramModel) QuickProbability(wordID int32, history []int32) (prob, nUsed int32)
Quick "raw" probability lookup for a general N-Gram.
See documentation for NGramModel.Score() and NGramModel.ApplyWeights() for an explanation of this.
func (*NGramModel) ReadClassDef ¶
func (n *NGramModel) ReadClassDef(filename String) bool
ReadClassDef reads a class definition file and adds classes to a language model.
This function assumes that the class tags have already been defined as unigrams in the language model. All words in the class definition will be added to the vocabulary as special in-class words. For this reason it is necessary that they not have the same names as any words in the general unigram distribution. The convention is to suffix them with ":class_tag", where class_tag is the class tag minus the enclosing square brackets.
func (*NGramModel) Retain ¶
func (n *NGramModel) Retain()
func (*NGramModel) Score ¶
func (n *NGramModel) Score(wordID int32, history []int32) (score, nUsed int32)
Score gets the score (scaled, interpolated log-probability) for a general N-Gram. See TrigramScore and BigramScore for particular cases.
If one of the words is not in the LM's vocabulary, the result will depend on whether this is an open or closed vocabulary language model. For an open-vocabulary model, unknown words are all mapped to the unigram "UNK" which has a non-zero probability and also participates in higher-order N-Grams. Therefore, you will get a score of some sort in this case.
For a closed-vocabulary model, unknown words are impossible and thus have zero probability. Therefore, if wordID is unknown, this function will return a "zero" log-probability, i.e. a large negative number. To obtain this number for comparison, call NGramModel.Zero().
func (*NGramModel) ScoreToProbability ¶
func (n *NGramModel) ScoreToProbability(score int32) int32
ScoreToProbability converts score to "raw" log-probability.
The unigram weight (interpolation with uniform) is not removed, since there is no way to know which order of N-Gram generated score.
func (*NGramModel) Size ¶
func (n *NGramModel) Size() int32
Size gets the order of the N-gram model (i.e. the "N" in "N-gram")
func (*NGramModel) TrigramScore ¶
func (n *NGramModel) TrigramScore(w3, w2, w1 int32) (score, nUsed int32)
TrigramScore does quick trigram score lookup.
func (*NGramModel) UnknownWordID ¶
func (n *NGramModel) UnknownWordID() int32
UnknownWordID gets the unknown word ID for a language model.
Language models can be either "open vocabulary" or "closed vocabulary". The difference is that the former assigns a fixed non-zero unigram probability to unknown words, while the latter does not allow unknown words (or, equivalently, it assigns them zero probability). If this is a closed vocabulary model, this function will return NGgramInvalidWordID.
The ID for the unknown word, or NGgramInvalidWordID if none exists.
func (*NGramModel) Weights ¶
func (n *NGramModel) Weights() (langWeight float32, wipLog int32)
Weights gets the current language weight from a language model and logarithm of word insertion penalty.
func (*NGramModel) Word ¶
func (n *NGramModel) Word(wordID int32) string
Word looks up the word string for a numerical wordID.
func (*NGramModel) WordID ¶
func (n *NGramModel) WordID(word String) int32
WordID looks up the numerical word ID for a word.
func (*NGramModel) WriteTo ¶
func (n *NGramModel) WriteTo(filename String, format NGramFileType) bool
WriteTo writes an N-Gram model to disk.
func (*NGramModel) Zero ¶
func (n *NGramModel) Zero() int32
Zero gets the "zero" log-probability value for a language model.
type NGramOptions ¶
type NGramOptions struct {
// contains filtered or unexported fields
}
NGramOptions is a utility to construct CommandLn for NewNGramModel.
func (*NGramOptions) CommandLn ¶
func (n *NGramOptions) CommandLn() *pocketsphinx.CommandLn
func (*NGramOptions) Destroy ¶
func (n *NGramOptions) Destroy() bool
func (*NGramOptions) LanguageWeight ¶
func (n *NGramOptions) LanguageWeight(v float32)
LanguageWeight to apply to the model.
func (*NGramOptions) MMap ¶
func (n *NGramOptions) MMap(v bool)
MMap sets whether to use memory-mapped I/O.
func (*NGramOptions) WordInsertionPenalty ¶
func (n *NGramOptions) WordInsertionPenalty(v float32)
WordInsertionPenalty to apply to the model.
type Option ¶
type Option func(c *Config)
func AScaleOption ¶
AScaleOption sets inverse of acoustic model scale for confidence score calculation.
Default: 20.0
func AllPhoneCIOption ¶
AllPhoneCIOption enables phoneme decoding with phonetic lm and context-independent units only.
Default: false
func AllPhoneFileOption ¶
AllPhoneFileOption sets filepath for phoneme decoding with phonetic lm.
func BacktraceOption ¶
BacktraceOption enables printing results and backtraces to log.
Default: false
func BeamOption ¶
BeamOption sets beam width applied to every frame in Viterbi search (smaller values mean wider beam).
Default: 1e-48
func BestPathLWeightOption ¶
BestPathLWeightOption sets language model probability weight for bestpath search.
Default: 9.5
func BestpathOption ¶
BestpathOption enables running bestpath (Dijkstra) search over word lattice (3rd pass).
Default: true
func CompAllSenOption ¶
CompAllSenOption enables computing all senone scores in every frame (can be faster when there are many senones).
Default: false
func DebugOption ¶
DebugOption sets verbosity level for debugging messages.
func DictCaseOption ¶
DictCaseOption sets whether the dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only).
Default: false
func DictFileOption ¶
DictFileOption sets main pronunciation dictionary (lexicon) input file.
func FastFourierTransformOption ¶
FastFourierTransformOption
func FeatParamsFileOption ¶
FeatParamsFileOption sets file containing feature extraction parameters.
func FillerDictFileOption ¶
FillerDictFileOption sets noise word pronunciation dictionary input file.
func FiniteStateGrammarsOption ¶
FiniteStateGrammarsOption for finite state grammars.
func FwdFlatBeamOption ¶
FwdFlatBeamOption sets beam width applied to every frame in second-pass flat search.
Default: 1e-64
func FwdFlatLWeightOption ¶
FwdFlatLWeightOption sets language model probability weight for flat lexicon (2nd pass) decoding.
Default: 8.5
func FwdFlatOption ¶
FwdFlatOption enables running forward flat-lexicon search over word lattice (2nd pass).
Default: true
func FwdFlatSfWinOption ¶
FwdFlatSfWinOption sets window of frames in lattice to search for successor words in fwdflat search.
Default: 25
func FwdFlatWBeamOption ¶
FwdFlatWBeamOption sets beam width applied to word exits in second-pass flat search.
Default: 7e-29
func FwdFlateFWidOption ¶
FwdFlateFWidOption sets minimum number of end frames for a word to be searched in fwdflat search.
Default: 4
func FwdTreeOption ¶
FwdTreeOption enables running forward lexicon-tree search (1st pass).
Default: true
func HMMDirOption ¶
HMMDirOption sets directory containing acoustic model files.
func KeyphraseOption ¶
KeyphraseOption sets keyphrase to spot.
func KeywordsDelayOption ¶
KeywordsDelayOption sets delay to wait for best detection score.
Default: 10
func KeywordsFileOption ¶
KeywordsFileOption sets a file with keyphrases to spot, one per line.
func KeywordsPLPOption ¶
KeywordsPLPOption sets phone loop probability for keyphrase spotting.
Default: 1e-1
func KeywordsThresholdOption ¶
KeywordsThresholdOption sets the threshold for p(hyp)/p(alternatives) ratio.
Default: 1.0
func LMFileOption ¶
LMFileOption sets word trigram language model input file.
func LMNameOption ¶
LMNameOption sets which language model in LMSetOption to use by default.
func LMSetOption ¶
LMSetOption specifies a set of language models.
func LPOnlyBeamOption ¶
LPOnlyBeamOption sets beam width applied to last phone in single-phone words.
Default: 7e-29
func LogBaseOption ¶
LogBaseOption sets base in which all log-likelihoods are calculated.
Default: 1.0001
func LogFileOption ¶
LogFileOption sets file to write log messages in.
func MDefFileOption ¶
MDefFileOption sets model definition input file.
func MFCLogDirOption ¶
MFCLogDirOption sets directory to log feature files to.
func MLLRFileOption ¶
MLLRFileOption sets MLLR transformation to apply to means and variances.
func MMapOption ¶
MMapOption enables use of memory-mapped I/O (if possible) for model files.
Default: true
func MaxHMMPFOption ¶
MaxHMMPFOption sets maximum number of active HMMs to maintain at each frame (or -1 for no pruning).
Default: 30000
func MaxWPFOption ¶
MaxWPFOption sets maximum number of distinct word exits at each frame (or -1 for no pruning).
Default: -1
func MeansFileOption ¶
MeansFileOption sets mixture gaussian means input file.
func MinEndFrOption ¶
MinEndFrOption sets nodes ignored in lattice construction if they persist for fewer than N frames.
Default: 0
func MixWFileOption ¶
MixWFileOption sets senone mixture weights input file (uncompressed).
func MixWFloorOption ¶
MixWFloorOption sets senone mixture weights floor (applied to data from MixWFileOption file).
Default: 0.0000001
func PLBeamOption ¶
PLBeamOption sets beam width applied to phone loop search for lookahead.
Default: 1e-10
func PLPBeamOption ¶
PLPBeamOption sets beam width applied to phone loop transitions for lookahead.
Default: 1e-10
func RawLogDirOption ¶
RawLogDirOption sets directory to log raw audio files to.
func SenDumpFileOption ¶
SenDumpFileOption sets senone dump (compressed mixture weights) input file.
func SenLogDirOption ¶
SenLogDirOption sets directory to log senone score files to.
func TMatFileOption ¶
TMatFileOption sets HMM state transition matrix input file.
func TMatFloorOption ¶
TMatFloorOption sets HMM state transition probability floor (applied to TMatFileOption file).
func TopNBeamOption ¶
TopNBeamOption sets beam width used to determine top-N Gaussians (or a list, per-feature).
Default: "0"
func UserOption ¶
UserOption sets a user specified option to a custom value.
func VarFileOption ¶
VarFileOption sets mixture gaussian variances input file.
func VarFloorOption ¶
VarFloorOption sets mixture gaussian variance floor (applied to data from VarFileOption file).
Default: 0.0001