NGram
ngram-1.3.15
OpenGrm-NGram library
|
Namespaces | |
impl | |
Classes | |
struct | FromLexicographicMapper |
struct | HistogramArc |
class | LexicographicRescorer |
class | NGramAbsolute |
class | NGramArcSelector |
class | NGramBayesModelMerge |
class | NGramContext |
class | NGramContextCountPrune |
class | NGramContextMerge |
class | NGramContextPrune |
class | NGramContextRelEntropy |
class | NGramContextSeymoreShrink |
class | NGramCounter |
class | NGramCountMerge |
class | NGramCountOfCounts |
class | NGramCountPrune |
class | NGramExtendedContext |
class | NGramHistMerge |
class | NGramInput |
class | NGramKatz |
class | NGramKneserNey |
class | NGramListPrune |
class | NGramMake |
class | NGramMarginal |
class | NGramMerge |
class | NGramModel |
class | NGramModelMerge |
class | NGramMutableModel |
class | NGramOutput |
class | NGramRelEntropy |
class | NGramReplaceMerge |
class | NGramSeymoreShrink |
class | NGramShrink |
class | NGramSplit |
class | NGramTransfer |
class | NGramUnsmoothed |
class | NGramWittenBell |
class | ToHistogramMapper |
struct | ToLexicographicMapper |
struct | ToStdArcMapper |
Typedefs | |
using | StdLexicographicRescorer = LexicographicRescorer< fst::StdArc > |
Functions | |
void | PrintNGramInfo (const NGramModel< fst::StdArc > &ngram, std::ostream &ostrm) |
template<class Arc > | |
bool | AscendAndCollectStateInfo (const fst::Fst< Arc > &fst, int order, typename Arc::Label backoff_label, std::vector< std::vector< typename Arc::StateId >> *order_states, std::vector< int > *state_orders, std::vector< typename Arc::StateId > *backoff_states) |
template<class Arc > | |
bool | NGramComplete (fst::MutableFst< Arc > *fst, typename Arc::Label backoff_label=0) |
bool | NGramReadContexts (const std::string &file, std::vector< std::string > *contexts) |
bool | NGramWriteContexts (const std::string &file, const std::vector< std::string > &contexts) |
bool | GetNGramCounts (fst::FarReader< fst::StdArc > *far_reader, fst::StdMutableFst *fst, int order, bool require_symbols=true, bool epsilon_as_backoff=false, bool round_to_int=false, double add_to_symbol_unigram_count=0.0) |
bool | GetNGramCounts (fst::FarReader< fst::StdArc > *far_reader, std::vector< std::string > *ngrams, int order, bool epsilon_as_backoff=false, double add_to_symbol_unigram_count=0.0) |
bool | GetNGramHistograms (fst::FarReader< fst::StdArc > *far_reader, fst::VectorFst< HistogramArc > *fst, int order, bool epsilon_as_backoff=false, int backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false, bool normalize=false, double alpha=1.0, double beta=1.0) |
template<class Arc > | |
void | GetNGramCountOfCounts (const fst::Fst< Arc > &fst, fst::StdMutableFst *ccfst, int in_order, std::string_view context_pattern) |
void | ReadTokenString (std::string_view str, std::vector< std::string > *words) |
void | GetNGramListToPrune (const std::vector< std::string > &ngrams_to_prune, const fst::SymbolTable *syms, std::set< std::vector< fst::StdArc::Label >> *ngram_list, bool retry_downcase=false) |
bool | NGramMakeModel (fst::StdMutableFst *fst, const std::string &method, const fst::StdFst *ccfst=nullptr, bool backoff=false, bool interpolate=false, int64_t bins=-1, double witten_bell_k=1, double discount_D=-1.0, int64_t backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false) |
bool | NGramMakeModel (fst::script::MutableFstClass *fst, const std::string &method, const fst::script::FstClass *ccfst=nullptr, bool backoff=false, bool interpolate=false, int64_t bins=-1, double witten_bell_k=1, double discount_D=-1.0, int64_t backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false) |
bool | NGramMakeHistModel (fst::MutableFst< ngram::HistogramArc > *hist_fst, fst::StdMutableFst *fst, const std::string &method, const fst::StdFst *ccfst=nullptr, bool interpolate=false, int64_t bins=-1, int64_t backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false) |
bool | NGramShrinkModel (fst::StdMutableFst *fst, const std::string &method, const std::set< std::vector< fst::StdArc::Label >> &ngram_list, double tot_uni=-1.0, double theta=0.0, int64_t target_num=-1, int32_t min_order=2, const std::string &count_pattern="", std::string_view context_pattern="", int shrink_opt=0, fst::StdArc::Label backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false) |
bool | NGramShrinkModel (fst::StdMutableFst *fst, const std::string &method, double tot_uni=-1.0, double theta=0.0, int64_t target_num=-1, int32_t min_order=2, const std::string &count_pattern="", std::string_view context_pattern="", int shrink_opt=0, fst::StdArc::Label backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false) |
void | RoundCountsToInt (fst::StdMutableFst *fst) |
double | GetNGramAndCount (const std::pair< std::vector< int >, std::pair< int, double >> &ngram_count, std::string *ngram, const fst::SymbolTable &syms) |
bool | GetCounts (const std::string &countname, NGramCounter< fst::Log64Weight > *ngram_counter, fst::FarReader< fst::StdArc > *far_reader, int fstnumber, fst::SymbolTable *syms) |
bool | GetSingleCountFst (fst::FarReader< fst::StdArc > *far_reader, fst::StdMutableFst *fst, int fstnumber, int order, bool epsilon_as_backoff) |
bool | GetNGramsAndSyms (fst::FarReader< fst::StdArc > *far_reader, NGramCounter< fst::Log64Weight > *ngram_counter, fst::SymbolTable *syms, bool require_symbols, double add_to_symbol_unigram_count) |
Variables | |
constexpr size_t | kHistogramBins = 7 |
const double | kNormEps = 0.001 |
const double | kFloatEps = 0.000001 |
const double | kInfBackoff = 99.00 |
using ngram::StdLexicographicRescorer = LexicographicRescorer<fst::StdArc>
Definition at line 179 of file lexicographic-map.h.
template<class Arc>
bool ngram::AscendAndCollectStateInfo(
    const fst::Fst<Arc> &fst,
    int order,
    typename Arc::Label backoff_label,
    std::vector<std::vector<typename Arc::StateId>> *order_states,
    std::vector<int> *state_orders,
    std::vector<typename Arc::StateId> *backoff_states)
Definition at line 36 of file ngram-complete.h.
bool ngram::GetCounts | ( | const std::string & | countname, |
NGramCounter< fst::Log64Weight > * | ngram_counter, | ||
fst::FarReader< fst::StdArc > * | far_reader, | ||
int | fstnumber, | ||
fst::SymbolTable * | syms | ||
) |
Definition at line 81 of file ngram-count.cc.
double ngram::GetNGramAndCount | ( | const std::pair< std::vector< int >, std::pair< int, double >> & | ngram_count, |
std::string * | ngram, | ||
const fst::SymbolTable & | syms | ||
) |
Definition at line 62 of file ngram-count.cc.
template<class Arc>
void ngram::GetNGramCountOfCounts(
    const fst::Fst<Arc> &fst,
    fst::StdMutableFst *ccfst,
    int in_order,
    std::string_view context_pattern)
Definition at line 513 of file ngram-count.h.
bool ngram::GetNGramCounts(
    fst::FarReader<fst::StdArc> *far_reader,
    fst::StdMutableFst *fst,
    int order,
    bool require_symbols = true,
    bool epsilon_as_backoff = false,
    bool round_to_int = false,
    double add_to_symbol_unigram_count = 0.0)
Definition at line 187 of file ngram-count.cc.
bool ngram::GetNGramCounts(
    fst::FarReader<fst::StdArc> *far_reader,
    std::vector<std::string> *ngrams,
    int order,
    bool epsilon_as_backoff = false,
    double add_to_symbol_unigram_count = 0.0)
Definition at line 208 of file ngram-count.cc.
bool ngram::GetNGramHistograms(
    fst::FarReader<fst::StdArc> *far_reader,
    fst::VectorFst<HistogramArc> *fst,
    int order,
    bool epsilon_as_backoff = false,
    int backoff_label = 0,
    double norm_eps = kNormEps,
    bool check_consistency = false,
    bool normalize = false,
    double alpha = 1.0,
    double beta = 1.0)
Definition at line 133 of file ngram-count.cc.
void ngram::GetNGramListToPrune(
    const std::vector<std::string> &ngrams_to_prune,
    const fst::SymbolTable *syms,
    std::set<std::vector<fst::StdArc::Label>> *ngram_list,
    bool retry_downcase = false)
Definition at line 29 of file ngram-list-prune.cc.
bool ngram::GetNGramsAndSyms | ( | fst::FarReader< fst::StdArc > * | far_reader, |
NGramCounter< fst::Log64Weight > * | ngram_counter, | ||
fst::SymbolTable * | syms, | ||
bool | require_symbols, | ||
double | add_to_symbol_unigram_count | ||
) |
Definition at line 164 of file ngram-count.cc.
bool ngram::GetSingleCountFst | ( | fst::FarReader< fst::StdArc > * | far_reader, |
fst::StdMutableFst * | fst, | ||
int | fstnumber, | ||
int | order, | ||
bool | epsilon_as_backoff | ||
) |
Definition at line 110 of file ngram-count.cc.
template<class Arc>
bool ngram::NGramComplete(
    fst::MutableFst<Arc> *fst,
    typename Arc::Label backoff_label = 0)
Definition at line 69 of file ngram-complete.h.
bool ngram::NGramMakeHistModel(
    fst::MutableFst<ngram::HistogramArc> *hist_fst,
    fst::StdMutableFst *fst,
    const std::string &method,
    const fst::StdFst *ccfst = nullptr,
    bool interpolate = false,
    int64_t bins = -1,
    int64_t backoff_label = 0,
    double norm_eps = kNormEps,
    bool check_consistency = false)
Definition at line 120 of file ngram-make.cc.
bool ngram::NGramMakeModel(
    fst::StdMutableFst *fst,
    const std::string &method,
    const fst::StdFst *ccfst = nullptr,
    bool backoff = false,
    bool interpolate = false,
    int64_t bins = -1,
    double witten_bell_k = 1,
    double discount_D = -1.0,
    int64_t backoff_label = 0,
    double norm_eps = kNormEps,
    bool check_consistency = false)
Definition at line 44 of file ngram-make.cc.
bool ngram::NGramMakeModel(
    fst::script::MutableFstClass *fst,
    const std::string &method,
    const fst::script::FstClass *ccfst = nullptr,
    bool backoff = false,
    bool interpolate = false,
    int64_t bins = -1,
    double witten_bell_k = 1,
    double discount_D = -1.0,
    int64_t backoff_label = 0,
    double norm_eps = kNormEps,
    bool check_consistency = false)
Definition at line 106 of file ngram-make.cc.
bool ngram::NGramReadContexts | ( | const std::string & | file, |
std::vector< std::string > * | contexts | ||
) |
Definition at line 281 of file ngram-context.cc.
bool ngram::NGramShrinkModel(
    fst::StdMutableFst *fst,
    const std::string &method,
    const std::set<std::vector<fst::StdArc::Label>> &ngram_list,
    double tot_uni = -1.0,
    double theta = 0.0,
    int64_t target_num = -1,
    int32_t min_order = 2,
    const std::string &count_pattern = "",
    std::string_view context_pattern = "",
    int shrink_opt = 0,
    fst::StdArc::Label backoff_label = 0,
    double norm_eps = kNormEps,
    bool check_consistency = false)
Definition at line 68 of file ngram-shrink.cc.
bool ngram::NGramShrinkModel(
    fst::StdMutableFst *fst,
    const std::string &method,
    double tot_uni = -1.0,
    double theta = 0.0,
    int64_t target_num = -1,
    int32_t min_order = 2,
    const std::string &count_pattern = "",
    std::string_view context_pattern = "",
    int shrink_opt = 0,
    fst::StdArc::Label backoff_label = 0,
    double norm_eps = kNormEps,
    bool check_consistency = false)
Definition at line 55 of file ngram-shrink.cc.
bool ngram::NGramWriteContexts | ( | const std::string & | file, |
const std::vector< std::string > & | contexts | ||
) |
Definition at line 296 of file ngram-context.cc.
void ngram::PrintNGramInfo | ( | const NGramModel< fst::StdArc > & | ngram, |
std::ostream & | ostrm | ||
) |
Definition at line 40 of file ngraminfo-main.cc.
void ngram::ReadTokenString | ( | std::string_view | str, |
std::vector< std::string > * | words | ||
) |
Definition at line 59 of file ngram-input.cc.
void ngram::RoundCountsToInt | ( | fst::StdMutableFst * | fst | ) |
Definition at line 45 of file ngram-count.cc.
const double ngram::kFloatEps = 0.000001 |
Definition at line 38 of file ngram-model.h.
constexpr size_t ngram::kHistogramBins = 7 |
Definition at line 31 of file hist-arc.h.
const double ngram::kInfBackoff = 99.00 |
Definition at line 39 of file ngram-model.h.
const double ngram::kNormEps = 0.001 |
Definition at line 37 of file ngram-model.h.