NGram  ngram-1.3.15
OpenGrm-NGram library
Namespaces | Classes | Typedefs | Functions | Variables
ngram Namespace Reference

Namespaces

 impl
 

Classes

struct  FromLexicographicMapper
 
struct  HistogramArc
 
class  LexicographicRescorer
 
class  NGramAbsolute
 
class  NGramArcSelector
 
class  NGramBayesModelMerge
 
class  NGramContext
 
class  NGramContextCountPrune
 
class  NGramContextMerge
 
class  NGramContextPrune
 
class  NGramContextRelEntropy
 
class  NGramContextSeymoreShrink
 
class  NGramCounter
 
class  NGramCountMerge
 
class  NGramCountOfCounts
 
class  NGramCountPrune
 
class  NGramExtendedContext
 
class  NGramHistMerge
 
class  NGramInput
 
class  NGramKatz
 
class  NGramKneserNey
 
class  NGramListPrune
 
class  NGramMake
 
class  NGramMarginal
 
class  NGramMerge
 
class  NGramModel
 
class  NGramModelMerge
 
class  NGramMutableModel
 
class  NGramOutput
 
class  NGramRelEntropy
 
class  NGramReplaceMerge
 
class  NGramSeymoreShrink
 
class  NGramShrink
 
class  NGramSplit
 
class  NGramTransfer
 
class  NGramUnsmoothed
 
class  NGramWittenBell
 
class  ToHistogramMapper
 
struct  ToLexicographicMapper
 
struct  ToStdArcMapper
 

Typedefs

using StdLexicographicRescorer = LexicographicRescorer< fst::StdArc >
 

Functions

void PrintNGramInfo (const NGramModel< fst::StdArc > &ngram, std::ostream &ostrm)
 
template<class Arc >
bool AscendAndCollectStateInfo (const fst::Fst< Arc > &fst, int order, typename Arc::Label backoff_label, std::vector< std::vector< typename Arc::StateId >> *order_states, std::vector< int > *state_orders, std::vector< typename Arc::StateId > *backoff_states)
 
template<class Arc >
bool NGramComplete (fst::MutableFst< Arc > *fst, typename Arc::Label backoff_label=0)
 
bool NGramReadContexts (const std::string &file, std::vector< std::string > *contexts)
 
bool NGramWriteContexts (const std::string &file, const std::vector< std::string > &contexts)
 
bool GetNGramCounts (fst::FarReader< fst::StdArc > *far_reader, fst::StdMutableFst *fst, int order, bool require_symbols=true, bool epsilon_as_backoff=false, bool round_to_int=false, double add_to_symbol_unigram_count=0.0)
 
bool GetNGramCounts (fst::FarReader< fst::StdArc > *far_reader, std::vector< std::string > *ngrams, int order, bool epsilon_as_backoff=false, double add_to_symbol_unigram_count=0.0)
 
bool GetNGramHistograms (fst::FarReader< fst::StdArc > *far_reader, fst::VectorFst< HistogramArc > *fst, int order, bool epsilon_as_backoff=false, int backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false, bool normalize=false, double alpha=1.0, double beta=1.0)
 
template<class Arc >
void GetNGramCountOfCounts (const fst::Fst< Arc > &fst, fst::StdMutableFst *ccfst, int in_order, std::string_view context_pattern)
 
void ReadTokenString (std::string_view str, std::vector< std::string > *words)
 
void GetNGramListToPrune (const std::vector< std::string > &ngrams_to_prune, const fst::SymbolTable *syms, std::set< std::vector< fst::StdArc::Label >> *ngram_list, bool retry_downcase=false)
 
bool NGramMakeModel (fst::StdMutableFst *fst, const std::string &method, const fst::StdFst *ccfst=nullptr, bool backoff=false, bool interpolate=false, int64_t bins=-1, double witten_bell_k=1, double discount_D=-1.0, int64_t backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false)
 
bool NGramMakeModel (fst::script::MutableFstClass *fst, const std::string &method, const fst::script::FstClass *ccfst=nullptr, bool backoff=false, bool interpolate=false, int64_t bins=-1, double witten_bell_k=1, double discount_D=-1.0, int64_t backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false)
 
bool NGramMakeHistModel (fst::MutableFst< ngram::HistogramArc > *hist_fst, fst::StdMutableFst *fst, const std::string &method, const fst::StdFst *ccfst=nullptr, bool interpolate=false, int64_t bins=-1, int64_t backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false)
 
bool NGramShrinkModel (fst::StdMutableFst *fst, const std::string &method, const std::set< std::vector< fst::StdArc::Label >> &ngram_list, double tot_uni=-1.0, double theta=0.0, int64_t target_num=-1, int32_t min_order=2, const std::string &count_pattern="", std::string_view context_pattern="", int shrink_opt=0, fst::StdArc::Label backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false)
 
bool NGramShrinkModel (fst::StdMutableFst *fst, const std::string &method, double tot_uni=-1.0, double theta=0.0, int64_t target_num=-1, int32_t min_order=2, const std::string &count_pattern="", std::string_view context_pattern="", int shrink_opt=0, fst::StdArc::Label backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false)
 
void RoundCountsToInt (fst::StdMutableFst *fst)
 
double GetNGramAndCount (const std::pair< std::vector< int >, std::pair< int, double >> &ngram_count, std::string *ngram, const fst::SymbolTable &syms)
 
bool GetCounts (const std::string &countname, NGramCounter< fst::Log64Weight > *ngram_counter, fst::FarReader< fst::StdArc > *far_reader, int fstnumber, fst::SymbolTable *syms)
 
bool GetSingleCountFst (fst::FarReader< fst::StdArc > *far_reader, fst::StdMutableFst *fst, int fstnumber, int order, bool epsilon_as_backoff)
 
bool GetNGramsAndSyms (fst::FarReader< fst::StdArc > *far_reader, NGramCounter< fst::Log64Weight > *ngram_counter, fst::SymbolTable *syms, bool require_symbols, double add_to_symbol_unigram_count)
 

Variables

constexpr size_t kHistogramBins = 7
 
const double kNormEps = 0.001
 
const double kFloatEps = 0.000001
 
const double kInfBackoff = 99.00
 

Typedef Documentation

using ngram::StdLexicographicRescorer = LexicographicRescorer< fst::StdArc >

Definition at line 179 of file lexicographic-map.h.

Function Documentation

template<class Arc >
bool ngram::AscendAndCollectStateInfo ( const fst::Fst< Arc > &  fst,
int  order,
typename Arc::Label  backoff_label,
std::vector< std::vector< typename Arc::StateId >> *  order_states,
std::vector< int > *  state_orders,
std::vector< typename Arc::StateId > *  backoff_states 
)

Definition at line 36 of file ngram-complete.h.

bool ngram::GetCounts ( const std::string &  countname,
NGramCounter< fst::Log64Weight > *  ngram_counter,
fst::FarReader< fst::StdArc > *  far_reader,
int  fstnumber,
fst::SymbolTable *  syms 
)

Definition at line 81 of file ngram-count.cc.

double ngram::GetNGramAndCount ( const std::pair< std::vector< int >, std::pair< int, double >> &  ngram_count,
std::string *  ngram,
const fst::SymbolTable &  syms 
)

Definition at line 62 of file ngram-count.cc.

template<class Arc >
void ngram::GetNGramCountOfCounts ( const fst::Fst< Arc > &  fst,
fst::StdMutableFst *  ccfst,
int  in_order,
std::string_view  context_pattern 
)

Definition at line 513 of file ngram-count.h.

bool ngram::GetNGramCounts ( fst::FarReader< fst::StdArc > *  far_reader,
fst::StdMutableFst *  fst,
int  order,
bool  require_symbols = true,
bool  epsilon_as_backoff = false,
bool  round_to_int = false,
double  add_to_symbol_unigram_count = 0.0 
)

Definition at line 187 of file ngram-count.cc.

bool ngram::GetNGramCounts ( fst::FarReader< fst::StdArc > *  far_reader,
std::vector< std::string > *  ngrams,
int  order,
bool  epsilon_as_backoff = false,
double  add_to_symbol_unigram_count = 0.0 
)

Definition at line 208 of file ngram-count.cc.

bool ngram::GetNGramHistograms ( fst::FarReader< fst::StdArc > *  far_reader,
fst::VectorFst< HistogramArc > *  fst,
int  order,
bool  epsilon_as_backoff = false,
int  backoff_label = 0,
double  norm_eps = kNormEps,
bool  check_consistency = false,
bool  normalize = false,
double  alpha = 1.0,
double  beta = 1.0 
)

Definition at line 133 of file ngram-count.cc.

void ngram::GetNGramListToPrune ( const std::vector< std::string > &  ngrams_to_prune,
const fst::SymbolTable *  syms,
std::set< std::vector< fst::StdArc::Label >> *  ngram_list,
bool  retry_downcase = false 
)

Definition at line 29 of file ngram-list-prune.cc.

bool ngram::GetNGramsAndSyms ( fst::FarReader< fst::StdArc > *  far_reader,
NGramCounter< fst::Log64Weight > *  ngram_counter,
fst::SymbolTable *  syms,
bool  require_symbols,
double  add_to_symbol_unigram_count 
)

Definition at line 164 of file ngram-count.cc.

bool ngram::GetSingleCountFst ( fst::FarReader< fst::StdArc > *  far_reader,
fst::StdMutableFst *  fst,
int  fstnumber,
int  order,
bool  epsilon_as_backoff 
)

Definition at line 110 of file ngram-count.cc.

template<class Arc >
bool ngram::NGramComplete ( fst::MutableFst< Arc > *  fst,
typename Arc::Label  backoff_label = 0 
)

Definition at line 69 of file ngram-complete.h.

bool ngram::NGramMakeHistModel ( fst::MutableFst< ngram::HistogramArc > *  hist_fst,
fst::StdMutableFst *  fst,
const std::string &  method,
const fst::StdFst *  ccfst = nullptr,
bool  interpolate = false,
int64_t  bins = -1,
int64_t  backoff_label = 0,
double  norm_eps = kNormEps,
bool  check_consistency = false 
)

Definition at line 120 of file ngram-make.cc.

bool ngram::NGramMakeModel ( fst::StdMutableFst *  fst,
const std::string &  method,
const fst::StdFst *  ccfst = nullptr,
bool  backoff = false,
bool  interpolate = false,
int64_t  bins = -1,
double  witten_bell_k = 1,
double  discount_D = -1.0,
int64_t  backoff_label = 0,
double  norm_eps = kNormEps,
bool  check_consistency = false 
)

Definition at line 44 of file ngram-make.cc.

bool ngram::NGramMakeModel ( fst::script::MutableFstClass *  fst,
const std::string &  method,
const fst::script::FstClass *  ccfst = nullptr,
bool  backoff = false,
bool  interpolate = false,
int64_t  bins = -1,
double  witten_bell_k = 1,
double  discount_D = -1.0,
int64_t  backoff_label = 0,
double  norm_eps = kNormEps,
bool  check_consistency = false 
)

Definition at line 106 of file ngram-make.cc.

bool ngram::NGramReadContexts ( const std::string &  file,
std::vector< std::string > *  contexts 
)

Definition at line 281 of file ngram-context.cc.

bool ngram::NGramShrinkModel ( fst::StdMutableFst *  fst,
const std::string &  method,
const std::set< std::vector< fst::StdArc::Label >> &  ngram_list,
double  tot_uni = -1.0,
double  theta = 0.0,
int64_t  target_num = -1,
int32_t  min_order = 2,
const std::string &  count_pattern = "",
std::string_view  context_pattern = "",
int  shrink_opt = 0,
fst::StdArc::Label  backoff_label = 0,
double  norm_eps = kNormEps,
bool  check_consistency = false 
)

Definition at line 68 of file ngram-shrink.cc.

bool ngram::NGramShrinkModel ( fst::StdMutableFst *  fst,
const std::string &  method,
double  tot_uni = -1.0,
double  theta = 0.0,
int64_t  target_num = -1,
int32_t  min_order = 2,
const std::string &  count_pattern = "",
std::string_view  context_pattern = "",
int  shrink_opt = 0,
fst::StdArc::Label  backoff_label = 0,
double  norm_eps = kNormEps,
bool  check_consistency = false 
)

Definition at line 55 of file ngram-shrink.cc.

bool ngram::NGramWriteContexts ( const std::string &  file,
const std::vector< std::string > &  contexts 
)

Definition at line 296 of file ngram-context.cc.

void ngram::PrintNGramInfo ( const NGramModel< fst::StdArc > &  ngram,
std::ostream &  ostrm 
)

Definition at line 40 of file ngraminfo-main.cc.

void ngram::ReadTokenString ( std::string_view  str,
std::vector< std::string > *  words 
)

Definition at line 59 of file ngram-input.cc.

void ngram::RoundCountsToInt ( fst::StdMutableFst *  fst)

Definition at line 45 of file ngram-count.cc.

Variable Documentation

const double ngram::kFloatEps = 0.000001

Definition at line 38 of file ngram-model.h.

constexpr size_t ngram::kHistogramBins = 7

Definition at line 31 of file hist-arc.h.

const double ngram::kInfBackoff = 99.00

Definition at line 39 of file ngram-model.h.

const double ngram::kNormEps = 0.001

Definition at line 37 of file ngram-model.h.