25 #include <fst/flags.h> 27 #include <fst/mutable-fst.h> 46 std::string usage =
"Shrink n-gram model from input model file.\n\n Usage: ";
48 usage +=
" [--options] [in.fst [out.fst]]\n";
49 SET_FLAGS(usage.c_str(), &argc, &argv,
true);
57 (argc > 1 && (strcmp(argv[1],
"-") != 0)) ? argv[1] :
"";
58 std::string out_name = argc > 2 ? argv[2] :
"";
60 std::unique_ptr<fst::StdMutableFst>
fst(
61 fst::StdMutableFst::Read(in_name,
true));
64 std::set<std::vector<fst::StdArc::Label>> ngram_list;
65 if (FST_FLAGS_method ==
"list_prune") {
66 if (FST_FLAGS_list_file.empty()) {
67 LOG(WARNING) <<
"list_file parameter empty, no n-grams given";
70 std::ifstream ifstrm(FST_FLAGS_list_file);
72 LOG(WARNING) <<
"NGramShrink: Can't open " 73 << FST_FLAGS_list_file <<
" for reading";
77 std::vector<std::string> ngrams_to_prune;
78 while (std::getline(ifstrm, line)) {
79 ngrams_to_prune.push_back(line);
84 FST_FLAGS_retry_downcase);
87 fst.get(), FST_FLAGS_method, ngram_list,
88 FST_FLAGS_total_unigram_count, FST_FLAGS_theta,
89 FST_FLAGS_target_number_of_ngrams,
90 FST_FLAGS_min_order_to_prune,
91 FST_FLAGS_count_pattern,
92 FST_FLAGS_context_pattern, FST_FLAGS_shrink_opt,
93 FST_FLAGS_backoff_label, FST_FLAGS_norm_eps,
94 FST_FLAGS_check_consistency))
DECLARE_bool(check_consistency)
void GetNGramListToPrune(const std::vector< std::string > &ngrams_to_prune, const fst::SymbolTable *syms, std::set< std::vector< fst::StdArc::Label >> *ngram_list, bool retry_downcase=false)
int ngramshrink_main(int argc, char **argv)
bool NGramShrinkModel(fst::StdMutableFst *fst, const std::string &method, const std::set< std::vector< fst::StdArc::Label >> &ngram_list, double tot_uni=-1.0, double theta=0.0, int64_t target_num=-1, int32_t min_order=2, const std::string &count_pattern="", std::string_view context_pattern="", int shrink_opt=0, fst::StdArc::Label backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false)
DECLARE_int64(target_number_of_ngrams)
DECLARE_double(total_unigram_count)
DECLARE_int32(min_order_to_prune)