NGram  ngram-1.3.16
OpenGrm-NGram library
ngrammake-main.cc
Go to the documentation of this file.
1 // Copyright 2005-2013 Brian Roark
2 // Copyright 2005-2024 Google LLC
3 //
4 // Licensed under the Apache License, Version 2.0 (the 'License');
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an 'AS IS' BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // Makes a normalized n-gram model from an input FST with raw counts.
17 
18 #include <cstring>
19 #include <memory>
20 #include <string>
21 
22 #include <fst/flags.h>
23 #include <fst/fst.h>
24 #include <fst/mutable-fst.h>
25 #include <fst/vector-fst.h>
26 #include <ngram/hist-arc.h>
27 #include <ngram/ngram-make.h>
28 
29 DECLARE_double(witten_bell_k);  // Forwarded to ngram::NGramMakeModel only.
30 DECLARE_double(discount_D);  // Forwarded to ngram::NGramMakeModel only.
31 DECLARE_string(method);  // "katz_frac" selects NGramMakeHistModel; any other value goes to NGramMakeModel.
32 DECLARE_bool(backoff);  // Forwarded to ngram::NGramMakeModel only.
33 DECLARE_bool(interpolate);  // Forwarded to both model-making calls.
34 DECLARE_int64(bins);  // Forwarded to both model-making calls.
35 DECLARE_int64(backoff_label);  // Forwarded to both model-making calls.
36 DECLARE_double(norm_eps);  // Forwarded to both model-making calls.
37 DECLARE_bool(check_consistency);  // Forwarded to both model-making calls.
38 DECLARE_string(count_of_counts);  // When non-empty, read as an fst::StdFst and passed along as ccfst.
39 
40 int ngrammake_main(int argc, char **argv) {
41  std::string usage = "Make n-gram model from input count file.\n\n Usage: ";
42  usage += argv[0];
43  usage += " [--options] [in.fst [out.fst]]\n";
44  SET_FLAGS(usage.c_str(), &argc, &argv, true);
45 
46  if (argc < 1 || argc > 3) {
47  ShowUsage();
48  return 1;
49  }
50 
51  std::string in_name =
52  (argc > 1 && (strcmp(argv[1], "-") != 0)) ? argv[1] : "";
53  std::unique_ptr<fst::StdFst> ccfst;
54  if (!FST_FLAGS_count_of_counts.empty()) {
55  ccfst.reset(fst::StdFst::Read(FST_FLAGS_count_of_counts));
56  if (!ccfst) return 1;
57  }
58 
59  bool model_made = false;
60  std::unique_ptr<fst::StdMutableFst> fst;
61  if (FST_FLAGS_method == "katz_frac") {
62  std::unique_ptr<fst::VectorFst<ngram::HistogramArc>> hist_fst(
63  fst::VectorFst<ngram::HistogramArc>::Read(in_name));
64  if (hist_fst) {
65  fst = std::make_unique<fst::StdVectorFst>();
66  model_made = ngram::NGramMakeHistModel(
67  hist_fst.get(), fst.get(), FST_FLAGS_method, ccfst.get(),
68  FST_FLAGS_interpolate, FST_FLAGS_bins,
69  FST_FLAGS_backoff_label, FST_FLAGS_norm_eps,
70  FST_FLAGS_check_consistency);
71  }
72  } else {
73  fst.reset(fst::StdMutableFst::Read(in_name, /*convert=*/true));
74  if (fst) {
75  model_made = ngram::NGramMakeModel(
76  fst.get(), FST_FLAGS_method, ccfst.get(),
77  FST_FLAGS_backoff, FST_FLAGS_interpolate,
78  FST_FLAGS_bins, FST_FLAGS_witten_bell_k,
79  FST_FLAGS_discount_D, FST_FLAGS_backoff_label,
80  FST_FLAGS_norm_eps,
81  FST_FLAGS_check_consistency);
82  }
83  }
84  if (model_made) {
85  std::string out_name =
86  (argc > 2 && (strcmp(argv[2], "-") != 0)) ? argv[2] : "";
87  fst->Write(out_name);
88  }
89  return !model_made;
90 }
DECLARE_string(method)
bool NGramMakeHistModel(fst::MutableFst< ngram::HistogramArc > *hist_fst, fst::StdMutableFst *fst, const std::string &method, const fst::StdFst *ccfst=nullptr, bool interpolate=false, int64_t bins=-1, int64_t backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false)
Definition: ngram-make.cc:120
bool NGramMakeModel(fst::StdMutableFst *fst, const std::string &method, const fst::StdFst *ccfst=nullptr, bool backoff=false, bool interpolate=false, int64_t bins=-1, double witten_bell_k=1, double discount_D=-1.0, int64_t backoff_label=0, double norm_eps=kNormEps, bool check_consistency=false)
Definition: ngram-make.cc:44
DECLARE_int64(bins)
int ngrammake_main(int argc, char **argv)
DECLARE_bool(backoff)
DECLARE_double(witten_bell_k)