GRM-SFST  sfst-1.2.1
OpenGrm SFst Library
approx.h
Go to the documentation of this file.
1 // Copyright 2018-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 // Algorithm to approximate a stochastic FST as a backoff FST.
15 // The backoff FST topology is provided. The result is the backoff
16 // FST weighted and normalized to approximate the input.
17 
18 #ifndef NLP_GRM2_SFST_APPROX_H_
19 #define NLP_GRM2_SFST_APPROX_H_
20 
21 
22 #include <fst/fst.h>
23 #include <sfst/count.h>
24 #include <sfst/normalize.h>
25 #include <sfst/sfst.h>
26 
27 namespace sfst {
28 
29 // A float delta for SFST approximation algorithms.
30 constexpr float kApproxDelta = 1e-10;
31 
32 // Approximates a stochastic FSA as a backoff FSA. The input 'ifst'
33 // should be a canonical stochastic FSA (see canonical.h). If it is
34 // cyclic, it should be normalized (see normalize.h - not
35 // checked). Assumes input has no (non-phi) epsilons (or treats such
36 // epsilons w.r.t. the failure semantics as if they were regular,
37 // uniquely-labeled symbols). The topology FST is provided in 'ofst'
38 // (with incoming weights ignored) and must be a backoff-complete FSA (see
39 // backoff.h). The 'phi_label' is the failure label (defaults to
40 // kNoLabel -> None). The 'delta' parameter controls the degree of
41 // and algorithm convergence. The result is the backoff-complete FSA weighted
42 // normalized to approximate the input. The algorithm computes (smoothed)
43 // counts and then normalizes those counts. See sfst::CountNormType
44 // for the normalization variants. Returns true on success.
45 template <class Arc>
46 bool Approx(const fst::Fst<Arc> &ifst,
47  fst::MutableFst<Arc> *ofst,
48  typename Arc::Label phi_label = fst::kNoLabel,
49  float delta = kApproxDelta,
50  CountNormType norm_type = NORM_KL_MIN) {
51  namespace f = fst;
52  using Label = typename Arc::Label;
53 
54  { // Counts the n-grams.
55  Counter<Arc> counter(phi_label, delta, ofst);
56  counter.Count(ifst);
57  counter.Finalize();
58  if (ofst->Properties(f::kError, false)) return false;
59  }
60 
61  return CountNormalize(ofst, phi_label, norm_type, true);
62 }
63 
64 } // namespace sfst
65 
66 #endif // NLP_GRM2_SFST_APPROX_H_
void Finalize()
Definition: count.h:78
Definition: perplexity.h:32
bool CountNormalize(fst::MutableFst< Arc > *fst, typename Arc::Label phi_label, CountNormType norm_type=NORM_SUMMED, bool trim=false, float delta=internal::CountNormalizer< Arc >::kNormDelta, double effective_zero=internal::CountNormalizer< Arc >::kEffectiveZero, size_t maxiters=internal::CountNormalizer< Arc >::kMaxNormIters)
Definition: normalize.h:928
Definition: sfstinfo.cc:39
CountNormType
Definition: normalize.h:371
constexpr float kApproxDelta
Definition: approx.h:30
bool Approx(const fst::Fst< Arc > &ifst, fst::MutableFst< Arc > *ofst, typename Arc::Label phi_label=fst::kNoLabel, float delta=kApproxDelta, CountNormType norm_type=NORM_KL_MIN)
Definition: approx.h:46
void Count(const fst::Fst< Arc > &ifst)
Definition: count.h:246