GRM-SFST  sfst-1.2.1
OpenGrm SFst Library
stationary-distrib.h
Go to the documentation of this file.
1 // Copyright 2018-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 // Computes the stationary distribution of a stochastic FST.
15 
16 #ifndef NLP_GRM2_SFST_STATIONARY_DISTRIB_H_
17 #define NLP_GRM2_SFST_STATIONARY_DISTRIB_H_
18 
19 #include <cstddef>
20 #include <vector>
21 
22 #include <fst/arc.h>
23 #include <fst/fst.h>
24 #include <fst/signed-log-weight.h>
25 #include <fst/vector-fst.h>
26 #include <fst/weight.h>
27 #include <sfst/rmphi.h>
28 #include <sfst/sfst.h>
29 
30 namespace sfst {
31 
32 namespace internal {
33 
34 // Corresponds to a re-entry prob = 99.9999%
35 const fst::SignedLog64Weight kReEntryWeight = {1.0, 1.0e-6};
36 // Maximum number of stationary distribution iterations
37 constexpr size_t kMaxSDIters = 100;
38 
39 // Computes the stationary distribution of the Markov chain consisting
40 // of the closure of the input stochastic FST with re-entry weight
41 // 'alpha'. This version is on the signed log semiring. The
42 // convergence is controlled by 'delta' and the number of iterations
43 // by 'maxiters' Epsilons (but no epsilon cycles) are
44 // permitted. Returns true on convergence.
45 //
46 // For cyclic input and 'negative' weights, any convergence is, in
47 // general, conditional and not absolute, so it will depend on the
48 // specific input.
50  const fst::Fst<fst::SignedLog64Arc> &fst,
51  std::vector<fst::SignedLog64Weight> *weight,
52  fst::SignedLog64Weight alpha = internal::kReEntryWeight,
53  float delta = fst::kDelta,
54  size_t maxiters = internal::kMaxSDIters);
55 
56 } // namespace internal
57 
58 
59 // Computes the stationary distribution of the Markov chain consisting
60 // of the closure of the input stochastic FST with re-entry weight
61 // 'alpha'. This version handles failure transitions when present.
62 // The convergence is controlled by 'delta' and the number of
63 // iterations by 'maxiters'. Returns true on convergence. Assumes
64 // (but does not check) that the input has the canonical topology (see
65 // canonical.h). Also assumes input has no (non-phi) epsilons (or
66 // treats such epsilons w.r.t. the failure semantics as if they were
67 // regular, uniquely-labeled symbols).
68 template <class Arc>
70  const fst::Fst<Arc> &fst,
71  std::vector<typename Arc::Weight> *weight,
72  typename Arc::Weight alpha,
73  typename Arc::Label phi_label = fst::kNoLabel,
74  float delta = fst::kDelta,
75  size_t max_iters = internal::kMaxSDIters) {
76  namespace f = fst;
77  using Weight = typename Arc::Weight;
78  using SLArc = f::SignedLog64Arc;
79  using SLWeight = SLArc::Weight;
80 
81  f::VectorFst<SLArc> slfst;
82  internal::RmPhi(fst, &slfst, phi_label);
83  f::WeightConvert<Weight, SLWeight> to_sl_convert;
84  auto slalpha = to_sl_convert(alpha);
85 
86  std::vector<SLWeight> slweight;
87  if (!internal::SignedStationaryDistrib(slfst, &slweight, slalpha,
88  delta, max_iters))
89  return false;
90 
91  f::WeightConvert<SLWeight, Weight> from_sl_convert;
92  weight->clear();
93  for (size_t i = 0; i < slweight.size(); ++i) {
94  auto w = slweight[i];
95  if (Less(w, SLWeight::Zero()))
96  w = SLWeight::Zero();
97  weight->push_back(from_sl_convert(w));
98  }
99  return true;
100 }
101 
102 } // namespace sfst
103 
104 #endif // NLP_GRM2_SFST_STATIONARY_DISTRIB_H_
bool Less(fst::LogWeightTpl< T > weight1, fst::LogWeightTpl< T > weight2)
Definition: sfst.h:38
Definition: perplexity.h:32
const fst::SignedLog64Weight kReEntryWeight
constexpr size_t kMaxSDIters
Definition: sfstinfo.cc:39
void RmPhi(const fst::Fst< IArc > &ifst, fst::MutableFst< OArc > *ofst, typename IArc::Label phi_label=fst::kNoLabel, fst::MatcherRewriteMode rewrite_mode=fst::MATCHER_REWRITE_AUTO, const WC &weight_convert=WC())
Definition: rmphi.h:234
bool StationaryDistrib(const fst::Fst< Arc > &fst, std::vector< typename Arc::Weight > *weight, typename Arc::Weight alpha, typename Arc::Label phi_label=fst::kNoLabel, float delta=fst::kDelta, size_t max_iters=internal::kMaxSDIters)
bool SignedStationaryDistrib(const fst::Fst< fst::SignedLog64Arc > &fst, std::vector< fst::SignedLog64Weight > *weight, fst::SignedLog64Weight alpha=internal::kReEntryWeight, float delta=fst::kDelta, size_t maxiters=internal::kMaxSDIters)