GRM-SFST  sfst-1.2.1
OpenGrm SFst Library
canonical.h
Go to the documentation of this file.
1 // Copyright 2018-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 // Tests if stochastic FSTs have canonical topology w.r.t. any failure
15 // (and/or epsilon) transitions.
16 
17 #ifndef NLP_GRM2_SFST_CANONICAL_H_
18 #define NLP_GRM2_SFST_CANONICAL_H_
19 
20 #include <vector>
21 
22 #include <fst/log.h>
23 #include <fst/arcfilter.h>
24 #include <fst/dfs-visit.h>
25 #include <fst/fst.h>
26 #include <fst/matcher.h>
27 #include <fst/properties.h>
28 #include <fst/topsort.h>
29 
30 namespace sfst {
31 
32 // Finds the topological ordering of the states w.r.t. the (input) phi-labeled
33 // (and epsilon) transitions. If acyclic w.r.t. these transitions,
34 // then top_order[i] gives the ith state ID in the phi-topological order
35 // and true is returned, else top_order is unchanged and false is returned.
36 template <class Arc>
37 bool PhiTopOrder(const fst::Fst<Arc> &fst,
38  typename Arc::Label phi_label,
39  std::vector<typename Arc::StateId> *top_order) {
40  namespace f = fst;
41  typedef typename Arc::StateId StateId;
42 
43  f::MultiLabelArcFilter<Arc> label_filter;
44  if (phi_label != f::kNoLabel)
45  label_filter.AddLabel(phi_label);
46  if (phi_label != 0)
47  label_filter.AddLabel(0);
48  bool acyclic;
49  std::vector<StateId> inv_order;
50  f::TopOrderVisitor<Arc> top_order_visitor(&inv_order, &acyclic);
51  f::DfsVisit(fst, &top_order_visitor, label_filter);
52  if (acyclic) {
53  top_order->resize(inv_order.size());
54  for (StateId s = 0; s < inv_order.size(); ++s) {
55  (*top_order)[inv_order[s]] = s;
56  }
57  }
58  return acyclic;
59 }
60 
61 // CANONICAL SFST TOPOLOGY: (input) arc-sorted and if (input) phi
62 // labeled, at most one phi-labeled transition per state and no
63 // phi-labeled cycles. No assumption is made of general determinism
64 // or what transitions must be present on failure (unlike in a
65 // canonical n-gram model).
66 
67 // Tests if input has the canonical topology of a stochastic FST. This
68 // version is passed the canonical top_order vector to fill in (as
69 // returned by PhiTopOrder).
70 template <class Arc>
71 bool IsCanonical(const fst::Fst<Arc> &fst, typename Arc::Label phi_label,
72  std::vector<typename Arc::StateId> *top_order) {
73  namespace f = fst;
74  typedef typename Arc::StateId StateId;
75  typedef f::ExplicitMatcher<f::Matcher<f::Fst<Arc>>> Matr;
76  typedef f::StateIterator<f::Fst<Arc>> StateItr;
77 
78  bool phi_acyclic = PhiTopOrder(fst, phi_label, top_order);
79  // phi label cycle?
80  if (!phi_acyclic) {
81  VLOG(1) << "IsCanonical: input FST not acyclic w.r.t. phi/epsilon labels";
82  return false;
83  }
84  typedef typename Arc::StateId StateId;
85  // input-label sorted?
86  if (!fst.Properties(f::kILabelSorted, true)) {
87  VLOG(1) << "IsCanonical: input FST not sorted w.r.t input labels";
88  return false;
89  }
90 
91  if (phi_label != f::kNoLabel) {
92  // At most one phi-label per state?
93  Matr matcher(fst, f::MATCH_INPUT);
94  for (StateItr siter(fst); !siter.Done(); siter.Next()) {
95  StateId s = siter.Value();
96  matcher.SetState(s);
97  if (matcher.Find(phi_label)) {
98  matcher.Next();
99  if (!matcher.Done()) {
100  VLOG(1) << "IsCanonical: phi non-determinism: state: " << s;
101  return false;
102  }
103  }
104  }
105  }
106 
107  return true;
108 }
109 
110 // Tests if input has the canonical topology of a stochastic FST.
111 template <class Arc>
112 bool IsCanonical(const fst::Fst<Arc> &fst,
113  typename Arc::Label phi_label = fst::kNoLabel) {
114  typedef typename Arc::StateId StateId;
115  std::vector<StateId> top_order;
116  return IsCanonical(fst, phi_label, &top_order);
117 }
118 
119 // Finds the phi order of each state: i.e., the length of the (longest)
120 // phi path from each state + 1. Assumes but does not fully test that
121 // the input is a canonical SFST. Returns the highest order found.
122 template <class Arc>
123 int PhiStateOrder(const fst::Fst<Arc> &fst, typename Arc::Label phi_label,
124  std::vector<int> *state_order) {
125  namespace f = fst;
126  typedef typename Arc::StateId StateId;
127  typedef f::ExplicitMatcher<f::Matcher<f::Fst<Arc>>> Matr;
128  state_order->clear();
129  if (phi_label == f::kNoLabel) {
130  state_order->resize(f::CountStates(fst), 1);
131  return 1;
132  }
133  std::vector<StateId> top_order;
134  bool acyclic = PhiTopOrder(fst, phi_label, &top_order);
135  if (!acyclic) {
136  FSTERROR() << "PhiStateOrder: input FST not canonical (phi-cyclic)";
137  return 0;
138  }
139  state_order->resize(top_order.size(), 1);
140  int max_order = 1;
141 
142  Matr matcher(fst, f::MATCH_INPUT);
143  for (StateId i = top_order.size() - 1; i >= 0; --i) {
144  StateId s = top_order[i]; // ith state in reverse phi-top order
145  matcher.SetState(s);
146  if (matcher.Find(phi_label)) {
147  const Arc &arc = matcher.Value();
148  int ord = (*state_order)[arc.nextstate] + 1;
149  (*state_order)[s] = ord;
150  if (ord > max_order)
151  max_order = ord;
152  }
153  }
154  return max_order;
155 }
156 
157 
158 } // namespace sfst
159 
160 #endif // NLP_GRM2_SFST_CANONICAL_H_
int PhiStateOrder(const fst::Fst< Arc > &fst, typename Arc::Label phi_label, std::vector< int > *state_order)
Definition: canonical.h:123
Definition: perplexity.h:32
bool PhiTopOrder(const fst::Fst< Arc > &fst, typename Arc::Label phi_label, std::vector< typename Arc::StateId > *top_order)
Definition: canonical.h:37
Definition: sfstinfo.cc:39
bool IsCanonical(const fst::Fst< Arc > &fst, typename Arc::Label phi_label, std::vector< typename Arc::StateId > *top_order)
Definition: canonical.h:71