/// <summary> /// Returns the strings that can be produced from the given state, or /// false if more than <code>limit</code> strings are found. /// <code>limit</code><0 means "infinite". /// </summary> private static bool GetFiniteStrings(State s, HashSet <State> pathstates, HashSet <IntsRef> strings, IntsRef path, int limit) { pathstates.Add(s); foreach (Transition t in s.Transitions) { if (pathstates.Contains(t.To)) { return(false); } for (int n = t.Min_Renamed; n <= t.Max_Renamed; n++) { path.Grow(path.Length + 1); path.Ints[path.Length] = n; path.Length++; if (t.To.accept) { strings.Add(IntsRef.DeepCopyOf(path)); if (limit >= 0 && strings.Count > limit) { return(false); } } if (!GetFiniteStrings(t.To, pathstates, strings, path, limit)) { return(false); } path.Length--; } } pathstates.Remove(s); return(true); }
// FST is pruned private void VerifyPruned(int inputMode, FST <T> fst, int prune1, int prune2) { if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: now verify pruned " + Pairs.Count + " terms; outputs=" + Outputs); foreach (InputOutput <T> pair in Pairs) { Console.WriteLine(" " + InputToString(inputMode, pair.Input) + ": " + Outputs.OutputToString(pair.Output)); } } // To validate the FST, we brute-force compute all prefixes // in the terms, matched to their "common" outputs, prune that // set according to the prune thresholds, then assert the FST // matches that same set. // NOTE: Crazy RAM intensive!! //System.out.println("TEST: tally prefixes"); // build all prefixes IDictionary <IntsRef, CountMinOutput <T> > prefixes = new Dictionary <IntsRef, CountMinOutput <T> >(); IntsRef scratch = new IntsRef(10); foreach (InputOutput <T> pair in Pairs) { scratch.CopyInts(pair.Input); for (int idx = 0; idx <= pair.Input.Length; idx++) { scratch.Length = idx; CountMinOutput <T> cmo = prefixes[scratch]; if (cmo == null) { cmo = new CountMinOutput <T>(); cmo.Count = 1; cmo.Output = pair.Output; prefixes[IntsRef.DeepCopyOf(scratch)] = cmo; } else { cmo.Count++; T output1 = cmo.Output; if (output1.Equals(Outputs.NoOutput)) { output1 = Outputs.NoOutput; } T output2 = pair.Output; if (output2.Equals(Outputs.NoOutput)) { output2 = Outputs.NoOutput; } cmo.Output = Outputs.Common(output1, output2); } if (idx == pair.Input.Length) { cmo.IsFinal = true; cmo.FinalOutput = cmo.Output; } } } if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: now prune"); } // prune 'em IEnumerator <KeyValuePair <IntsRef, CountMinOutput <T> > > it = prefixes.GetEnumerator(); while (it.MoveNext()) { KeyValuePair <IntsRef, CountMinOutput <T> > ent = it.Current; IntsRef prefix = ent.Key; CountMinOutput <T> cmo = ent.Value; if (LuceneTestCase.VERBOSE) { Console.WriteLine(" term prefix=" + InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + cmo.IsLeaf + " output=" + Outputs.OutputToString(cmo.Output) + " isFinal=" + cmo.IsFinal); } bool keep; if (prune1 > 0) { keep = cmo.Count >= prune1; } else { Debug.Assert(prune2 > 0); if (prune2 > 1 && cmo.Count >= prune2) { keep = true; } else if (prefix.Length > 0) { // consult our parent scratch.Length = prefix.Length - 1; Array.Copy(prefix.Ints, prefix.Offset, scratch.Ints, 0, scratch.Length); CountMinOutput <T> cmo2 = prefixes[scratch]; //System.out.println(" parent count = " + (cmo2 == null ? -1 : cmo2.count)); keep = cmo2 != null && ((prune2 > 1 && cmo2.Count >= prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1))); } else if (cmo.Count >= prune2) { keep = true; } else { keep = false; } } if (!keep) { it.Reset(); //System.out.println(" remove"); } else { // clear isLeaf for all ancestors //System.out.println(" keep"); scratch.CopyInts(prefix); scratch.Length--; while (scratch.Length >= 0) { CountMinOutput <T> cmo2 = prefixes[scratch]; if (cmo2 != null) { //System.out.println(" clear isLeaf " + inputToString(inputMode, scratch)); cmo2.IsLeaf = false; } scratch.Length--; } } } if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: after prune"); foreach (KeyValuePair <IntsRef, CountMinOutput <T> > ent in prefixes) { Console.WriteLine(" " + InputToString(inputMode, ent.Key, false) + ": isLeaf=" + ent.Value.IsLeaf + " isFinal=" + ent.Value.IsFinal); if (ent.Value.IsFinal) { Console.WriteLine(" finalOutput=" + Outputs.OutputToString(ent.Value.FinalOutput)); } } } if (prefixes.Count <= 1) { Assert.IsNull(fst); return; } Assert.IsNotNull(fst); // make sure FST only enums valid prefixes if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: check pruned enum"); } IntsRefFSTEnum <T> fstEnum = new IntsRefFSTEnum <T>(fst); IntsRefFSTEnum <T> .InputOutput <T> current; while ((current = fstEnum.Next()) != null) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" fstEnum.next prefix=" + InputToString(inputMode, current.Input, false) + " output=" + Outputs.OutputToString(current.Output)); } CountMinOutput <T> cmo = prefixes[current.Input]; Assert.IsNotNull(cmo); Assert.IsTrue(cmo.IsLeaf || cmo.IsFinal); //if (cmo.isFinal && !cmo.isLeaf) { if (cmo.IsFinal) { Assert.AreEqual(cmo.FinalOutput, current.Output); } else { Assert.AreEqual(cmo.Output, current.Output); } } // make sure all non-pruned prefixes are present in the FST if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: verify all prefixes"); } int[] stopNode = new int[1]; foreach (KeyValuePair <IntsRef, CountMinOutput <T> > ent in prefixes) { if (ent.Key.Length > 0) { CountMinOutput <T> cmo = ent.Value; T output = Run(fst, ent.Key, stopNode); if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: verify prefix=" + InputToString(inputMode, ent.Key, false) + " output=" + Outputs.OutputToString(cmo.Output)); } // if (cmo.isFinal && !cmo.isLeaf) { if (cmo.IsFinal) { Assert.AreEqual(cmo.FinalOutput, output); } else { Assert.AreEqual(cmo.Output, output); } Assert.AreEqual(ent.Key.Length, stopNode[0]); } } }