Beispiel #1
0
 /// <summary>
 /// Walks every transition reachable from <paramref name="s"/> and adds a deep copy
 /// of the current <paramref name="path"/> to <paramref name="strings"/> each time
 /// the walk arrives at an accepting state.
 /// A negative <paramref name="limit"/> disables the size check ("infinite").
 /// </summary>
 /// <param name="s">State to start expanding from.</param>
 /// <param name="pathstates">States on the path currently being explored; used to detect cycles.</param>
 /// <param name="strings">Receives the collected strings.</param>
 /// <param name="path">Scratch buffer holding the labels of the current path.</param>
 /// <param name="limit">Maximum number of collected strings, or a negative value for no maximum.</param>
 /// <returns>
 /// <c>false</c> if a transition leads back to a state already on the current path, or if
 /// <paramref name="limit"/> is non-negative and more than <paramref name="limit"/> strings
 /// have been collected; <c>true</c> otherwise. When <c>false</c> is returned,
 /// <paramref name="pathstates"/> and <paramref name="path"/> are left as they were at the
 /// point of the early exit (they are not unwound).
 /// </returns>
 private static bool GetFiniteStrings(State s, HashSet<State> pathstates, HashSet<IntsRef> strings, IntsRef path, int limit)
 {
     pathstates.Add(s);

     foreach (Transition transition in s.Transitions)
     {
         // A target that is already on the current path means there is a cycle.
         bool revisitsPath = pathstates.Contains(transition.To);
         if (revisitsPath)
         {
             return false;
         }

         int label = transition.Min_Renamed;
         while (label <= transition.Max_Renamed)
         {
             // Append the label to the current path.
             path.Grow(path.Length + 1);
             path.Ints[path.Length] = label;
             path.Length++;

             if (t_IsAccept(transition))
             {
                 strings.Add(IntsRef.DeepCopyOf(path));

                 bool overLimit = limit >= 0 && strings.Count > limit;
                 if (overLimit)
                 {
                     return false;
                 }
             }

             bool finite = GetFiniteStrings(transition.To, pathstates, strings, path, limit);
             if (!finite)
             {
                 return false;
             }

             // Drop the label again before trying the next one.
             path.Length--;
             label++;
         }
     }

     pathstates.Remove(s);
     return true;
 }

 /// <summary>
 /// Tells whether the target state of <paramref name="transition"/> is an accepting state.
 /// </summary>
 private static bool t_IsAccept(Transition transition)
 {
     return transition.To.accept;
 }
Beispiel #2
0
        /// <summary>
        /// Verifies a pruned FST. Builds a brute-force table of every prefix of every
        /// input in <c>Pairs</c> (with a count and the common output of the terms sharing
        /// that prefix), prunes that table using <paramref name="prune1"/> /
        /// <paramref name="prune2"/>, and then asserts that <paramref name="fst"/>
        /// enumerates only surviving prefixes and that every surviving non-empty prefix
        /// can be run through it with the expected output.
        /// </summary>
        /// <param name="inputMode">Forwarded to <c>InputToString</c> for verbose output.</param>
        /// <param name="fst">FST under test; asserted to be null when at most one prefix survives pruning.</param>
        /// <param name="prune1">When greater than 0, a prefix is kept only if its count is at least this value.</param>
        /// <param name="prune2">Consulted only when <paramref name="prune1"/> is not greater than 0; asserted to be greater than 0 in that case.</param>
        private void VerifyPruned(int inputMode, FST<T> fst, int prune1, int prune2)
        {
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: now verify pruned " + Pairs.Count + " terms; outputs=" + Outputs);
                foreach (InputOutput<T> pair in Pairs)
                {
                    Console.WriteLine("  " + InputToString(inputMode, pair.Input) + ": " + Outputs.OutputToString(pair.Output));
                }
            }

            // To validate the FST, we brute-force compute all prefixes
            // in the terms, matched to their "common" outputs, prune that
            // set according to the prune thresholds, then assert the FST
            // matches that same set.

            // NOTE: Crazy RAM intensive!!

            // build all prefixes
            IDictionary<IntsRef, CountMinOutput<T>> prefixes = new Dictionary<IntsRef, CountMinOutput<T>>();
            IntsRef scratch = new IntsRef(10);

            foreach (InputOutput<T> pair in Pairs)
            {
                scratch.CopyInts(pair.Input);
                for (int idx = 0; idx <= pair.Input.Length; idx++)
                {
                    scratch.Length = idx;

                    // The dictionary indexer throws on a missing key, so probe with
                    // TryGetValue; cmo stays null when the prefix has not been seen yet.
                    CountMinOutput<T> cmo;
                    prefixes.TryGetValue(scratch, out cmo);
                    if (cmo == null)
                    {
                        cmo = new CountMinOutput<T>();
                        cmo.Count = 1;
                        cmo.Output = pair.Output;
                        // scratch is reused, so store a private copy as the key.
                        prefixes[IntsRef.DeepCopyOf(scratch)] = cmo;
                    }
                    else
                    {
                        cmo.Count++;
                        // Replace values that are merely Equal to NoOutput with the
                        // NoOutput instance itself before combining.
                        T output1 = cmo.Output;
                        if (output1.Equals(Outputs.NoOutput))
                        {
                            output1 = Outputs.NoOutput;
                        }
                        T output2 = pair.Output;
                        if (output2.Equals(Outputs.NoOutput))
                        {
                            output2 = Outputs.NoOutput;
                        }
                        cmo.Output = Outputs.Common(output1, output2);
                    }
                    if (idx == pair.Input.Length)
                    {
                        // The full input: this prefix is itself a term.
                        cmo.IsFinal = true;
                        cmo.FinalOutput = cmo.Output;
                    }
                }
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: now prune");
            }

            // prune 'em
            // A dictionary cannot be modified while it is being enumerated, so walk a
            // snapshot of the entries and remove rejected prefixes from the dictionary
            // itself right away (later parent lookups then see the removal).
            List<KeyValuePair<IntsRef, CountMinOutput<T>>> snapshot = new List<KeyValuePair<IntsRef, CountMinOutput<T>>>(prefixes);

            foreach (KeyValuePair<IntsRef, CountMinOutput<T>> ent in snapshot)
            {
                IntsRef prefix = ent.Key;
                CountMinOutput<T> cmo = ent.Value;
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("  term prefix=" + InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + cmo.IsLeaf + " output=" + Outputs.OutputToString(cmo.Output) + " isFinal=" + cmo.IsFinal);
                }
                bool keep;
                if (prune1 > 0)
                {
                    keep = cmo.Count >= prune1;
                }
                else
                {
                    Debug.Assert(prune2 > 0);
                    if (prune2 > 1 && cmo.Count >= prune2)
                    {
                        keep = true;
                    }
                    else if (prefix.Length > 0)
                    {
                        // consult our parent
                        scratch.Length = prefix.Length - 1;
                        Array.Copy(prefix.Ints, prefix.Offset, scratch.Ints, 0, scratch.Length);
                        CountMinOutput<T> parent;
                        prefixes.TryGetValue(scratch, out parent);
                        keep = parent != null && ((prune2 > 1 && parent.Count >= prune2) || (prune2 == 1 && (parent.Count >= 2 || prefix.Length <= 1)));
                    }
                    else if (cmo.Count >= prune2)
                    {
                        keep = true;
                    }
                    else
                    {
                        keep = false;
                    }
                }

                if (!keep)
                {
                    prefixes.Remove(prefix);
                }
                else
                {
                    // clear isLeaf for all ancestors
                    scratch.CopyInts(prefix);
                    scratch.Length--;
                    while (scratch.Length >= 0)
                    {
                        CountMinOutput<T> ancestor;
                        prefixes.TryGetValue(scratch, out ancestor);
                        if (ancestor != null)
                        {
                            ancestor.IsLeaf = false;
                        }
                        scratch.Length--;
                    }
                }
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: after prune");
                foreach (KeyValuePair<IntsRef, CountMinOutput<T>> ent in prefixes)
                {
                    Console.WriteLine("  " + InputToString(inputMode, ent.Key, false) + ": isLeaf=" + ent.Value.IsLeaf + " isFinal=" + ent.Value.IsFinal);
                    if (ent.Value.IsFinal)
                    {
                        Console.WriteLine("    finalOutput=" + Outputs.OutputToString(ent.Value.FinalOutput));
                    }
                }
            }

            // With at most one surviving prefix no FST is expected at all.
            if (prefixes.Count <= 1)
            {
                Assert.IsNull(fst);
                return;
            }

            Assert.IsNotNull(fst);

            // make sure FST only enums valid prefixes
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: check pruned enum");
            }
            IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);

            IntsRefFSTEnum<T>.InputOutput<T> current;
            while ((current = fstEnum.Next()) != null)
            {
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("  fstEnum.next prefix=" + InputToString(inputMode, current.Input, false) + " output=" + Outputs.OutputToString(current.Output));
                }
                // A prefix the FST enumerates but the model lacks must fail the
                // assertion below rather than throw KeyNotFoundException.
                CountMinOutput<T> cmo;
                prefixes.TryGetValue(current.Input, out cmo);
                Assert.IsNotNull(cmo);
                Assert.IsTrue(cmo.IsLeaf || cmo.IsFinal);
                if (cmo.IsFinal)
                {
                    Assert.AreEqual(cmo.FinalOutput, current.Output);
                }
                else
                {
                    Assert.AreEqual(cmo.Output, current.Output);
                }
            }

            // make sure all non-pruned prefixes are present in the FST
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: verify all prefixes");
            }
            // Filled in by Run; compared against the prefix length below.
            int[] stopNode = new int[1];
            foreach (KeyValuePair<IntsRef, CountMinOutput<T>> ent in prefixes)
            {
                if (ent.Key.Length > 0)
                {
                    CountMinOutput<T> cmo = ent.Value;
                    T output = Run(fst, ent.Key, stopNode);
                    if (LuceneTestCase.VERBOSE)
                    {
                        Console.WriteLine("TEST: verify prefix=" + InputToString(inputMode, ent.Key, false) + " output=" + Outputs.OutputToString(cmo.Output));
                    }
                    if (cmo.IsFinal)
                    {
                        Assert.AreEqual(cmo.FinalOutput, output);
                    }
                    else
                    {
                        Assert.AreEqual(cmo.Output, output);
                    }
                    Assert.AreEqual(ent.Key.Length, stopNode[0]);
                }
            }
        }