/// <summary>
 /// Returns the strings that can be produced from the given state, or
 /// <c>false</c> if more than <paramref name="limit"/> strings are found.
 /// <paramref name="limit"/>&lt;0 means "infinite".
 /// </summary>
 private static bool GetFiniteStrings(State s, JCG.HashSet <State> pathstates, JCG.HashSet <Int32sRef> strings, Int32sRef path, int limit)
 {
     pathstates.Add(s);
     foreach (Transition t in s.GetTransitions())
     {
         if (pathstates.Contains(t.to))
         {
             return(false);
         }
         for (int n = t.min; n <= t.max; n++)
         {
             path.Grow(path.Length + 1);
             path.Int32s[path.Length] = n;
             path.Length++;
             if (t.to.accept)
             {
                 strings.Add(Int32sRef.DeepCopyOf(path));
                 if (limit >= 0 && strings.Count > limit)
                 {
                     return(false);
                 }
             }
             if (!GetFiniteStrings(t.to, pathstates, strings, path, limit))
             {
                 return(false);
             }
             path.Length--;
         }
     }
     pathstates.Remove(s);
     return(true);
 }
Example #2
0
        // FST is pruned
        private void VerifyPruned(int inputMode, FST <T> fst, int prune1, int prune2)
        {
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: now verify pruned " + pairs.Count + " terms; outputs=" + outputs);
                foreach (InputOutput <T> pair in pairs)
                {
                    Console.WriteLine("  " + InputToString(inputMode, pair.Input) + ": " + outputs.OutputToString(pair.Output));
                }
            }

            // To validate the FST, we brute-force compute all prefixes
            // in the terms, matched to their "common" outputs, prune that
            // set according to the prune thresholds, then assert the FST
            // matches that same set.

            // NOTE: Crazy RAM intensive!!

            //System.out.println("TEST: tally prefixes");

            // build all prefixes
            IDictionary <Int32sRef, CountMinOutput <T> > prefixes = new HashMap <Int32sRef, CountMinOutput <T> >();
            Int32sRef scratch = new Int32sRef(10);

            foreach (InputOutput <T> pair in pairs)
            {
                scratch.CopyInt32s(pair.Input);
                for (int idx = 0; idx <= pair.Input.Length; idx++)
                {
                    scratch.Length = idx;
                    CountMinOutput <T> cmo = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null;
                    if (cmo == null)
                    {
                        cmo        = new CountMinOutput <T>();
                        cmo.Count  = 1;
                        cmo.Output = pair.Output;
                        prefixes[Int32sRef.DeepCopyOf(scratch)] = cmo;
                    }
                    else
                    {
                        cmo.Count++;
                        T output1 = cmo.Output;
                        if (output1.Equals(outputs.NoOutput))
                        {
                            output1 = outputs.NoOutput;
                        }
                        T output2 = pair.Output;
                        if (output2.Equals(outputs.NoOutput))
                        {
                            output2 = outputs.NoOutput;
                        }
                        cmo.Output = outputs.Common(output1, output2);
                    }
                    if (idx == pair.Input.Length)
                    {
                        cmo.IsFinal     = true;
                        cmo.FinalOutput = cmo.Output;
                    }
                }
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: now prune");
            }


            // prune 'em
            // LUCENENET NOTE: Altered this a bit to go in reverse rather than use an enumerator since
            // in .NET you cannot delete records while enumerating forward through a dictionary.
            for (int i = prefixes.Count - 1; i >= 0; i--)
            {
                KeyValuePair <Int32sRef, CountMinOutput <T> > ent = prefixes.ElementAt(i);
                Int32sRef          prefix = ent.Key;
                CountMinOutput <T> cmo    = ent.Value;
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("  term prefix=" + InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + cmo.IsLeaf + " output=" + outputs.OutputToString(cmo.Output) + " isFinal=" + cmo.IsFinal);
                }
                bool keep;
                if (prune1 > 0)
                {
                    keep = cmo.Count >= prune1;
                }
                else
                {
                    Debug.Assert(prune2 > 0);
                    if (prune2 > 1 && cmo.Count >= prune2)
                    {
                        keep = true;
                    }
                    else if (prefix.Length > 0)
                    {
                        // consult our parent
                        scratch.Length = prefix.Length - 1;
                        Array.Copy(prefix.Int32s, prefix.Offset, scratch.Int32s, 0, scratch.Length);
                        CountMinOutput <T> cmo2 = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null;
                        //System.out.println("    parent count = " + (cmo2 == null ? -1 : cmo2.count));
                        keep = cmo2 != null && ((prune2 > 1 && cmo2.Count >= prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1)));
                    }
                    else if (cmo.Count >= prune2)
                    {
                        keep = true;
                    }
                    else
                    {
                        keep = false;
                    }
                }

                if (!keep)
                {
                    prefixes.Remove(prefix);
                    //System.out.println("    remove");
                }
                else
                {
                    // clear isLeaf for all ancestors
                    //System.out.println("    keep");
                    scratch.CopyInt32s(prefix);
                    scratch.Length--;
                    while (scratch.Length >= 0)
                    {
                        CountMinOutput <T> cmo2 = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null;
                        if (cmo2 != null)
                        {
                            //System.out.println("    clear isLeaf " + inputToString(inputMode, scratch));
                            cmo2.IsLeaf = false;
                        }
                        scratch.Length--;
                    }
                }
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: after prune");
                foreach (KeyValuePair <Int32sRef, CountMinOutput <T> > ent in prefixes)
                {
                    Console.WriteLine("  " + InputToString(inputMode, ent.Key, false) + ": isLeaf=" + ent.Value.IsLeaf + " isFinal=" + ent.Value.IsFinal);
                    if (ent.Value.IsFinal)
                    {
                        Console.WriteLine("    finalOutput=" + outputs.OutputToString(ent.Value.FinalOutput));
                    }
                }
            }

            if (prefixes.Count <= 1)
            {
                Assert.IsNull(fst);
                return;
            }

            Assert.IsNotNull(fst);

            // make sure FST only enums valid prefixes
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: check pruned enum");
            }
            Int32sRefFSTEnum <T> fstEnum = new Int32sRefFSTEnum <T>(fst);

            Int32sRefFSTEnum.InputOutput <T> current;
            while ((current = fstEnum.Next()) != null)
            {
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("  fstEnum.next prefix=" + InputToString(inputMode, current.Input, false) + " output=" + outputs.OutputToString(current.Output));
                }
                CountMinOutput <T> cmo = prefixes.ContainsKey(current.Input) ? prefixes[current.Input] : null;
                Assert.IsNotNull(cmo);
                Assert.IsTrue(cmo.IsLeaf || cmo.IsFinal);
                //if (cmo.isFinal && !cmo.isLeaf) {
                if (cmo.IsFinal)
                {
                    Assert.AreEqual(cmo.FinalOutput, current.Output);
                }
                else
                {
                    Assert.AreEqual(cmo.Output, current.Output);
                }
            }

            // make sure all non-pruned prefixes are present in the FST
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: verify all prefixes");
            }
            int[] stopNode = new int[1];
            foreach (KeyValuePair <Int32sRef, CountMinOutput <T> > ent in prefixes)
            {
                if (ent.Key.Length > 0)
                {
                    CountMinOutput <T> cmo = ent.Value;
                    T output = Run(fst, ent.Key, stopNode);
                    if (LuceneTestCase.VERBOSE)
                    {
                        Console.WriteLine("TEST: verify prefix=" + InputToString(inputMode, ent.Key, false) + " output=" + outputs.OutputToString(cmo.Output));
                    }
                    // if (cmo.isFinal && !cmo.isLeaf) {
                    if (cmo.IsFinal)
                    {
                        Assert.AreEqual(cmo.FinalOutput, output);
                    }
                    else
                    {
                        Assert.AreEqual(cmo.Output, output);
                    }
                    Assert.AreEqual(ent.Key.Length, stopNode[0]);
                }
            }
        }
Example #3
0
        // FST is pruned
        private void VerifyPruned(int inputMode, FST <T> fst, int prune1, int prune2)
        {
            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("TEST: now verify pruned " + pairs.Count + " terms; outputs=" + outputs);
                foreach (InputOutput <T> pair in pairs)
                {
                    Console.WriteLine("  " + InputToString(inputMode, pair.Input) + ": " + outputs.OutputToString(pair.Output));
                }
            }

            // To validate the FST, we brute-force compute all prefixes
            // in the terms, matched to their "common" outputs, prune that
            // set according to the prune thresholds, then assert the FST
            // matches that same set.

            // NOTE: Crazy RAM intensive!!

            //System.out.println("TEST: tally prefixes");

            // build all prefixes

            // LUCENENET: We use ConcurrentDictionary<TKey, TValue> because Dictionary<TKey, TValue> doesn't support
            // deletion while iterating, but ConcurrentDictionary does.
            IDictionary <Int32sRef, CountMinOutput <T> > prefixes = new ConcurrentDictionary <Int32sRef, CountMinOutput <T> >();
            Int32sRef scratch = new Int32sRef(10);

            foreach (InputOutput <T> pair in pairs)
            {
                scratch.CopyInt32s(pair.Input);
                for (int idx = 0; idx <= pair.Input.Length; idx++)
                {
                    scratch.Length = idx;
                    if (!prefixes.TryGetValue(scratch, out CountMinOutput <T> cmo) || cmo == null)
                    {
                        cmo        = new CountMinOutput <T>();
                        cmo.Count  = 1;
                        cmo.Output = pair.Output;
                        prefixes[Int32sRef.DeepCopyOf(scratch)] = cmo;
                    }
                    else
                    {
                        cmo.Count++;
                        T output1 = cmo.Output;
                        if (output1.Equals(outputs.NoOutput))
                        {
                            output1 = outputs.NoOutput;
                        }
                        T output2 = pair.Output;
                        if (output2.Equals(outputs.NoOutput))
                        {
                            output2 = outputs.NoOutput;
                        }
                        cmo.Output = outputs.Common(output1, output2);
                    }
                    if (idx == pair.Input.Length)
                    {
                        cmo.IsFinal     = true;
                        cmo.FinalOutput = cmo.Output;
                    }
                }
            }

            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("TEST: now prune");
            }

            // prune 'em
            using (var it = prefixes.GetEnumerator())
            {
                while (it.MoveNext())
                {
                    var                ent    = it.Current;
                    Int32sRef          prefix = ent.Key;
                    CountMinOutput <T> cmo    = ent.Value;
                    if (LuceneTestCase.Verbose)
                    {
                        Console.WriteLine("  term prefix=" + InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + cmo.IsLeaf + " output=" + outputs.OutputToString(cmo.Output) + " isFinal=" + cmo.IsFinal);
                    }
                    bool keep;
                    if (prune1 > 0)
                    {
                        keep = cmo.Count >= prune1;
                    }
                    else
                    {
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(prune2 > 0);
                        }
                        if (prune2 > 1 && cmo.Count >= prune2)
                        {
                            keep = true;
                        }
                        else if (prefix.Length > 0)
                        {
                            // consult our parent
                            scratch.Length = prefix.Length - 1;
                            Array.Copy(prefix.Int32s, prefix.Offset, scratch.Int32s, 0, scratch.Length);
                            keep = prefixes.TryGetValue(scratch, out CountMinOutput <T> cmo2) && cmo2 != null && ((prune2 > 1 && cmo2.Count >= prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1)));
                            //System.out.println("    parent count = " + (cmo2 == null ? -1 : cmo2.count));
                        }
                        else if (cmo.Count >= prune2)
                        {
                            keep = true;
                        }
                        else
                        {
                            keep = false;
                        }
                    }

                    if (!keep)
                    {
                        //it.remove();
                        prefixes.Remove(ent);
                        //System.out.println("    remove");
                    }
                    else
                    {
                        // clear isLeaf for all ancestors
                        //System.out.println("    keep");
                        scratch.CopyInt32s(prefix);
                        scratch.Length--;
                        while (scratch.Length >= 0)
                        {
                            if (prefixes.TryGetValue(scratch, out CountMinOutput <T> cmo2) && cmo2 != null)
                            {
                                //System.out.println("    clear isLeaf " + inputToString(inputMode, scratch));
                                cmo2.IsLeaf = false;
                            }
                            scratch.Length--;
                        }
                    }
                }
            }

            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("TEST: after prune");
                foreach (KeyValuePair <Int32sRef, CountMinOutput <T> > ent in prefixes)
                {
                    Console.WriteLine("  " + InputToString(inputMode, ent.Key, false) + ": isLeaf=" + ent.Value.IsLeaf + " isFinal=" + ent.Value.IsFinal);
                    if (ent.Value.IsFinal)
                    {
                        Console.WriteLine("    finalOutput=" + outputs.OutputToString(ent.Value.FinalOutput));
                    }
                }
            }

            if (prefixes.Count <= 1)
            {
                Assert.IsNull(fst);
                return;
            }

            Assert.IsNotNull(fst);

            // make sure FST only enums valid prefixes
            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("TEST: check pruned enum");
            }
            Int32sRefFSTEnum <T> fstEnum = new Int32sRefFSTEnum <T>(fst);

            Int32sRefFSTEnum.InputOutput <T> current;
            while ((current = fstEnum.Next()) != null)
            {
                if (LuceneTestCase.Verbose)
                {
                    Console.WriteLine("  fstEnum.next prefix=" + InputToString(inputMode, current.Input, false) + " output=" + outputs.OutputToString(current.Output));
                }
                prefixes.TryGetValue(current.Input, out CountMinOutput <T> cmo);
                Assert.IsNotNull(cmo);
                Assert.IsTrue(cmo.IsLeaf || cmo.IsFinal);
                //if (cmo.isFinal && !cmo.isLeaf) {
                if (cmo.IsFinal)
                {
                    Assert.AreEqual(cmo.FinalOutput, current.Output);
                }
                else
                {
                    Assert.AreEqual(cmo.Output, current.Output);
                }
            }

            // make sure all non-pruned prefixes are present in the FST
            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("TEST: verify all prefixes");
            }
            int[] stopNode = new int[1];
            foreach (KeyValuePair <Int32sRef, CountMinOutput <T> > ent in prefixes)
            {
                if (ent.Key.Length > 0)
                {
                    CountMinOutput <T> cmo = ent.Value;
                    T output = Run(fst, ent.Key, stopNode);
                    if (LuceneTestCase.Verbose)
                    {
                        Console.WriteLine("TEST: verify prefix=" + InputToString(inputMode, ent.Key, false) + " output=" + outputs.OutputToString(cmo.Output));
                    }
                    // if (cmo.isFinal && !cmo.isLeaf) {
                    if (cmo.IsFinal)
                    {
                        Assert.AreEqual(cmo.FinalOutput, output);
                    }
                    else
                    {
                        Assert.AreEqual(cmo.Output, output);
                    }
                    Assert.AreEqual(ent.Key.Length, stopNode[0]);
                }
            }
        }