示例#1
0
        // for debugging

        /*
         * private String toString(BytesRef b) {
         * try {
         *  return b.utf8ToString() + " " + b;
         * } catch (Throwable t) {
         *  return b.toString();
         * }
         * }
         */

        /// <summary>
        /// It's OK to add the same input twice in a row with
        /// different outputs, as long as outputs impls the merge
        /// method. Note that input is fully consumed after this
        /// method is returned (so caller is free to reuse), but
        /// output is not.  So if your outputs are changeable (eg
        /// <see cref="ByteSequenceOutputs"/> or
        /// <see cref="Int32SequenceOutputs"/>) then you cannot reuse across
        /// calls.
        /// </summary>
        public virtual void Add(Int32sRef input, T output)
        {
            /*
             * if (DEBUG) {
             * BytesRef b = new BytesRef(input.length);
             * for(int x=0;x<input.length;x++) {
             *  b.bytes[x] = (byte) input.ints[x];
             * }
             * b.length = input.length;
             * if (output == NO_OUTPUT) {
             *  System.out.println("\nFST ADD: input=" + toString(b) + " " + b);
             * } else {
             *  System.out.println("\nFST ADD: input=" + toString(b) + " " + b + " output=" + fst.outputs.outputToString(output));
             * }
             * }
             */

            // De-dup NO_OUTPUT since it must be a singleton:
            if (output.Equals(NO_OUTPUT))
            {
                output = NO_OUTPUT;
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(lastInput.Length == 0 || input.CompareTo(lastInput) >= 0, "inputs are added out of order lastInput={0} vs input={1}", lastInput, input);
                Debugging.Assert(ValidOutput(output));
            }

            //System.out.println("\nadd: " + input);
            if (input.Length == 0)
            {
                // empty input: only allowed as first input.  we have
                // to special case this because the packed FST
                // format cannot represent the empty input since
                // 'finalness' is stored on the incoming arc, not on
                // the node
                frontier[0].InputCount++;
                frontier[0].IsFinal = true;
                fst.EmptyOutput     = output;
                return;
            }

            // compare shared prefix length
            int pos1     = 0;
            int pos2     = input.Offset;
            int pos1Stop = Math.Min(lastInput.Length, input.Length);

            while (true)
            {
                frontier[pos1].InputCount++;
                //System.out.println("  incr " + pos1 + " ct=" + frontier[pos1].inputCount + " n=" + frontier[pos1]);
                if (pos1 >= pos1Stop || lastInput.Int32s[pos1] != input.Int32s[pos2])
                {
                    break;
                }
                pos1++;
                pos2++;
            }
            int prefixLenPlus1 = pos1 + 1;

            if (frontier.Length < input.Length + 1)
            {
                UnCompiledNode <T>[] next = new UnCompiledNode <T> [ArrayUtil.Oversize(input.Length + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                Array.Copy(frontier, 0, next, 0, frontier.Length);
                for (int idx = frontier.Length; idx < next.Length; idx++)
                {
                    next[idx] = new UnCompiledNode <T>(this, idx);
                }
                frontier = next;
            }

            // minimize/compile states from previous input's
            // orphan'd suffix
            DoFreezeTail(prefixLenPlus1);

            // init tail states for current input
            for (int idx = prefixLenPlus1; idx <= input.Length; idx++)
            {
                frontier[idx - 1].AddArc(input.Int32s[input.Offset + idx - 1], frontier[idx]);
                frontier[idx].InputCount++;
            }

            UnCompiledNode <T> lastNode = frontier[input.Length];

            if (lastInput.Length != input.Length || prefixLenPlus1 != input.Length + 1)
            {
                lastNode.IsFinal = true;
                lastNode.Output  = NO_OUTPUT;
            }

            // push conflicting outputs forward, only as far as
            // needed
            for (int idx = 1; idx < prefixLenPlus1; idx++)
            {
                UnCompiledNode <T> node       = frontier[idx];
                UnCompiledNode <T> parentNode = frontier[idx - 1];

                T lastOutput = parentNode.GetLastOutput(input.Int32s[input.Offset + idx - 1]);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(ValidOutput(lastOutput));
                }

                T commonOutputPrefix;
                T wordSuffix;

                if (!lastOutput.Equals(NO_OUTPUT))
                {
                    commonOutputPrefix = fst.Outputs.Common(output, lastOutput);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(ValidOutput(commonOutputPrefix));
                    }
                    wordSuffix = fst.Outputs.Subtract(lastOutput, commonOutputPrefix);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(ValidOutput(wordSuffix));
                    }
                    parentNode.SetLastOutput(input.Int32s[input.Offset + idx - 1], commonOutputPrefix);
                    node.PrependOutput(wordSuffix);
                }
                else
                {
                    commonOutputPrefix = /*wordSuffix =*/ NO_OUTPUT; // LUCENENET: Removed unnecessary assignment
                }

                output = fst.Outputs.Subtract(output, commonOutputPrefix);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(ValidOutput(output));
                }
            }

            if (lastInput.Length == input.Length && prefixLenPlus1 == 1 + input.Length)
            {
                // same input more than 1 time in a row, mapping to
                // multiple outputs
                lastNode.Output = fst.Outputs.Merge(lastNode.Output, output);
            }
            else
            {
                // this new arc is private to this new input; set its
                // arc output to the leftover output:
                frontier[prefixLenPlus1 - 1].SetLastOutput(input.Int32s[input.Offset + prefixLenPlus1 - 1], output);
            }

            // save last input
            lastInput.CopyInt32s(input);

            //System.out.println("  count[0]=" + frontier[0].inputCount);
        }
示例#2
0
        // FST is complete
        private void VerifyUnPruned(int inputMode, FST <T> fst)
        {
            FST <long?>  fstLong;
            ISet <long?> validOutputs;
            long         minLong = long.MaxValue;
            long         maxLong = long.MinValue;

            if (doReverseLookup)
            {
                FST <long?> fstLong0 = fst as FST <long?>;
                fstLong      = fstLong0;
                validOutputs = new HashSet <long?>();
                foreach (InputOutput <T> pair in pairs)
                {
                    long?output = pair.Output as long?;
                    maxLong = Math.Max(maxLong, output.Value);
                    minLong = Math.Min(minLong, output.Value);
                    validOutputs.Add(output.Value);
                }
            }
            else
            {
                fstLong      = null;
                validOutputs = null;
            }

            if (pairs.Count == 0)
            {
                Assert.IsNull(fst);
                return;
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: now verify " + pairs.Count + " terms");
                foreach (InputOutput <T> pair in pairs)
                {
                    Assert.IsNotNull(pair);
                    Assert.IsNotNull(pair.Input);
                    Assert.IsNotNull(pair.Output);
                    Console.WriteLine("  " + InputToString(inputMode, pair.Input) + ": " + outputs.OutputToString(pair.Output));
                }
            }

            Assert.IsNotNull(fst);

            // visit valid pairs in order -- make sure all words
            // are accepted, and FSTEnum's next() steps through
            // them correctly
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: check valid terms/next()");
            }
            {
                Int32sRefFSTEnum <T> fstEnum = new Int32sRefFSTEnum <T>(fst);
                foreach (InputOutput <T> pair in pairs)
                {
                    Int32sRef term = pair.Input;
                    if (LuceneTestCase.VERBOSE)
                    {
                        Console.WriteLine("TEST: check term=" + InputToString(inputMode, term) + " output=" + fst.Outputs.OutputToString(pair.Output));
                    }
                    T output = Run(fst, term, null);
                    Assert.IsNotNull(output, "term " + InputToString(inputMode, term) + " is not accepted");
                    Assert.IsTrue(OutputsEqual(pair.Output, output));

                    // verify enum's next
                    Int32sRefFSTEnum.InputOutput <T> t = fstEnum.Next();
                    Assert.IsNotNull(t);
                    Assert.AreEqual(term, t.Input, "expected input=" + InputToString(inputMode, term) + " but fstEnum returned " + InputToString(inputMode, t.Input));
                    Assert.IsTrue(OutputsEqual(pair.Output, t.Output));
                }
                Assert.IsNull(fstEnum.Next());
            }

            IDictionary <Int32sRef, T> termsMap = new Dictionary <Int32sRef, T>();

            foreach (InputOutput <T> pair in pairs)
            {
                termsMap[pair.Input] = pair.Output;
            }

            if (doReverseLookup && maxLong > minLong)
            {
                // Do random lookups so we test null (output doesn't
                // exist) case:
                Assert.IsNull(Util.GetByOutput(fstLong, minLong - 7));
                Assert.IsNull(Util.GetByOutput(fstLong, maxLong + 7));

                int num = LuceneTestCase.AtLeast(random, 100);
                for (int iter = 0; iter < num; iter++)
                {
                    long      v     = TestUtil.NextInt64(random, minLong, maxLong);
                    Int32sRef input = Util.GetByOutput(fstLong, v);
                    Assert.IsTrue(validOutputs.Contains(v) || input == null);
                }
            }

            // find random matching word and make sure it's valid
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: verify random accepted terms");
            }
            Int32sRef scratch = new Int32sRef(10);
            int       num_    = LuceneTestCase.AtLeast(random, 500);

            for (int iter = 0; iter < num_; iter++)
            {
                T output = RandomAcceptedWord(fst, scratch);
                Assert.IsTrue(termsMap.ContainsKey(scratch), "accepted word " + InputToString(inputMode, scratch) + " is not valid");
                Assert.IsTrue(OutputsEqual(termsMap[scratch], output));

                if (doReverseLookup)
                {
                    //System.out.println("lookup output=" + output + " outs=" + fst.Outputs);
                    Int32sRef input = Util.GetByOutput(fstLong, (output as long?).Value);
                    Assert.IsNotNull(input);
                    //System.out.println("  got " + Util.toBytesRef(input, new BytesRef()).utf8ToString());
                    Assert.AreEqual(scratch, input);
                }
            }

            // test IntsRefFSTEnum.Seek:
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: verify seek");
            }
            Int32sRefFSTEnum <T> fstEnum_ = new Int32sRefFSTEnum <T>(fst);

            num_ = LuceneTestCase.AtLeast(random, 100);
            for (int iter = 0; iter < num_; iter++)
            {
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("  iter=" + iter);
                }
                if (random.NextBoolean())
                {
                    // seek to term that doesn't exist:
                    while (true)
                    {
                        Int32sRef term = ToInt32sRef(GetRandomString(random), inputMode);
                        int       pos  = pairs.BinarySearch(new InputOutput <T>(term, default(T)));
                        if (pos < 0)
                        {
                            pos = -(pos + 1);
                            // ok doesn't exist
                            //System.out.println("  seek " + inputToString(inputMode, term));
                            Int32sRefFSTEnum.InputOutput <T> seekResult;
                            if (random.Next(3) == 0)
                            {
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("  do non-exist seekExact term=" + InputToString(inputMode, term));
                                }
                                seekResult = fstEnum_.SeekExact(term);
                                pos        = -1;
                            }
                            else if (random.NextBoolean())
                            {
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("  do non-exist seekFloor term=" + InputToString(inputMode, term));
                                }
                                seekResult = fstEnum_.SeekFloor(term);
                                pos--;
                            }
                            else
                            {
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("  do non-exist seekCeil term=" + InputToString(inputMode, term));
                                }
                                seekResult = fstEnum_.SeekCeil(term);
                            }

                            if (pos != -1 && pos < pairs.Count)
                            {
                                //System.out.println("    got " + inputToString(inputMode,seekResult.input) + " output=" + fst.Outputs.outputToString(seekResult.Output));
                                Assert.IsNotNull(seekResult, "got null but expected term=" + InputToString(inputMode, pairs[pos].Input));
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("    got " + InputToString(inputMode, seekResult.Input));
                                }
                                Assert.AreEqual(pairs[pos].Input, seekResult.Input, "expected " + InputToString(inputMode, pairs[pos].Input) + " but got " + InputToString(inputMode, seekResult.Input));
                                Assert.IsTrue(OutputsEqual(pairs[pos].Output, seekResult.Output));
                            }
                            else
                            {
                                // seeked before start or beyond end
                                //System.out.println("seek=" + seekTerm);
                                Assert.IsNull(seekResult, "expected null but got " + (seekResult == null ? "null" : InputToString(inputMode, seekResult.Input)));
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("    got null");
                                }
                            }

                            break;
                        }
                    }
                }
                else
                {
                    // seek to term that does exist:
                    InputOutput <T> pair = pairs[random.Next(pairs.Count)];
                    Int32sRefFSTEnum.InputOutput <T> seekResult;
                    if (random.Next(3) == 2)
                    {
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("  do exists seekExact term=" + InputToString(inputMode, pair.Input));
                        }
                        seekResult = fstEnum_.SeekExact(pair.Input);
                    }
                    else if (random.NextBoolean())
                    {
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("  do exists seekFloor " + InputToString(inputMode, pair.Input));
                        }
                        seekResult = fstEnum_.SeekFloor(pair.Input);
                    }
                    else
                    {
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("  do exists seekCeil " + InputToString(inputMode, pair.Input));
                        }
                        seekResult = fstEnum_.SeekCeil(pair.Input);
                    }
                    Assert.IsNotNull(seekResult);
                    Assert.AreEqual(pair.Input, seekResult.Input, "got " + InputToString(inputMode, seekResult.Input) + " but expected " + InputToString(inputMode, pair.Input));
                    Assert.IsTrue(OutputsEqual(pair.Output, seekResult.Output));
                }
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: mixed next/seek");
            }

            // test mixed next/seek
            num_ = LuceneTestCase.AtLeast(random, 100);
            for (int iter = 0; iter < num_; iter++)
            {
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("TEST: iter " + iter);
                }
                // reset:
                fstEnum_ = new Int32sRefFSTEnum <T>(fst);
                int upto = -1;
                while (true)
                {
                    bool isDone = false;
                    if (upto == pairs.Count - 1 || random.NextBoolean())
                    {
                        // next
                        upto++;
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("  do next");
                        }
                        isDone = fstEnum_.Next() == null;
                    }
                    else if (upto != -1 && upto < 0.75 * pairs.Count && random.NextBoolean())
                    {
                        int attempt = 0;
                        for (; attempt < 10; attempt++)
                        {
                            Int32sRef term = ToInt32sRef(GetRandomString(random), inputMode);
                            if (!termsMap.ContainsKey(term) && term.CompareTo(pairs[upto].Input) > 0)
                            {
                                int pos = pairs.BinarySearch(new InputOutput <T>(term, default(T)));
                                Debug.Assert(pos < 0);
                                upto = -(pos + 1);

                                if (random.NextBoolean())
                                {
                                    upto--;
                                    Assert.IsTrue(upto != -1);
                                    if (LuceneTestCase.VERBOSE)
                                    {
                                        Console.WriteLine("  do non-exist seekFloor(" + InputToString(inputMode, term) + ")");
                                    }
                                    isDone = fstEnum_.SeekFloor(term) == null;
                                }
                                else
                                {
                                    if (LuceneTestCase.VERBOSE)
                                    {
                                        Console.WriteLine("  do non-exist seekCeil(" + InputToString(inputMode, term) + ")");
                                    }
                                    isDone = fstEnum_.SeekCeil(term) == null;
                                }

                                break;
                            }
                        }
                        if (attempt == 10)
                        {
                            continue;
                        }
                    }
                    else
                    {
                        int inc = random.Next(pairs.Count - upto - 1);
                        upto += inc;
                        if (upto == -1)
                        {
                            upto = 0;
                        }

                        if (random.NextBoolean())
                        {
                            if (LuceneTestCase.VERBOSE)
                            {
                                Console.WriteLine("  do seekCeil(" + InputToString(inputMode, pairs[upto].Input) + ")");
                            }
                            isDone = fstEnum_.SeekCeil(pairs[upto].Input) == null;
                        }
                        else
                        {
                            if (LuceneTestCase.VERBOSE)
                            {
                                Console.WriteLine("  do seekFloor(" + InputToString(inputMode, pairs[upto].Input) + ")");
                            }
                            isDone = fstEnum_.SeekFloor(pairs[upto].Input) == null;
                        }
                    }
                    if (LuceneTestCase.VERBOSE)
                    {
                        if (!isDone)
                        {
                            Console.WriteLine("    got " + InputToString(inputMode, fstEnum_.Current.Input));
                        }
                        else
                        {
                            Console.WriteLine("    got null");
                        }
                    }

                    if (upto == pairs.Count)
                    {
                        Assert.IsTrue(isDone);
                        break;
                    }
                    else
                    {
                        Assert.IsFalse(isDone);
                        Assert.AreEqual(pairs[upto].Input, fstEnum_.Current.Input);
                        Assert.IsTrue(OutputsEqual(pairs[upto].Output, fstEnum_.Current.Output));

                        /*
                         * if (upto < pairs.size()-1) {
                         * int tryCount = 0;
                         * while(tryCount < 10) {
                         * final IntsRef t = toIntsRef(getRandomString(), inputMode);
                         * if (pairs.get(upto).input.compareTo(t) < 0) {
                         * final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0;
                         * if (LuceneTestCase.VERBOSE) {
                         * System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected);
                         * }
                         * Assert.AreEqual(expected, fstEnum.beforeNext(t));
                         * break;
                         * }
                         * tryCount++;
                         * }
                         * }
                         */
                    }
                }
            }
        }