Example #1
0
        public virtual void Test()
        {
            int[]   ints  = new int[7];
            IntsRef input = new IntsRef(ints, 0, ints.Length);
            int     seed  = Random().Next();

            Directory dir = new MMapDirectory(CreateTempDir("2BFST"));

            for (int doPackIter = 0; doPackIter < 2; doPackIter++)
            {
                bool doPack = doPackIter == 1;

                // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
                if (!doPack)
                {
                    Console.WriteLine("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
                    Outputs <object> outputs   = NoOutputs.Singleton;
                    object           NO_OUTPUT = outputs.NoOutput;
                    Builder <object> b         = new Builder <object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);

                    int     count  = 0;
                    Random  r      = new Random(seed);
                    int[]   ints2  = new int[200];
                    IntsRef input2 = new IntsRef(ints2, 0, ints2.Length);
                    while (true)
                    {
                        //System.out.println("add: " + input + " -> " + output);
                        for (int i = 10; i < ints2.Length; i++)
                        {
                            ints2[i] = r.Next(256);
                        }
                        b.Add(input2, NO_OUTPUT);
                        count++;
                        if (count % 100000 == 0)
                        {
                            Console.WriteLine(count + ": " + b.FstSizeInBytes() + " bytes; " + b.TotStateCount + " nodes");
                        }
                        if (b.TotStateCount > int.MaxValue + 100L * 1024 * 1024)
                        {
                            break;
                        }
                        NextInput(r, ints2);
                    }

                    FST <object> fst = b.Finish();

                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        Arrays.Fill(ints2, 0);
                        r = new Random(seed);

                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }
                            for (int j = 10; j < ints2.Length; j++)
                            {
                                ints2[j] = r.Next(256);
                            }
                            Assert.AreEqual(NO_OUTPUT, Util.Get(fst, input2));
                            NextInput(r, ints2);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        IntsRefFSTEnum <object> fstEnum = new IntsRefFSTEnum <object>(fst);

                        Arrays.Fill(ints2, 0);
                        r = new Random(seed);
                        int upto = 0;
                        while (true)
                        {
                            IntsRefFSTEnum.InputOutput <object> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            for (int j = 10; j < ints2.Length; j++)
                            {
                                ints2[j] = r.Next(256);
                            }
                            Assert.AreEqual(input2, pair.Input);
                            Assert.AreEqual(NO_OUTPUT, pair.Output);
                            upto++;
                            NextInput(r, ints2);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST <object>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }

                // Build FST w/ ByteSequenceOutputs and stop when FST
                // size = 3GB
                {
                    Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
                    Outputs <BytesRef> outputs = ByteSequenceOutputs.Singleton;
                    Builder <BytesRef> b       = new Builder <BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);

                    var      outputBytes = new byte[20];
                    BytesRef output      = new BytesRef(outputBytes);
                    Arrays.Fill(ints, 0);
                    int    count = 0;
                    Random r     = new Random(seed);
                    while (true)
                    {
                        r.NextBytes(outputBytes);
                        //System.out.println("add: " + input + " -> " + output);
                        b.Add(input, BytesRef.DeepCopyOf(output));
                        count++;
                        if (count % 1000000 == 0)
                        {
                            Console.WriteLine(count + "...: " + b.FstSizeInBytes() + " bytes");
                        }
                        if (b.FstSizeInBytes() > LIMIT)
                        {
                            break;
                        }
                        NextInput(r, ints);
                    }

                    FST <BytesRef> fst = b.Finish();
                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        r = new Random(seed);
                        Arrays.Fill(ints, 0);

                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }
                            r.NextBytes((byte[])(Array)outputBytes);
                            Assert.AreEqual(output, Util.Get(fst, input));
                            NextInput(r, ints);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        IntsRefFSTEnum <BytesRef> fstEnum = new IntsRefFSTEnum <BytesRef>(fst);

                        Arrays.Fill(ints, 0);
                        r = new Random(seed);
                        int upto = 0;
                        while (true)
                        {
                            IntsRefFSTEnum.InputOutput <BytesRef> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            Assert.AreEqual(input, pair.Input);
                            r.NextBytes((byte[])(Array)outputBytes);
                            Assert.AreEqual(output, pair.Output);
                            upto++;
                            NextInput(r, ints);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST <BytesRef>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }

                // Build FST w/ PositiveIntOutputs and stop when FST
                // size = 3GB
                {
                    Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
                    Outputs <long?> outputs = PositiveIntOutputs.Singleton;
                    Builder <long?> b       = new Builder <long?>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);

                    long output = 1;

                    Arrays.Fill(ints, 0);
                    int    count = 0;
                    Random r     = new Random(seed);
                    while (true)
                    {
                        //System.out.println("add: " + input + " -> " + output);
                        b.Add(input, output);
                        output += 1 + r.Next(10);
                        count++;
                        if (count % 1000000 == 0)
                        {
                            Console.WriteLine(count + "...: " + b.FstSizeInBytes() + " bytes");
                        }
                        if (b.FstSizeInBytes() > LIMIT)
                        {
                            break;
                        }
                        NextInput(r, ints);
                    }

                    FST <long?> fst = b.Finish();

                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        Arrays.Fill(ints, 0);

                        output = 1;
                        r      = new Random(seed);
                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }

                            // forward lookup:
                            Assert.AreEqual(output, (long)Util.Get(fst, input));
                            // reverse lookup:
                            Assert.AreEqual(input, Util.GetByOutput(fst, output));
                            output += 1 + r.Next(10);
                            NextInput(r, ints);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        IntsRefFSTEnum <long?> fstEnum = new IntsRefFSTEnum <long?>(fst);

                        Arrays.Fill(ints, 0);
                        r = new Random(seed);
                        int upto = 0;
                        output = 1;
                        while (true)
                        {
                            IntsRefFSTEnum.InputOutput <long?> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            Assert.AreEqual(input, pair.Input);
                            Assert.AreEqual(output, pair.Output.Value);
                            output += 1 + r.Next(10);
                            upto++;
                            NextInput(r, ints);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST <long?>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }
            }
            dir.Dispose();
        }
Example #2
0
        // FST is complete
        private void VerifyUnPruned(int inputMode, FST <T> fst)
        {
            FST <long?>  fstLong;
            ISet <long?> validOutputs;
            long         minLong = long.MaxValue;
            long         maxLong = long.MinValue;

            if (DoReverseLookup)
            {
                FST <long?> fstLong0 = fst as FST <long?>;
                fstLong      = fstLong0;
                validOutputs = new HashSet <long?>();
                foreach (InputOutput <T> pair in Pairs)
                {
                    long?output = pair.Output as long?;
                    maxLong = Math.Max(maxLong, output.Value);
                    minLong = Math.Min(minLong, output.Value);
                    validOutputs.Add(output.Value);
                }
            }
            else
            {
                fstLong      = null;
                validOutputs = null;
            }

            if (Pairs.Count == 0)
            {
                Assert.IsNull(fst);
                return;
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: now verify " + Pairs.Count + " terms");
                foreach (InputOutput <T> pair in Pairs)
                {
                    Assert.IsNotNull(pair);
                    Assert.IsNotNull(pair.Input);
                    Assert.IsNotNull(pair.Output);
                    Console.WriteLine("  " + InputToString(inputMode, pair.Input) + ": " + Outputs.OutputToString(pair.Output));
                }
            }

            Assert.IsNotNull(fst);

            // visit valid pairs in order -- make sure all words
            // are accepted, and FSTEnum's next() steps through
            // them correctly
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: check valid terms/next()");
            }
            {
                IntsRefFSTEnum <T> fstEnum = new IntsRefFSTEnum <T>(fst);
                foreach (InputOutput <T> pair in Pairs)
                {
                    IntsRef term = pair.Input;
                    if (LuceneTestCase.VERBOSE)
                    {
                        Console.WriteLine("TEST: check term=" + InputToString(inputMode, term) + " output=" + fst.Outputs.OutputToString(pair.Output));
                    }
                    T output = Run(fst, term, null);
                    Assert.IsNotNull(output, "term " + InputToString(inputMode, term) + " is not accepted");
                    Assert.IsTrue(OutputsEqual(pair.Output, output));

                    // verify enum's next
                    IntsRefFSTEnum.InputOutput <T> t = fstEnum.Next();
                    Assert.IsNotNull(t);
                    Assert.AreEqual(term, t.Input, "expected input=" + InputToString(inputMode, term) + " but fstEnum returned " + InputToString(inputMode, t.Input));
                    Assert.IsTrue(OutputsEqual(pair.Output, t.Output));
                }
                Assert.IsNull(fstEnum.Next());
            }

            IDictionary <IntsRef, T> termsMap = new Dictionary <IntsRef, T>();

            foreach (InputOutput <T> pair in Pairs)
            {
                termsMap[pair.Input] = pair.Output;
            }

            if (DoReverseLookup && maxLong > minLong)
            {
                // Do random lookups so we test null (output doesn't
                // exist) case:
                Assert.IsNull(Util.GetByOutput(fstLong, minLong - 7));
                Assert.IsNull(Util.GetByOutput(fstLong, maxLong + 7));

                int num = LuceneTestCase.AtLeast(Random, 100);
                for (int iter = 0; iter < num; iter++)
                {
                    long    v     = TestUtil.NextLong(Random, minLong, maxLong);
                    IntsRef input = Util.GetByOutput(fstLong, v);
                    Assert.IsTrue(validOutputs.Contains(v) || input == null);
                }
            }

            // find random matching word and make sure it's valid
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: verify random accepted terms");
            }
            IntsRef scratch = new IntsRef(10);
            int     num_    = LuceneTestCase.AtLeast(Random, 500);

            for (int iter = 0; iter < num_; iter++)
            {
                T output = RandomAcceptedWord(fst, scratch);
                Assert.IsTrue(termsMap.ContainsKey(scratch), "accepted word " + InputToString(inputMode, scratch) + " is not valid");
                Assert.IsTrue(OutputsEqual(termsMap[scratch], output));

                if (DoReverseLookup)
                {
                    //System.out.println("lookup output=" + output + " outs=" + fst.Outputs);
                    IntsRef input = Util.GetByOutput(fstLong, (output as long?).Value);
                    Assert.IsNotNull(input);
                    //System.out.println("  got " + Util.toBytesRef(input, new BytesRef()).utf8ToString());
                    Assert.AreEqual(scratch, input);
                }
            }

            // test IntsRefFSTEnum.Seek:
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: verify seek");
            }
            IntsRefFSTEnum <T> fstEnum_ = new IntsRefFSTEnum <T>(fst);

            num_ = LuceneTestCase.AtLeast(Random, 100);
            for (int iter = 0; iter < num_; iter++)
            {
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("  iter=" + iter);
                }
                if (Random.NextBoolean())
                {
                    // seek to term that doesn't exist:
                    while (true)
                    {
                        IntsRef term = ToIntsRef(GetRandomString(Random), inputMode);
                        int     pos  = Pairs.BinarySearch(new InputOutput <T>(term, default(T)));
                        if (pos < 0)
                        {
                            pos = -(pos + 1);
                            // ok doesn't exist
                            //System.out.println("  seek " + inputToString(inputMode, term));
                            IntsRefFSTEnum.InputOutput <T> seekResult;
                            if (Random.Next(3) == 0)
                            {
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("  do non-exist seekExact term=" + InputToString(inputMode, term));
                                }
                                seekResult = fstEnum_.SeekExact(term);
                                pos        = -1;
                            }
                            else if (Random.NextBoolean())
                            {
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("  do non-exist seekFloor term=" + InputToString(inputMode, term));
                                }
                                seekResult = fstEnum_.SeekFloor(term);
                                pos--;
                            }
                            else
                            {
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("  do non-exist seekCeil term=" + InputToString(inputMode, term));
                                }
                                seekResult = fstEnum_.SeekCeil(term);
                            }

                            if (pos != -1 && pos < Pairs.Count)
                            {
                                //System.out.println("    got " + inputToString(inputMode,seekResult.input) + " output=" + fst.Outputs.outputToString(seekResult.Output));
                                Assert.IsNotNull(seekResult, "got null but expected term=" + InputToString(inputMode, Pairs[pos].Input));
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("    got " + InputToString(inputMode, seekResult.Input));
                                }
                                Assert.AreEqual(Pairs[pos].Input, seekResult.Input, "expected " + InputToString(inputMode, Pairs[pos].Input) + " but got " + InputToString(inputMode, seekResult.Input));
                                Assert.IsTrue(OutputsEqual(Pairs[pos].Output, seekResult.Output));
                            }
                            else
                            {
                                // seeked before start or beyond end
                                //System.out.println("seek=" + seekTerm);
                                Assert.IsNull(seekResult, "expected null but got " + (seekResult == null ? "null" : InputToString(inputMode, seekResult.Input)));
                                if (LuceneTestCase.VERBOSE)
                                {
                                    Console.WriteLine("    got null");
                                }
                            }

                            break;
                        }
                    }
                }
                else
                {
                    // seek to term that does exist:
                    InputOutput <T> pair = Pairs[Random.Next(Pairs.Count)];
                    IntsRefFSTEnum.InputOutput <T> seekResult;
                    if (Random.Next(3) == 2)
                    {
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("  do exists seekExact term=" + InputToString(inputMode, pair.Input));
                        }
                        seekResult = fstEnum_.SeekExact(pair.Input);
                    }
                    else if (Random.NextBoolean())
                    {
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("  do exists seekFloor " + InputToString(inputMode, pair.Input));
                        }
                        seekResult = fstEnum_.SeekFloor(pair.Input);
                    }
                    else
                    {
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("  do exists seekCeil " + InputToString(inputMode, pair.Input));
                        }
                        seekResult = fstEnum_.SeekCeil(pair.Input);
                    }
                    Assert.IsNotNull(seekResult);
                    Assert.AreEqual(pair.Input, seekResult.Input, "got " + InputToString(inputMode, seekResult.Input) + " but expected " + InputToString(inputMode, pair.Input));
                    Assert.IsTrue(OutputsEqual(pair.Output, seekResult.Output));
                }
            }

            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("TEST: mixed next/seek");
            }

            // test mixed next/seek
            num_ = LuceneTestCase.AtLeast(Random, 100);
            for (int iter = 0; iter < num_; iter++)
            {
                if (LuceneTestCase.VERBOSE)
                {
                    Console.WriteLine("TEST: iter " + iter);
                }
                // reset:
                fstEnum_ = new IntsRefFSTEnum <T>(fst);
                int upto = -1;
                while (true)
                {
                    bool isDone = false;
                    if (upto == Pairs.Count - 1 || Random.NextBoolean())
                    {
                        // next
                        upto++;
                        if (LuceneTestCase.VERBOSE)
                        {
                            Console.WriteLine("  do next");
                        }
                        isDone = fstEnum_.Next() == null;
                    }
                    else if (upto != -1 && upto < 0.75 * Pairs.Count && Random.NextBoolean())
                    {
                        int attempt = 0;
                        for (; attempt < 10; attempt++)
                        {
                            IntsRef term = ToIntsRef(GetRandomString(Random), inputMode);
                            if (!termsMap.ContainsKey(term) && term.CompareTo(Pairs[upto].Input) > 0)
                            {
                                int pos = Pairs.BinarySearch(new InputOutput <T>(term, default(T)));
                                Debug.Assert(pos < 0);
                                upto = -(pos + 1);

                                if (Random.NextBoolean())
                                {
                                    upto--;
                                    Assert.IsTrue(upto != -1);
                                    if (LuceneTestCase.VERBOSE)
                                    {
                                        Console.WriteLine("  do non-exist seekFloor(" + InputToString(inputMode, term) + ")");
                                    }
                                    isDone = fstEnum_.SeekFloor(term) == null;
                                }
                                else
                                {
                                    if (LuceneTestCase.VERBOSE)
                                    {
                                        Console.WriteLine("  do non-exist seekCeil(" + InputToString(inputMode, term) + ")");
                                    }
                                    isDone = fstEnum_.SeekCeil(term) == null;
                                }

                                break;
                            }
                        }
                        if (attempt == 10)
                        {
                            continue;
                        }
                    }
                    else
                    {
                        int inc = Random.Next(Pairs.Count - upto - 1);
                        upto += inc;
                        if (upto == -1)
                        {
                            upto = 0;
                        }

                        if (Random.NextBoolean())
                        {
                            if (LuceneTestCase.VERBOSE)
                            {
                                Console.WriteLine("  do seekCeil(" + InputToString(inputMode, Pairs[upto].Input) + ")");
                            }
                            isDone = fstEnum_.SeekCeil(Pairs[upto].Input) == null;
                        }
                        else
                        {
                            if (LuceneTestCase.VERBOSE)
                            {
                                Console.WriteLine("  do seekFloor(" + InputToString(inputMode, Pairs[upto].Input) + ")");
                            }
                            isDone = fstEnum_.SeekFloor(Pairs[upto].Input) == null;
                        }
                    }
                    if (LuceneTestCase.VERBOSE)
                    {
                        if (!isDone)
                        {
                            Console.WriteLine("    got " + InputToString(inputMode, fstEnum_.Current().Input));
                        }
                        else
                        {
                            Console.WriteLine("    got null");
                        }
                    }

                    if (upto == Pairs.Count)
                    {
                        Assert.IsTrue(isDone);
                        break;
                    }
                    else
                    {
                        Assert.IsFalse(isDone);
                        Assert.AreEqual(Pairs[upto].Input, fstEnum_.Current().Input);
                        Assert.IsTrue(OutputsEqual(Pairs[upto].Output, fstEnum_.Current().Output));

                        /*
                         * if (upto < pairs.size()-1) {
                         * int tryCount = 0;
                         * while(tryCount < 10) {
                         * final IntsRef t = toIntsRef(getRandomString(), inputMode);
                         * if (pairs.get(upto).input.compareTo(t) < 0) {
                         * final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0;
                         * if (LuceneTestCase.VERBOSE) {
                         * System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected);
                         * }
                         * Assert.AreEqual(expected, fstEnum.beforeNext(t));
                         * break;
                         * }
                         * tryCount++;
                         * }
                         * }
                         */
                    }
                }
            }
        }