Exemplo n.º 1
0
        internal virtual FST <T> DoTest(int prune1, int prune2, bool allowRandomSuffixSharing)
        {
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
            }

            bool willRewrite = Random.NextBoolean();

            Builder <T> builder = new Builder <T>(InputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, prune1, prune2, prune1 == 0 && prune2 == 0, allowRandomSuffixSharing ? Random.NextBoolean() : true, allowRandomSuffixSharing ? TestUtil.NextInt(Random, 1, 10) : int.MaxValue, Outputs, null, willRewrite, PackedInts.DEFAULT, true, 15);

            if (LuceneTestCase.VERBOSE)
            {
                if (willRewrite)
                {
                    Console.WriteLine("TEST: packed FST");
                }
                else
                {
                    Console.WriteLine("TEST: non-packed FST");
                }
            }

            foreach (InputOutput <T> pair in Pairs)
            {
                if (pair.Output is IList)
                {
                    IList <long>     longValues    = (IList <long>)pair.Output;
                    Builder <object> builderObject = builder as Builder <object>;
                    foreach (long value in longValues)
                    {
                        builderObject.Add(pair.Input, value);
                    }
                }
                else
                {
                    builder.Add(pair.Input, pair.Output);
                }
            }
            FST <T> fst = builder.Finish();

            if (Random.NextBoolean() && fst != null && !willRewrite)
            {
                IOContext   context = LuceneTestCase.NewIOContext(Random);
                IndexOutput @out    = Dir.CreateOutput("fst.bin", context);
                fst.Save(@out);
                @out.Dispose();
                IndexInput @in = Dir.OpenInput("fst.bin", context);
                try
                {
                    fst = new FST <T>(@in, Outputs);
                }
                finally
                {
                    @in.Dispose();
                    Dir.DeleteFile("fst.bin");
                }
            }

            if (LuceneTestCase.VERBOSE && Pairs.Count <= 20 && fst != null)
            {
                TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.Open), IOUtils.CHARSET_UTF_8);
                Util.toDot(fst, w, false, false);
                w.Close();
                Console.WriteLine("SAVED out.dot");
            }

            if (LuceneTestCase.VERBOSE)
            {
                if (fst == null)
                {
                    Console.WriteLine("  fst has 0 nodes (fully pruned)");
                }
                else
                {
                    Console.WriteLine("  fst has " + fst.NodeCount + " nodes and " + fst.ArcCount + " arcs");
                }
            }

            if (prune1 == 0 && prune2 == 0)
            {
                VerifyUnPruned(InputMode, fst);
            }
            else
            {
                VerifyPruned(InputMode, fst, prune1, prune2);
            }

            return(fst);
        }
Exemplo n.º 2
0
        public virtual void Test()
        {
            int[]   ints  = new int[7];
            IntsRef input = new IntsRef(ints, 0, ints.Length);
            int     seed  = Random().Next();

            Directory dir = new MMapDirectory(CreateTempDir("2BFST"));

            for (int doPackIter = 0; doPackIter < 2; doPackIter++)
            {
                bool doPack = doPackIter == 1;

                // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
                if (!doPack)
                {
                    Console.WriteLine("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
                    Outputs <object> outputs   = NoOutputs.Singleton;
                    object           NO_OUTPUT = outputs.NoOutput;
                    Builder <object> b         = new Builder <object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);

                    int     count  = 0;
                    Random  r      = new Random(seed);
                    int[]   ints2  = new int[200];
                    IntsRef input2 = new IntsRef(ints2, 0, ints2.Length);
                    while (true)
                    {
                        //System.out.println("add: " + input + " -> " + output);
                        for (int i = 10; i < ints2.Length; i++)
                        {
                            ints2[i] = r.Next(256);
                        }
                        b.Add(input2, NO_OUTPUT);
                        count++;
                        if (count % 100000 == 0)
                        {
                            Console.WriteLine(count + ": " + b.FstSizeInBytes() + " bytes; " + b.TotStateCount + " nodes");
                        }
                        if (b.TotStateCount > int.MaxValue + 100L * 1024 * 1024)
                        {
                            break;
                        }
                        NextInput(r, ints2);
                    }

                    FST <object> fst = b.Finish();

                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        Arrays.Fill(ints2, 0);
                        r = new Random(seed);

                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }
                            for (int j = 10; j < ints2.Length; j++)
                            {
                                ints2[j] = r.Next(256);
                            }
                            Assert.AreEqual(NO_OUTPUT, Util.Get(fst, input2));
                            NextInput(r, ints2);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        IntsRefFSTEnum <object> fstEnum = new IntsRefFSTEnum <object>(fst);

                        Arrays.Fill(ints2, 0);
                        r = new Random(seed);
                        int upto = 0;
                        while (true)
                        {
                            IntsRefFSTEnum <object> .InputOutput <object> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            for (int j = 10; j < ints2.Length; j++)
                            {
                                ints2[j] = r.Next(256);
                            }
                            Assert.AreEqual(input2, pair.Input);
                            Assert.AreEqual(NO_OUTPUT, pair.Output);
                            upto++;
                            NextInput(r, ints2);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST <object>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }

                // Build FST w/ ByteSequenceOutputs and stop when FST
                // size = 3GB
                {
                    Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
                    Outputs <BytesRef> outputs = ByteSequenceOutputs.Singleton;
                    Builder <BytesRef> b       = new Builder <BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);

                    var      outputBytes = new byte[20];
                    BytesRef output      = new BytesRef(outputBytes);
                    Arrays.Fill(ints, 0);
                    int    count = 0;
                    Random r     = new Random(seed);
                    while (true)
                    {
                        r.NextBytes(outputBytes);
                        //System.out.println("add: " + input + " -> " + output);
                        b.Add(input, BytesRef.DeepCopyOf(output));
                        count++;
                        if (count % 1000000 == 0)
                        {
                            Console.WriteLine(count + "...: " + b.FstSizeInBytes() + " bytes");
                        }
                        if (b.FstSizeInBytes() > LIMIT)
                        {
                            break;
                        }
                        NextInput(r, ints);
                    }

                    FST <BytesRef> fst = b.Finish();
                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        r = new Random(seed);
                        Arrays.Fill(ints, 0);

                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }
                            r.NextBytes((byte[])(Array)outputBytes);
                            Assert.AreEqual(output, Util.Get(fst, input));
                            NextInput(r, ints);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        IntsRefFSTEnum <BytesRef> fstEnum = new IntsRefFSTEnum <BytesRef>(fst);

                        Arrays.Fill(ints, 0);
                        r = new Random(seed);
                        int upto = 0;
                        while (true)
                        {
                            IntsRefFSTEnum <BytesRef> .InputOutput <BytesRef> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            Assert.AreEqual(input, pair.Input);
                            r.NextBytes((byte[])(Array)outputBytes);
                            Assert.AreEqual(output, pair.Output);
                            upto++;
                            NextInput(r, ints);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST <BytesRef>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }

                // Build FST w/ PositiveIntOutputs and stop when FST
                // size = 3GB
                {
                    Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
                    Outputs <long?> outputs = PositiveIntOutputs.Singleton;
                    Builder <long?> b       = new Builder <long?>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);

                    long output = 1;

                    Arrays.Fill(ints, 0);
                    int    count = 0;
                    Random r     = new Random(seed);
                    while (true)
                    {
                        //System.out.println("add: " + input + " -> " + output);
                        b.Add(input, output);
                        output += 1 + r.Next(10);
                        count++;
                        if (count % 1000000 == 0)
                        {
                            Console.WriteLine(count + "...: " + b.FstSizeInBytes() + " bytes");
                        }
                        if (b.FstSizeInBytes() > LIMIT)
                        {
                            break;
                        }
                        NextInput(r, ints);
                    }

                    FST <long?> fst = b.Finish();

                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        Arrays.Fill(ints, 0);

                        output = 1;
                        r      = new Random(seed);
                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }

                            // forward lookup:
                            Assert.AreEqual(output, (long)Util.Get(fst, input));
                            // reverse lookup:
                            Assert.AreEqual(input, Util.GetByOutput(fst, output));
                            output += 1 + r.Next(10);
                            NextInput(r, ints);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        IntsRefFSTEnum <long?> fstEnum = new IntsRefFSTEnum <long?>(fst);

                        Arrays.Fill(ints, 0);
                        r = new Random(seed);
                        int upto = 0;
                        output = 1;
                        while (true)
                        {
                            IntsRefFSTEnum <long?> .InputOutput <long?> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            Assert.AreEqual(input, pair.Input);
                            Assert.AreEqual(output, pair.Output.Value);
                            output += 1 + r.Next(10);
                            upto++;
                            NextInput(r, ints);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST <long?>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }
            }
            dir.Dispose();
        }
Exemplo n.º 3
0
        internal virtual FST <T> DoTest(int prune1, int prune2, bool allowRandomSuffixSharing)
        {
            if (LuceneTestCase.Verbose)
            {
                Console.WriteLine("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
            }

            bool willRewrite = random.NextBoolean();

            Builder <T> builder = new Builder <T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
                                                  prune1, prune2,
                                                  prune1 == 0 && prune2 == 0,
                                                  allowRandomSuffixSharing ? random.NextBoolean() : true,
                                                  allowRandomSuffixSharing ? TestUtil.NextInt32(random, 1, 10) : int.MaxValue,
                                                  outputs,
                                                  null,
                                                  willRewrite,
                                                  PackedInt32s.DEFAULT,
                                                  true,
                                                  15);

            if (LuceneTestCase.Verbose)
            {
                if (willRewrite)
                {
                    Console.WriteLine("TEST: packed FST");
                }
                else
                {
                    Console.WriteLine("TEST: non-packed FST");
                }
            }

            foreach (InputOutput <T> pair in pairs)
            {
                if (pair.Output is IEnumerable)
                {
                    Builder <object> builderObject = builder as Builder <object>;
                    var values = pair.Output as IEnumerable;
                    foreach (object value in values)
                    {
                        builderObject.Add(pair.Input, value);
                    }
                }
                else
                {
                    builder.Add(pair.Input, pair.Output);
                }
            }
            FST <T> fst = builder.Finish();

            if (random.NextBoolean() && fst != null && !willRewrite)
            {
                IOContext context = LuceneTestCase.NewIOContext(random);
                using (IndexOutput @out = dir.CreateOutput("fst.bin", context))
                {
                    fst.Save(@out);
                }
                IndexInput @in = dir.OpenInput("fst.bin", context);
                try
                {
                    fst = new FST <T>(@in, outputs);
                }
                finally
                {
                    @in.Dispose();
                    dir.DeleteFile("fst.bin");
                }
            }

            if (LuceneTestCase.Verbose && pairs.Count <= 20 && fst != null)
            {
                using (TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.OpenOrCreate), Encoding.UTF8))
                {
                    Util.ToDot(fst, w, false, false);
                }
                Console.WriteLine("SAVED out.dot");
            }

            if (LuceneTestCase.Verbose)
            {
                if (fst == null)
                {
                    Console.WriteLine("  fst has 0 nodes (fully pruned)");
                }
                else
                {
                    Console.WriteLine("  fst has " + fst.NodeCount + " nodes and " + fst.ArcCount + " arcs");
                }
            }

            if (prune1 == 0 && prune2 == 0)
            {
                VerifyUnPruned(inputMode, fst);
            }
            else
            {
                VerifyPruned(inputMode, fst, prune1, prune2);
            }

            return(fst);
        }