public override void Finish(long termsFilePointer)
 {
     fst = fstBuilder.Finish();
     if (fst != null)
     {
         fst.Save(outerInstance.m_output);
     }
 }
 public override void Finish(long termsFilePointer)
 {
     Fst = _fstBuilder.Finish();
     if (Fst != null)
     {
         Fst.Save(_vgtiw.Output);
     }
 }
Esempio n. 3
0
 public override bool Store(DataOutput output)
 {
     output.WriteVInt64(count);
     if (fst == null)
     {
         return(false);
     }
     fst.Save(output);
     return(true);
 }
Esempio n. 4
0
 public override bool Store(DataOutput output)
 {
     CodecUtil.WriteHeader(output, CODEC_NAME, VERSION_CURRENT);
     output.WriteVInt64(count);
     output.WriteByte(separator);
     output.WriteVInt32(grams);
     output.WriteVInt64(totTokens);
     fst.Save(output);
     return(true);
 }
Esempio n. 5
0
        protected virtual void WriteFST(string filename)
        {
            FileInfo f = new FileInfo(filename);

            if (!f.Directory.Exists)
            {
                f.Directory.Create();
            }
            fst.Save(f);
        }
Esempio n. 6
0
        public override bool Store(DataOutput output)
        {
            output.WriteVInt64(count);
            if (fst == null)
            {
                return(false);
            }

            fst.Save(output);
            output.WriteVInt32(maxAnalyzedPathsForOneInput);
            output.WriteByte((byte)(hasPayloads ? 1 : 0));
            return(true);
        }
 public override void Finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
 {
     if (termCount > 0)
     {
         @out.WriteVInt32(termCount);
         @out.WriteVInt32(field.Number);
         if (field.IndexOptions != IndexOptions.DOCS_ONLY)
         {
             @out.WriteVInt64(sumTotalTermFreq);
         }
         @out.WriteVInt64(sumDocFreq);
         @out.WriteVInt32(docCount);
         FST <BytesRef> fst = builder.Finish();
         fst.Save(@out);
         //System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
     }
 }
Esempio n. 8
0
        private void WriteFST(FieldInfo field, IEnumerable <BytesRef> values)
        {
            meta.WriteVInt32(field.Number);
            meta.WriteByte(MemoryDocValuesProducer.FST);
            meta.WriteInt64(data.GetFilePointer());
            PositiveInt32Outputs outputs = PositiveInt32Outputs.Singleton;
            var  builder = new Builder <long?>(INPUT_TYPE.BYTE1, outputs);
            var  scratch = new Int32sRef();
            long ord     = 0;

            foreach (BytesRef v in values)
            {
                builder.Add(Util.ToInt32sRef(v, scratch), ord);
                ord++;
            }
            FST <long?> fst = builder.Finish();

            if (fst != null)
            {
                fst.Save(data);
            }
            meta.WriteVInt64(ord);
        }
Esempio n. 9
0
        internal virtual FST <T> DoTest(int prune1, int prune2, bool allowRandomSuffixSharing)
        {
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
            }

            bool willRewrite = Random.NextBoolean();

            Builder <T> builder = new Builder <T>(InputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, prune1, prune2, prune1 == 0 && prune2 == 0, allowRandomSuffixSharing ? Random.NextBoolean() : true, allowRandomSuffixSharing ? TestUtil.NextInt(Random, 1, 10) : int.MaxValue, Outputs, null, willRewrite, PackedInts.DEFAULT, true, 15);

            if (LuceneTestCase.VERBOSE)
            {
                if (willRewrite)
                {
                    Console.WriteLine("TEST: packed FST");
                }
                else
                {
                    Console.WriteLine("TEST: non-packed FST");
                }
            }

            foreach (InputOutput <T> pair in Pairs)
            {
                if (pair.Output is IList)
                {
                    IList <long>     longValues    = (IList <long>)pair.Output;
                    Builder <object> builderObject = builder as Builder <object>;
                    foreach (long value in longValues)
                    {
                        builderObject.Add(pair.Input, value);
                    }
                }
                else
                {
                    builder.Add(pair.Input, pair.Output);
                }
            }
            FST <T> fst = builder.Finish();

            if (Random.NextBoolean() && fst != null && !willRewrite)
            {
                IOContext   context = LuceneTestCase.NewIOContext(Random);
                IndexOutput @out    = Dir.CreateOutput("fst.bin", context);
                fst.Save(@out);
                @out.Dispose();
                IndexInput @in = Dir.OpenInput("fst.bin", context);
                try
                {
                    fst = new FST <T>(@in, Outputs);
                }
                finally
                {
                    @in.Dispose();
                    Dir.DeleteFile("fst.bin");
                }
            }

            if (LuceneTestCase.VERBOSE && Pairs.Count <= 20 && fst != null)
            {
                TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.Open), IOUtils.CHARSET_UTF_8);
                Util.toDot(fst, w, false, false);
                w.Close();
                Console.WriteLine("SAVED out.dot");
            }

            if (LuceneTestCase.VERBOSE)
            {
                if (fst == null)
                {
                    Console.WriteLine("  fst has 0 nodes (fully pruned)");
                }
                else
                {
                    Console.WriteLine("  fst has " + fst.NodeCount + " nodes and " + fst.ArcCount + " arcs");
                }
            }

            if (prune1 == 0 && prune2 == 0)
            {
                VerifyUnPruned(InputMode, fst);
            }
            else
            {
                VerifyPruned(InputMode, fst, prune1, prune2);
            }

            return(fst);
        }
Esempio n. 10
0
        internal virtual FST <T> DoTest(int prune1, int prune2, bool allowRandomSuffixSharing)
        {
            if (LuceneTestCase.VERBOSE)
            {
                Console.WriteLine("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
            }

            bool willRewrite = random.NextBoolean();

            Builder <T> builder = new Builder <T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
                                                  prune1, prune2,
                                                  prune1 == 0 && prune2 == 0,
                                                  allowRandomSuffixSharing ? random.NextBoolean() : true,
                                                  allowRandomSuffixSharing ? TestUtil.NextInt32(random, 1, 10) : int.MaxValue,
                                                  outputs,
                                                  null,
                                                  willRewrite,
                                                  PackedInt32s.DEFAULT,
                                                  true,
                                                  15);

            if (LuceneTestCase.VERBOSE)
            {
                if (willRewrite)
                {
                    Console.WriteLine("TEST: packed FST");
                }
                else
                {
                    Console.WriteLine("TEST: non-packed FST");
                }
            }

            foreach (InputOutput <T> pair in pairs)
            {
                if (pair.Output is IEnumerable)
                {
                    Builder <object> builderObject = builder as Builder <object>;
                    var values = pair.Output as IEnumerable;
                    foreach (object value in values)
                    {
                        builderObject.Add(pair.Input, value);
                    }
                }
                else
                {
                    builder.Add(pair.Input, pair.Output);
                }
            }
            FST <T> fst = builder.Finish();

            if (random.NextBoolean() && fst != null && !willRewrite)
            {
                IOContext context = LuceneTestCase.NewIOContext(random);
                using (IndexOutput @out = dir.CreateOutput("fst.bin", context))
                {
                    fst.Save(@out);
                }
                IndexInput @in = dir.OpenInput("fst.bin", context);
                try
                {
                    fst = new FST <T>(@in, outputs);
                }
                finally
                {
                    @in.Dispose();
                    dir.DeleteFile("fst.bin");
                }
            }

            if (LuceneTestCase.VERBOSE && pairs.Count <= 20 && fst != null)
            {
                using (TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.OpenOrCreate), Encoding.UTF8))
                {
                    Util.ToDot(fst, w, false, false);
                }
                Console.WriteLine("SAVED out.dot");
            }

            if (LuceneTestCase.VERBOSE)
            {
                if (fst == null)
                {
                    Console.WriteLine("  fst has 0 nodes (fully pruned)");
                }
                else
                {
                    Console.WriteLine("  fst has " + fst.NodeCount + " nodes and " + fst.ArcCount + " arcs");
                }
            }

            if (prune1 == 0 && prune2 == 0)
            {
                VerifyUnPruned(inputMode, fst);
            }
            else
            {
                VerifyPruned(inputMode, fst, prune1, prune2);
            }

            return(fst);
        }
Esempio n. 11
0
        public virtual void Test()
        {
            int[]     ints  = new int[7];
            Int32sRef input = new Int32sRef(ints, 0, ints.Length);
            int       seed  = Random.Next();

            Directory dir = new MMapDirectory(CreateTempDir("2BFST"));

            for (int doPackIter = 0; doPackIter < 2; doPackIter++)
            {
                bool doPack = doPackIter == 1;

                // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
                if (!doPack)
                {
                    Console.WriteLine("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
                    Outputs <object> outputs   = NoOutputs.Singleton;
                    object           NO_OUTPUT = outputs.NoOutput;
                    Builder <object> b         = new Builder <object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15);

                    int       count  = 0;
                    Random    r      = new Random(seed);
                    int[]     ints2  = new int[200];
                    Int32sRef input2 = new Int32sRef(ints2, 0, ints2.Length);
                    while (true)
                    {
                        //System.out.println("add: " + input + " -> " + output);
                        for (int i = 10; i < ints2.Length; i++)
                        {
                            ints2[i] = r.Next(256);
                        }
                        b.Add(input2, NO_OUTPUT);
                        count++;
                        if (count % 100000 == 0)
                        {
                            Console.WriteLine(count + ": " + b.GetFstSizeInBytes() + " bytes; " + b.TotStateCount + " nodes");
                        }
                        if (b.TotStateCount > int.MaxValue + 100L * 1024 * 1024)
                        {
                            break;
                        }
                        NextInput(r, ints2);
                    }

                    FST <object> fst = b.Finish();

                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        Arrays.Fill(ints2, 0);
                        r = new Random(seed);

                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }
                            for (int j = 10; j < ints2.Length; j++)
                            {
                                ints2[j] = r.Next(256);
                            }
                            Assert.AreEqual(NO_OUTPUT, Util.Get(fst, input2));
                            NextInput(r, ints2);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        Int32sRefFSTEnum <object> fstEnum = new Int32sRefFSTEnum <object>(fst);

                        Arrays.Fill(ints2, 0);
                        r = new Random(seed);
                        int upto = 0;
                        while (true)
                        {
                            Int32sRefFSTEnum.InputOutput <object> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            for (int j = 10; j < ints2.Length; j++)
                            {
                                ints2[j] = r.Next(256);
                            }
                            Assert.AreEqual(input2, pair.Input);
                            Assert.AreEqual(NO_OUTPUT, pair.Output);
                            upto++;
                            NextInput(r, ints2);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST <object>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }

                // Build FST w/ ByteSequenceOutputs and stop when FST
                // size = 3GB
                {
                    Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
                    Outputs <BytesRef> outputs = ByteSequenceOutputs.Singleton;
                    Builder <BytesRef> b       = new Builder <BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15);

                    var      outputBytes = new byte[20];
                    BytesRef output      = new BytesRef(outputBytes);
                    Arrays.Fill(ints, 0);
                    int    count = 0;
                    Random r     = new Random(seed);
                    while (true)
                    {
                        r.NextBytes(outputBytes);
                        //System.out.println("add: " + input + " -> " + output);
                        b.Add(input, BytesRef.DeepCopyOf(output));
                        count++;
                        if (count % 1000000 == 0)
                        {
                            Console.WriteLine(count + "...: " + b.GetFstSizeInBytes() + " bytes");
                        }
                        if (b.GetFstSizeInBytes() > LIMIT)
                        {
                            break;
                        }
                        NextInput(r, ints);
                    }

                    FST <BytesRef> fst = b.Finish();
                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        r = new Random(seed);
                        Arrays.Fill(ints, 0);

                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }
                            r.NextBytes(outputBytes);
                            Assert.AreEqual(output, Util.Get(fst, input));
                            NextInput(r, ints);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        Int32sRefFSTEnum <BytesRef> fstEnum = new Int32sRefFSTEnum <BytesRef>(fst);

                        Arrays.Fill(ints, 0);
                        r = new Random(seed);
                        int upto = 0;
                        while (true)
                        {
                            Int32sRefFSTEnum.InputOutput <BytesRef> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            Assert.AreEqual(input, pair.Input);
                            r.NextBytes(outputBytes);
                            Assert.AreEqual(output, pair.Output);
                            upto++;
                            NextInput(r, ints);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST <BytesRef>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }

                // Build FST w/ PositiveIntOutputs and stop when FST
                // size = 3GB
                {
                    Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
                    Outputs <long?> outputs = PositiveInt32Outputs.Singleton;
                    Builder <long?> b       = new Builder <long?>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15);

                    long output = 1;

                    Arrays.Fill(ints, 0);
                    int    count = 0;
                    Random r     = new Random(seed);
                    while (true)
                    {
                        //System.out.println("add: " + input + " -> " + output);
                        b.Add(input, output);
                        output += 1 + r.Next(10);
                        count++;
                        if (count % 1000000 == 0)
                        {
                            Console.WriteLine(count + "...: " + b.GetFstSizeInBytes() + " bytes");
                        }
                        if (b.GetFstSizeInBytes() > LIMIT)
                        {
                            break;
                        }
                        NextInput(r, ints);
                    }

                    FST <long?> fst = b.Finish();

                    for (int verify = 0; verify < 2; verify++)
                    {
                        Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                        Arrays.Fill(ints, 0);

                        output = 1;
                        r      = new Random(seed);
                        for (int i = 0; i < count; i++)
                        {
                            if (i % 1000000 == 0)
                            {
                                Console.WriteLine(i + "...: ");
                            }

                            // forward lookup:
                            Assert.AreEqual(output, (long)Util.Get(fst, input));
                            // reverse lookup:
                            Assert.AreEqual(input, Util.GetByOutput(fst, output));
                            output += 1 + r.Next(10);
                            NextInput(r, ints);
                        }

                        Console.WriteLine("\nTEST: enum all input/outputs");
                        Int32sRefFSTEnum <long?> fstEnum = new Int32sRefFSTEnum <long?>(fst);

                        Arrays.Fill(ints, 0);
                        r = new Random(seed);
                        int upto = 0;
                        output = 1;
                        while (true)
                        {
                            Int32sRefFSTEnum.InputOutput <long?> pair = fstEnum.Next();
                            if (pair == null)
                            {
                                break;
                            }
                            Assert.AreEqual(input, pair.Input);
                            Assert.AreEqual(output, pair.Output.Value);
                            output += 1 + r.Next(10);
                            upto++;
                            NextInput(r, ints);
                        }
                        Assert.AreEqual(count, upto);

                        if (verify == 0)
                        {
                            Console.WriteLine("\nTEST: save/load FST and re-verify");
                            IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT);
                            fst.Save(@out);
                            @out.Dispose();
                            IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT);
                            fst = new FST <long?>(@in, outputs);
                            @in.Dispose();
                        }
                        else
                        {
                            dir.DeleteFile("fst");
                        }
                    }
                }
            }
            dir.Dispose();
        }
 public override void Finish(long termsFilePointer)
 {
     fst = fstBuilder.Finish();
     if (fst != null)
     {
         fst.Save(output);
     }
 }
 public override void Finish(long termsFilePointer)
 {
     Fst = _fstBuilder.Finish();
     if (Fst != null)
         Fst.Save(_vgtiw.Output);
 }