/// <summary>
/// Asks <c>fstBuilder</c> for the finished FST, stores it in <c>fst</c> and,
/// when an FST was actually produced, saves it to <c>outerInstance.m_output</c>.
/// </summary>
/// <param name="termsFilePointer">Not read by this implementation.</param>
public override void Finish(long termsFilePointer)
{
    fst = fstBuilder.Finish();
    if (fst == null)
    {
        // The builder produced no FST, so there is nothing to serialize.
        return;
    }

    fst.Save(outerInstance.m_output);
}
/// <summary>
/// Finishes <c>_fstBuilder</c>, keeps the result in <c>Fst</c> and writes it to
/// <c>_vgtiw.Output</c> unless the builder returned <c>null</c>.
/// </summary>
/// <param name="termsFilePointer">Not read by this implementation.</param>
public override void Finish(long termsFilePointer)
{
    Fst = _fstBuilder.Finish();
    if (Fst == null)
    {
        // No FST was built; skip the save.
        return;
    }

    Fst.Save(_vgtiw.Output);
}
/// <summary>
/// Writes <c>count</c> to <paramref name="output"/> and then, if an FST is
/// present, the FST itself.
/// </summary>
/// <param name="output">Destination the data is written to.</param>
/// <returns>
/// <c>true</c> when the FST was written; <c>false</c> when <c>fst</c> is
/// <c>null</c> (in which case only <c>count</c> has been written).
/// </returns>
public override bool Store(DataOutput output)
{
    // The count is always written, even when there is no FST to follow it.
    output.WriteVInt64(count);

    bool hasFst = fst != null;
    if (hasFst)
    {
        fst.Save(output);
    }

    return hasFst;
}
/// <summary>
/// Serializes this instance to <paramref name="output"/>: a codec header
/// followed by <c>count</c>, <c>separator</c>, <c>grams</c>, <c>totTokens</c>
/// and finally the FST.
/// </summary>
/// <param name="output">Destination the data is written to.</param>
/// <returns>Always <c>true</c>.</returns>
/// <remarks>
/// <c>fst</c> is dereferenced without a null check, so it must already be set
/// when this method is called.
/// </remarks>
public override bool Store(DataOutput output)
{
    // Header first, so the written data carries its codec name and version.
    CodecUtil.WriteHeader(output, CODEC_NAME, VERSION_CURRENT);

    // Scalar fields, in the exact order they are laid out in the stream.
    output.WriteVInt64(count);
    output.WriteByte(separator);
    output.WriteVInt32(grams);
    output.WriteVInt64(totTokens);

    // The FST payload comes last.
    fst.Save(output);

    return true;
}
/// <summary>
/// Saves <c>fst</c> to the file at <paramref name="filename"/>, creating the
/// file's parent directory first when it does not exist yet.
/// </summary>
/// <param name="filename">Path of the file the FST is saved to.</param>
protected virtual void WriteFST(string filename)
{
    FileInfo target = new FileInfo(filename);

    // Make sure the containing directory is there before saving into it.
    DirectoryInfo parent = target.Directory;
    if (!parent.Exists)
    {
        parent.Create();
    }

    fst.Save(target);
}
/// <summary>
/// Writes <c>count</c> to <paramref name="output"/> and, when an FST is
/// present, the FST followed by <c>maxAnalyzedPathsForOneInput</c> and a
/// one-byte payload flag.
/// </summary>
/// <param name="output">Destination the data is written to.</param>
/// <returns>
/// <c>true</c> when the FST and trailing fields were written; <c>false</c>
/// when <c>fst</c> is <c>null</c> (only <c>count</c> has been written then).
/// </returns>
public override bool Store(DataOutput output)
{
    output.WriteVInt64(count);

    if (fst != null)
    {
        fst.Save(output);
        output.WriteVInt32(maxAnalyzedPathsForOneInput);

        // hasPayloads is encoded as a single byte: 1 for true, 0 for false.
        byte payloadFlag = (byte)(hasPayloads ? 1 : 0);
        output.WriteByte(payloadFlag);
        return true;
    }

    return false;
}
/// <summary>
/// Writes the per-field header and the finished FST to <c>@out</c>.
/// Nothing is written when <c>termCount</c> is not positive.
/// </summary>
/// <param name="sumTotalTermFreq">
/// Written only when the field's index options are not
/// <c>IndexOptions.DOCS_ONLY</c>.
/// </param>
/// <param name="sumDocFreq">Written after the optional total term frequency.</param>
/// <param name="docCount">Written last, immediately before the FST.</param>
public override void Finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
{
    if (termCount <= 0)
    {
        // No terms were counted for this field: emit nothing at all.
        return;
    }

    @out.WriteVInt32(termCount);
    @out.WriteVInt32(field.Number);

    bool writeTotalTermFreq = field.IndexOptions != IndexOptions.DOCS_ONLY;
    if (writeTotalTermFreq)
    {
        @out.WriteVInt64(sumTotalTermFreq);
    }

    @out.WriteVInt64(sumDocFreq);
    @out.WriteVInt32(docCount);

    FST<BytesRef> finished = builder.Finish();
    finished.Save(@out);
}
/// <summary>
/// Records an FST entry for <paramref name="field"/> in <c>meta</c> and writes
/// an FST mapping each value to its ordinal into <c>data</c>.
/// </summary>
/// <param name="field">Field whose number is written to <c>meta</c>.</param>
/// <param name="values">
/// Values added to the FST in enumeration order; the n-th value (starting at
/// zero) is given output n.
/// </param>
private void WriteFST(FieldInfo field, IEnumerable<BytesRef> values)
{
    // Metadata entry: field number, type marker, and where the FST starts in data.
    meta.WriteVInt32(field.Number);
    meta.WriteByte(MemoryDocValuesProducer.FST);
    meta.WriteInt64(data.GetFilePointer());

    PositiveInt32Outputs ordOutputs = PositiveInt32Outputs.Singleton;
    Builder<long?> fstBuilder = new Builder<long?>(INPUT_TYPE.BYTE1, ordOutputs);
    Int32sRef scratchInts = new Int32sRef();

    long nextOrd = 0;
    foreach (BytesRef term in values)
    {
        fstBuilder.Add(Util.ToInt32sRef(term, scratchInts), nextOrd);
        nextOrd++;
    }

    // The builder may return null; in that case no FST bytes are written.
    FST<long?> built = fstBuilder.Finish();
    if (built != null)
    {
        built.Save(data);
    }

    // After the loop nextOrd equals the number of values that were added.
    meta.WriteVInt64(nextOrd);
}
/// <summary>
/// Builds an FST from <c>Pairs</c> using the given pruning settings,
/// optionally round-trips it through <c>Dir</c>, and verifies the result.
/// </summary>
/// <param name="prune1">First pruning argument passed to the builder and to <c>VerifyPruned</c>.</param>
/// <param name="prune2">Second pruning argument passed to the builder and to <c>VerifyPruned</c>.</param>
/// <param name="allowRandomSuffixSharing">
/// When <c>true</c>, two of the builder arguments are chosen randomly;
/// otherwise they are fixed to <c>true</c> and <c>int.MaxValue</c>.
/// </param>
/// <returns>The FST that was built (and possibly reloaded); may be <c>null</c>.</returns>
internal virtual FST<T> DoTest(int prune1, int prune2, bool allowRandomSuffixSharing)
{
    if (LuceneTestCase.VERBOSE)
    {
        Console.WriteLine("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
    }

    bool willRewrite = Random.NextBoolean();

    Builder<T> builder = new Builder<T>(
        InputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
        prune1,
        prune2,
        prune1 == 0 && prune2 == 0,
        allowRandomSuffixSharing ? Random.NextBoolean() : true,
        allowRandomSuffixSharing ? TestUtil.NextInt(Random, 1, 10) : int.MaxValue,
        Outputs,
        null,
        willRewrite,
        PackedInts.DEFAULT,
        true,
        15);

    if (LuceneTestCase.VERBOSE)
    {
        if (willRewrite)
        {
            Console.WriteLine("TEST: packed FST");
        }
        else
        {
            Console.WriteLine("TEST: non-packed FST");
        }
    }

    foreach (InputOutput<T> pair in Pairs)
    {
        if (pair.Output is IList)
        {
            // A list output means the same input is added once per list element.
            IList<long> longValues = (IList<long>)pair.Output;
            Builder<object> builderObject = builder as Builder<object>;
            foreach (long value in longValues)
            {
                builderObject.Add(pair.Input, value);
            }
        }
        else
        {
            builder.Add(pair.Input, pair.Output);
        }
    }

    FST<T> fst = builder.Finish();

    // Randomly exercise save/load, but only for an existing, non-rewritten FST.
    if (Random.NextBoolean() && fst != null && !willRewrite)
    {
        IOContext context = LuceneTestCase.NewIOContext(Random);

        // Dispose the output even if Save throws.
        using (IndexOutput @out = Dir.CreateOutput("fst.bin", context))
        {
            fst.Save(@out);
        }

        IndexInput @in = Dir.OpenInput("fst.bin", context);
        try
        {
            fst = new FST<T>(@in, Outputs);
        }
        finally
        {
            @in.Dispose();
            Dir.DeleteFile("fst.bin");
        }
    }

    if (LuceneTestCase.VERBOSE && Pairs.Count <= 20 && fst != null)
    {
        // FileMode.Create creates the file when missing and truncates it when
        // present; FileMode.Open would throw if out.dot did not already exist.
        using (TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.Create), IOUtils.CHARSET_UTF_8))
        {
            Util.toDot(fst, w, false, false);
        }
        Console.WriteLine("SAVED out.dot");
    }

    if (LuceneTestCase.VERBOSE)
    {
        if (fst == null)
        {
            Console.WriteLine("  fst has 0 nodes (fully pruned)");
        }
        else
        {
            Console.WriteLine("  fst has " + fst.NodeCount + " nodes and " + fst.ArcCount + " arcs");
        }
    }

    if (prune1 == 0 && prune2 == 0)
    {
        VerifyUnPruned(InputMode, fst);
    }
    else
    {
        VerifyPruned(InputMode, fst, prune1, prune2);
    }

    return fst;
}
/// <summary>
/// Builds an FST from <c>pairs</c> using the given pruning settings,
/// optionally round-trips it through <c>dir</c>, and verifies the result.
/// </summary>
/// <param name="prune1">First pruning argument passed to the builder and to <c>VerifyPruned</c>.</param>
/// <param name="prune2">Second pruning argument passed to the builder and to <c>VerifyPruned</c>.</param>
/// <param name="allowRandomSuffixSharing">
/// When <c>true</c>, two of the builder arguments are chosen randomly;
/// otherwise they are fixed to <c>true</c> and <c>int.MaxValue</c>.
/// </param>
/// <returns>The FST that was built (and possibly reloaded); may be <c>null</c>.</returns>
internal virtual FST<T> DoTest(int prune1, int prune2, bool allowRandomSuffixSharing)
{
    if (LuceneTestCase.VERBOSE)
    {
        Console.WriteLine("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
    }

    bool willRewrite = random.NextBoolean();

    Builder<T> builder = new Builder<T>(
        inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
        prune1,
        prune2,
        prune1 == 0 && prune2 == 0,
        allowRandomSuffixSharing ? random.NextBoolean() : true,
        allowRandomSuffixSharing ? TestUtil.NextInt32(random, 1, 10) : int.MaxValue,
        outputs,
        null,
        willRewrite,
        PackedInt32s.DEFAULT,
        true,
        15);

    if (LuceneTestCase.VERBOSE)
    {
        if (willRewrite)
        {
            Console.WriteLine("TEST: packed FST");
        }
        else
        {
            Console.WriteLine("TEST: non-packed FST");
        }
    }

    foreach (InputOutput<T> pair in pairs)
    {
        if (pair.Output is IEnumerable)
        {
            // An enumerable output means the same input is added once per element.
            Builder<object> builderObject = builder as Builder<object>;
            var values = pair.Output as IEnumerable;
            foreach (object value in values)
            {
                builderObject.Add(pair.Input, value);
            }
        }
        else
        {
            builder.Add(pair.Input, pair.Output);
        }
    }

    FST<T> fst = builder.Finish();

    // Randomly exercise save/load, but only for an existing, non-rewritten FST.
    if (random.NextBoolean() && fst != null && !willRewrite)
    {
        IOContext context = LuceneTestCase.NewIOContext(random);
        using (IndexOutput @out = dir.CreateOutput("fst.bin", context))
        {
            fst.Save(@out);
        }

        IndexInput @in = dir.OpenInput("fst.bin", context);
        try
        {
            fst = new FST<T>(@in, outputs);
        }
        finally
        {
            @in.Dispose();
            dir.DeleteFile("fst.bin");
        }
    }

    if (LuceneTestCase.VERBOSE && pairs.Count <= 20 && fst != null)
    {
        // FileMode.Create truncates an existing out.dot. OpenOrCreate would
        // overwrite in place and leave stale trailing bytes from a previous,
        // longer dump, producing a corrupt dot file.
        using (TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.Create), Encoding.UTF8))
        {
            Util.ToDot(fst, w, false, false);
        }
        Console.WriteLine("SAVED out.dot");
    }

    if (LuceneTestCase.VERBOSE)
    {
        if (fst == null)
        {
            Console.WriteLine("  fst has 0 nodes (fully pruned)");
        }
        else
        {
            Console.WriteLine("  fst has " + fst.NodeCount + " nodes and " + fst.ArcCount + " arcs");
        }
    }

    if (prune1 == 0 && prune2 == 0)
    {
        VerifyUnPruned(inputMode, fst);
    }
    else
    {
        VerifyPruned(inputMode, fst, prune1, prune2);
    }

    return fst;
}
/// <summary>
/// Builds very large FSTs with three kinds of outputs (no outputs, byte
/// sequences, positive integers), then verifies each one by direct lookup and
/// by full enumeration, both before and after a save/load round trip through
/// a temporary directory.
/// </summary>
/// <remarks>
/// The same <c>seed</c> is reused to regenerate the exact input/output
/// sequence during verification. The directory and all index streams are
/// wrapped in <c>using</c> so they are released even when an assertion fails.
/// </remarks>
public virtual void Test()
{
    int[] ints = new int[7];
    Int32sRef input = new Int32sRef(ints, 0, ints.Length);
    int seed = Random.Next();

    using (Directory dir = new MMapDirectory(CreateTempDir("2BFST")))
    {
        for (int doPackIter = 0; doPackIter < 2; doPackIter++)
        {
            bool doPack = doPackIter == 1;

            // Build FST w/ NoOutputs and stop once the builder's state count
            // exceeds int.MaxValue by 100 * 1024 * 1024.
            if (!doPack)
            {
                Console.WriteLine("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
                Outputs<object> outputs = NoOutputs.Singleton;
                object NO_OUTPUT = outputs.NoOutput;
                Builder<object> b = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15);

                int count = 0;
                Random r = new Random(seed);
                int[] ints2 = new int[200];
                Int32sRef input2 = new Int32sRef(ints2, 0, ints2.Length);
                while (true)
                {
                    // Positions 10.. are random; the first 10 are advanced by NextInput.
                    for (int i = 10; i < ints2.Length; i++)
                    {
                        ints2[i] = r.Next(256);
                    }
                    b.Add(input2, NO_OUTPUT);
                    count++;
                    if (count % 100000 == 0)
                    {
                        Console.WriteLine(count + ": " + b.GetFstSizeInBytes() + " bytes; " + b.TotStateCount + " nodes");
                    }
                    if (b.TotStateCount > int.MaxValue + 100L * 1024 * 1024)
                    {
                        break;
                    }
                    NextInput(r, ints2);
                }

                FST<object> fst = b.Finish();

                // Pass 0 verifies the in-memory FST, pass 1 the reloaded one.
                for (int verify = 0; verify < 2; verify++)
                {
                    Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                    Arrays.Fill(ints2, 0);
                    r = new Random(seed);

                    for (int i = 0; i < count; i++)
                    {
                        if (i % 1000000 == 0)
                        {
                            Console.WriteLine(i + "...: ");
                        }
                        for (int j = 10; j < ints2.Length; j++)
                        {
                            ints2[j] = r.Next(256);
                        }
                        Assert.AreEqual(NO_OUTPUT, Util.Get(fst, input2));
                        NextInput(r, ints2);
                    }

                    Console.WriteLine("\nTEST: enum all input/outputs");
                    Int32sRefFSTEnum<object> fstEnum = new Int32sRefFSTEnum<object>(fst);

                    Arrays.Fill(ints2, 0);
                    r = new Random(seed);
                    int upto = 0;
                    while (true)
                    {
                        Int32sRefFSTEnum.InputOutput<object> pair = fstEnum.Next();
                        if (pair == null)
                        {
                            break;
                        }
                        for (int j = 10; j < ints2.Length; j++)
                        {
                            ints2[j] = r.Next(256);
                        }
                        Assert.AreEqual(input2, pair.Input);
                        Assert.AreEqual(NO_OUTPUT, pair.Output);
                        upto++;
                        NextInput(r, ints2);
                    }
                    Assert.AreEqual(count, upto);

                    if (verify == 0)
                    {
                        Console.WriteLine("\nTEST: save/load FST and re-verify");
                        using (IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT))
                        {
                            fst.Save(@out);
                        }
                        using (IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT))
                        {
                            fst = new FST<object>(@in, outputs);
                        }
                    }
                    else
                    {
                        dir.DeleteFile("fst");
                    }
                }
            }

            // Build FST w/ ByteSequenceOutputs and stop when the builder's
            // reported FST size exceeds LIMIT.
            {
                Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
                Outputs<BytesRef> outputs = ByteSequenceOutputs.Singleton;
                Builder<BytesRef> b = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15);

                var outputBytes = new byte[20];
                BytesRef output = new BytesRef(outputBytes);
                Arrays.Fill(ints, 0);
                int count = 0;
                Random r = new Random(seed);
                while (true)
                {
                    r.NextBytes(outputBytes);
                    // output wraps outputBytes, which is refilled every
                    // iteration, so a deep copy is handed to the builder.
                    b.Add(input, BytesRef.DeepCopyOf(output));
                    count++;
                    if (count % 1000000 == 0)
                    {
                        Console.WriteLine(count + "...: " + b.GetFstSizeInBytes() + " bytes");
                    }
                    if (b.GetFstSizeInBytes() > LIMIT)
                    {
                        break;
                    }
                    NextInput(r, ints);
                }

                FST<BytesRef> fst = b.Finish();

                // Pass 0 verifies the in-memory FST, pass 1 the reloaded one.
                for (int verify = 0; verify < 2; verify++)
                {
                    Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                    r = new Random(seed);
                    Arrays.Fill(ints, 0);

                    for (int i = 0; i < count; i++)
                    {
                        if (i % 1000000 == 0)
                        {
                            Console.WriteLine(i + "...: ");
                        }
                        r.NextBytes(outputBytes);
                        Assert.AreEqual(output, Util.Get(fst, input));
                        NextInput(r, ints);
                    }

                    Console.WriteLine("\nTEST: enum all input/outputs");
                    Int32sRefFSTEnum<BytesRef> fstEnum = new Int32sRefFSTEnum<BytesRef>(fst);

                    Arrays.Fill(ints, 0);
                    r = new Random(seed);
                    int upto = 0;
                    while (true)
                    {
                        Int32sRefFSTEnum.InputOutput<BytesRef> pair = fstEnum.Next();
                        if (pair == null)
                        {
                            break;
                        }
                        Assert.AreEqual(input, pair.Input);
                        r.NextBytes(outputBytes);
                        Assert.AreEqual(output, pair.Output);
                        upto++;
                        NextInput(r, ints);
                    }
                    Assert.AreEqual(count, upto);

                    if (verify == 0)
                    {
                        Console.WriteLine("\nTEST: save/load FST and re-verify");
                        using (IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT))
                        {
                            fst.Save(@out);
                        }
                        using (IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT))
                        {
                            fst = new FST<BytesRef>(@in, outputs);
                        }
                    }
                    else
                    {
                        dir.DeleteFile("fst");
                    }
                }
            }

            // Build FST w/ PositiveInt32Outputs and stop when the builder's
            // reported FST size exceeds LIMIT.
            {
                Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
                Outputs<long?> outputs = PositiveInt32Outputs.Singleton;
                Builder<long?> b = new Builder<long?>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15);

                long output = 1;

                Arrays.Fill(ints, 0);
                int count = 0;
                Random r = new Random(seed);
                while (true)
                {
                    b.Add(input, output);
                    // Strictly increasing outputs: each step adds 1..10.
                    output += 1 + r.Next(10);
                    count++;
                    if (count % 1000000 == 0)
                    {
                        Console.WriteLine(count + "...: " + b.GetFstSizeInBytes() + " bytes");
                    }
                    if (b.GetFstSizeInBytes() > LIMIT)
                    {
                        break;
                    }
                    NextInput(r, ints);
                }

                FST<long?> fst = b.Finish();

                // Pass 0 verifies the in-memory FST, pass 1 the reloaded one.
                for (int verify = 0; verify < 2; verify++)
                {
                    Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                    Arrays.Fill(ints, 0);

                    output = 1;
                    r = new Random(seed);
                    for (int i = 0; i < count; i++)
                    {
                        if (i % 1000000 == 0)
                        {
                            Console.WriteLine(i + "...: ");
                        }

                        // forward lookup:
                        Assert.AreEqual(output, (long)Util.Get(fst, input));
                        // reverse lookup:
                        Assert.AreEqual(input, Util.GetByOutput(fst, output));
                        output += 1 + r.Next(10);
                        NextInput(r, ints);
                    }

                    Console.WriteLine("\nTEST: enum all input/outputs");
                    Int32sRefFSTEnum<long?> fstEnum = new Int32sRefFSTEnum<long?>(fst);

                    Arrays.Fill(ints, 0);
                    r = new Random(seed);
                    int upto = 0;
                    output = 1;
                    while (true)
                    {
                        Int32sRefFSTEnum.InputOutput<long?> pair = fstEnum.Next();
                        if (pair == null)
                        {
                            break;
                        }
                        Assert.AreEqual(input, pair.Input);
                        Assert.AreEqual(output, pair.Output.Value);
                        output += 1 + r.Next(10);
                        upto++;
                        NextInput(r, ints);
                    }
                    Assert.AreEqual(count, upto);

                    if (verify == 0)
                    {
                        Console.WriteLine("\nTEST: save/load FST and re-verify");
                        using (IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT))
                        {
                            fst.Save(@out);
                        }
                        using (IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT))
                        {
                            fst = new FST<long?>(@in, outputs);
                        }
                    }
                    else
                    {
                        dir.DeleteFile("fst");
                    }
                }
            }
        }
    }
}
/// <summary>
/// Finishes <c>fstBuilder</c>, stores the resulting FST in <c>fst</c> and
/// saves it to <c>output</c> when the builder returned a non-null FST.
/// </summary>
/// <param name="termsFilePointer">Not read by this implementation.</param>
public override void Finish(long termsFilePointer)
{
    fst = fstBuilder.Finish();
    if (fst == null)
    {
        // Nothing was built, so nothing is written.
        return;
    }

    fst.Save(output);
}
/// <summary>
/// Finishes <c>_fstBuilder</c>, assigns the result to <c>Fst</c> and, if it is
/// not <c>null</c>, saves it to <c>_vgtiw.Output</c>.
/// </summary>
/// <param name="termsFilePointer">Not read by this implementation.</param>
public override void Finish(long termsFilePointer)
{
    Fst = _fstBuilder.Finish();
    if (Fst == null)
    {
        // The builder produced no FST; leave the output untouched.
        return;
    }

    Fst.Save(_vgtiw.Output);
}