/// <summary>
/// Creates a paired outputs instance from two underlying <see cref="Outputs{T}"/>
/// implementations and precomputes the combined "no output" pair from their
/// individual <c>NoOutput</c> values.
/// </summary>
/// <param name="outputs1"> Outputs implementation for the first element of each pair. </param>
/// <param name="outputs2"> Outputs implementation for the second element of each pair. </param>
public PairOutputs(Outputs<A> outputs1, Outputs<B> outputs2)
{
    this.outputs1 = outputs1;
    this.outputs2 = outputs2;

    // The shared empty pair is built from each side's own empty output.
    var emptyFirst = outputs1.NoOutput;
    var emptySecond = outputs2.NoOutput;
    NO_OUTPUT = new Pair(emptyFirst, emptySecond);
}
/// <summary>
/// Creates the helper by forwarding every argument, unchanged and in order,
/// to the base class constructor. No additional state is initialized here.
/// </summary>
public FSTTesterHelper(
    Random random,
    Directory dir,
    int inputMode,
    List<InputOutput<T>> pairs,
    Outputs<T> outputs,
    bool doReverseLookup)
    : base(random, dir, inputMode, pairs, outputs, doReverseLookup)
{
}
/// <summary>
/// Captures the tester's configuration. Each argument is stored in the
/// field of the same name; nothing else happens at construction time.
/// </summary>
/// <param name="random"> Random source stored for later use. </param>
/// <param name="dir"> Directory stored for later use. </param>
/// <param name="inputMode"> Input mode value stored for later use. </param>
/// <param name="pairs"> Input/output pairs stored for later use. </param>
/// <param name="outputs"> Outputs implementation stored for later use. </param>
/// <param name="doReverseLookup"> Reverse-lookup flag stored for later use. </param>
public FSTTester(
    Random random,
    Directory dir,
    int inputMode,
    IList<InputOutput<T>> pairs,
    Outputs<T> outputs,
    bool doReverseLookup)
{
    this.random = random;
    this.dir = dir;
    this.inputMode = inputMode;
    this.pairs = pairs;
    this.outputs = outputs;
    this.doReverseLookup = doReverseLookup;
}
/// <summary>
/// Creates a list-of-outputs wrapper around the given per-element outputs
/// implementation, which is stored as-is.
/// </summary>
/// <param name="outputs"> The wrapped outputs implementation. </param>
public ListOfOutputs(Outputs<T> outputs)
{
    this.outputs = outputs;
}
/// <summary>
/// Builds very large FSTs and checks that they survive lookup, enumeration and
/// a save/reload round trip. For each packing mode (unpacked, then packed) it runs:
/// <list type="bullet">
/// <item><description>a no-outputs FST grown until the builder's state count exceeds
/// <c>int.MaxValue + 100 MB</c> (unpacked mode only);</description></item>
/// <item><description>a byte-sequence-outputs FST grown until its size exceeds <c>LIMIT</c>;</description></item>
/// <item><description>a positive-int-outputs FST grown until its size exceeds <c>LIMIT</c>,
/// additionally checked by reverse (by-output) lookup.</description></item>
/// </list>
/// Every FST is verified twice: once as built, and once after being written to and
/// read back from the directory. The random sequence is replayed from the same seed
/// so the verification passes regenerate exactly the inputs/outputs that were added.
/// </summary>
public virtual void Test()
{
    int[] ints = new int[7];
    IntsRef input = new IntsRef(ints, 0, ints.Length);
    int seed = Random().Next();

    // The directory and the index streams are wrapped in using blocks so they are
    // released even when an assertion or I/O call throws part-way through.
    using (Directory dir = new MMapDirectory(CreateTempDir("2BFST")))
    {
        for (int doPackIter = 0; doPackIter < 2; doPackIter++)
        {
            bool doPack = doPackIter == 1;

            // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
            if (!doPack)
            {
                Console.WriteLine("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
                Outputs<object> outputs = NoOutputs.Singleton;
                object NO_OUTPUT = outputs.NoOutput;
                Builder<object> b = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);

                int count = 0;
                Random r = new Random(seed);
                int[] ints2 = new int[200];
                IntsRef input2 = new IntsRef(ints2, 0, ints2.Length);
                while (true)
                {
                    // Only positions 10.. are randomized; the first 10 are advanced by NextInput.
                    for (int i = 10; i < ints2.Length; i++)
                    {
                        ints2[i] = r.Next(256);
                    }
                    b.Add(input2, NO_OUTPUT);
                    count++;
                    if (count % 100000 == 0)
                    {
                        Console.WriteLine(count + ": " + b.FstSizeInBytes() + " bytes; " + b.TotStateCount + " nodes");
                    }
                    if (b.TotStateCount > int.MaxValue + 100L * 1024 * 1024)
                    {
                        break;
                    }
                    NextInput(r, ints2);
                }

                FST<object> fst = b.Finish();

                for (int verify = 0; verify < 2; verify++)
                {
                    Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                    // Replay the exact same input sequence from the seed.
                    Arrays.Fill(ints2, 0);
                    r = new Random(seed);

                    for (int i = 0; i < count; i++)
                    {
                        if (i % 1000000 == 0)
                        {
                            Console.WriteLine(i + "...: ");
                        }
                        for (int j = 10; j < ints2.Length; j++)
                        {
                            ints2[j] = r.Next(256);
                        }
                        Assert.AreEqual(NO_OUTPUT, Util.Get(fst, input2));
                        NextInput(r, ints2);
                    }

                    Console.WriteLine("\nTEST: enum all input/outputs");
                    IntsRefFSTEnum<object> fstEnum = new IntsRefFSTEnum<object>(fst);

                    Arrays.Fill(ints2, 0);
                    r = new Random(seed);
                    int upto = 0;
                    while (true)
                    {
                        IntsRefFSTEnum<object>.InputOutput<object> pair = fstEnum.Next();
                        if (pair == null)
                        {
                            break;
                        }
                        for (int j = 10; j < ints2.Length; j++)
                        {
                            ints2[j] = r.Next(256);
                        }
                        Assert.AreEqual(input2, pair.Input);
                        Assert.AreEqual(NO_OUTPUT, pair.Output);
                        upto++;
                        NextInput(r, ints2);
                    }
                    Assert.AreEqual(count, upto);

                    if (verify == 0)
                    {
                        Console.WriteLine("\nTEST: save/load FST and re-verify");
                        using (IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT))
                        {
                            fst.Save(@out);
                        }
                        using (IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT))
                        {
                            fst = new FST<object>(@in, outputs);
                        }
                    }
                    else
                    {
                        dir.DeleteFile("fst");
                    }
                }
            }

            // Build FST w/ ByteSequenceOutputs and stop when FST
            // size = 3GB
            {
                Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
                Outputs<BytesRef> outputs = ByteSequenceOutputs.Singleton;
                Builder<BytesRef> b = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);

                // 'output' wraps 'outputBytes', so refilling the array changes the expected value in place.
                var outputBytes = new byte[20];
                BytesRef output = new BytesRef(outputBytes);
                Arrays.Fill(ints, 0);
                int count = 0;
                Random r = new Random(seed);
                while (true)
                {
                    r.NextBytes(outputBytes);
                    // Deep copy: the builder must not see later mutations of the shared buffer.
                    b.Add(input, BytesRef.DeepCopyOf(output));
                    count++;
                    if (count % 1000000 == 0)
                    {
                        Console.WriteLine(count + "...: " + b.FstSizeInBytes() + " bytes");
                    }
                    if (b.FstSizeInBytes() > LIMIT)
                    {
                        break;
                    }
                    NextInput(r, ints);
                }

                FST<BytesRef> fst = b.Finish();

                for (int verify = 0; verify < 2; verify++)
                {
                    Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                    r = new Random(seed);
                    Arrays.Fill(ints, 0);

                    for (int i = 0; i < count; i++)
                    {
                        if (i % 1000000 == 0)
                        {
                            Console.WriteLine(i + "...: ");
                        }
                        r.NextBytes(outputBytes);
                        Assert.AreEqual(output, Util.Get(fst, input));
                        NextInput(r, ints);
                    }

                    Console.WriteLine("\nTEST: enum all input/outputs");
                    IntsRefFSTEnum<BytesRef> fstEnum = new IntsRefFSTEnum<BytesRef>(fst);

                    Arrays.Fill(ints, 0);
                    r = new Random(seed);
                    int upto = 0;
                    while (true)
                    {
                        IntsRefFSTEnum<BytesRef>.InputOutput<BytesRef> pair = fstEnum.Next();
                        if (pair == null)
                        {
                            break;
                        }
                        Assert.AreEqual(input, pair.Input);
                        r.NextBytes(outputBytes);
                        Assert.AreEqual(output, pair.Output);
                        upto++;
                        NextInput(r, ints);
                    }
                    Assert.AreEqual(count, upto);

                    if (verify == 0)
                    {
                        Console.WriteLine("\nTEST: save/load FST and re-verify");
                        using (IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT))
                        {
                            fst.Save(@out);
                        }
                        using (IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT))
                        {
                            fst = new FST<BytesRef>(@in, outputs);
                        }
                    }
                    else
                    {
                        dir.DeleteFile("fst");
                    }
                }
            }

            // Build FST w/ PositiveIntOutputs and stop when FST
            // size = 3GB
            {
                Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
                Outputs<long?> outputs = PositiveIntOutputs.Singleton;
                Builder<long?> b = new Builder<long?>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInts.COMPACT, true, 15);

                long output = 1;

                Arrays.Fill(ints, 0);
                int count = 0;
                Random r = new Random(seed);
                while (true)
                {
                    b.Add(input, output);
                    // Strictly increasing outputs, so each output maps back to a single input.
                    output += 1 + r.Next(10);
                    count++;
                    if (count % 1000000 == 0)
                    {
                        Console.WriteLine(count + "...: " + b.FstSizeInBytes() + " bytes");
                    }
                    if (b.FstSizeInBytes() > LIMIT)
                    {
                        break;
                    }
                    NextInput(r, ints);
                }

                FST<long?> fst = b.Finish();

                for (int verify = 0; verify < 2; verify++)
                {
                    Console.WriteLine("\nTEST: now verify [fst size=" + fst.SizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]");

                    Arrays.Fill(ints, 0);

                    output = 1;
                    r = new Random(seed);
                    for (int i = 0; i < count; i++)
                    {
                        if (i % 1000000 == 0)
                        {
                            Console.WriteLine(i + "...: ");
                        }

                        // forward lookup:
                        Assert.AreEqual(output, (long)Util.Get(fst, input));
                        // reverse lookup:
                        Assert.AreEqual(input, Util.GetByOutput(fst, output));
                        output += 1 + r.Next(10);
                        NextInput(r, ints);
                    }

                    Console.WriteLine("\nTEST: enum all input/outputs");
                    IntsRefFSTEnum<long?> fstEnum = new IntsRefFSTEnum<long?>(fst);

                    Arrays.Fill(ints, 0);
                    r = new Random(seed);
                    int upto = 0;
                    output = 1;
                    while (true)
                    {
                        IntsRefFSTEnum<long?>.InputOutput<long?> pair = fstEnum.Next();
                        if (pair == null)
                        {
                            break;
                        }
                        Assert.AreEqual(input, pair.Input);
                        Assert.AreEqual(output, pair.Output.Value);
                        output += 1 + r.Next(10);
                        upto++;
                        NextInput(r, ints);
                    }
                    Assert.AreEqual(count, upto);

                    if (verify == 0)
                    {
                        Console.WriteLine("\nTEST: save/load FST and re-verify");
                        using (IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT))
                        {
                            fst.Save(@out);
                        }
                        using (IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT))
                        {
                            fst = new FST<long?>(@in, outputs);
                        }
                    }
                    else
                    {
                        dir.DeleteFile("fst");
                    }
                }
            }
        }
    }
}
/// <summary>
/// Instantiates an FST/FSA builder without any pruning. A shortcut
/// to <see cref="Builder{T}.Builder(FST.INPUT_TYPE, int, int, bool, bool, int, Outputs{T}, FreezeTail{T}, bool, float, bool, int)"/>
/// with pruning options turned off.
/// </summary>
/// <param name="inputType"> Input type, forwarded unchanged to the full constructor. </param>
/// <param name="outputs"> Outputs implementation, forwarded unchanged to the full constructor. </param>
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs)
    : this(inputType, 0, 0, true, true, int.MaxValue, outputs, null, false, PackedInt32s.COMPACT, true, 15)
{
    // Intentionally empty: all initialization happens in the chained constructor.
}
/// <summary>
/// Convenience constructor: creates an FST/FSA builder with pruning disabled by
/// delegating to
/// <see cref="Builder{T}.Builder(FST.INPUT_TYPE, int, int, bool, bool, int, Outputs{T}, FreezeTail{T}, bool, float, bool, int)"/>
/// with fixed values for every tuning option.
/// </summary>
/// <param name="inputType"> Input type, passed through to the full constructor. </param>
/// <param name="outputs"> Outputs implementation, passed through to the full constructor. </param>
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs)
    : this(
        inputType,
        0,                      // minSuffixCount1
        0,                      // minSuffixCount2
        true,                   // doShareSuffix
        true,                   // doShareNonSingletonNodes
        int.MaxValue,           // shareMaxTailLength
        outputs,
        null,                   // freezeTail
        false,                  // doPackFST
        PackedInt32s.COMPACT,   // acceptableOverheadRatio
        true,                   // allowArrayArcs
        15)                     // bytesPageBits
{
}
/// <summary>
/// Instantiates an FST/FSA builder with all the possible tuning and construction
/// tweaks. Read parameter documentation carefully.
/// </summary>
/// <param name="inputType">
/// The input type (transition labels). Can be anything from the <see cref="INPUT_TYPE"/>
/// enumeration. Shorter types will consume less memory. Strings (character sequences) are
/// represented as <see cref="INPUT_TYPE.BYTE4"/> (full unicode codepoints).
/// </param>
/// <param name="minSuffixCount1">
/// If pruning the input graph during construction, this threshold is used for telling
/// if a node is kept or pruned. If transition_count(node) &gt;= minSuffixCount1, the node
/// is kept.
/// </param>
/// <param name="minSuffixCount2">
/// (Note: only Mike McCandless knows what this one is really doing...)
/// </param>
/// <param name="doShareSuffix">
/// If <c>true</c>, the shared suffixes will be compacted into unique paths.
/// This requires an additional RAM-intensive hash map for lookups in memory. Setting this parameter to
/// <c>false</c> creates a single suffix path for all input sequences. This will result in a larger
/// FST, but requires substantially less memory and CPU during building.
/// </param>
/// <param name="doShareNonSingletonNodes">
/// Only used if <paramref name="doShareSuffix"/> is true. Set this to
/// true to ensure FST is fully minimal, at cost of more
/// CPU and more RAM during building.
/// </param>
/// <param name="shareMaxTailLength">
/// Only used if <paramref name="doShareSuffix"/> is true. Set this to
/// <c>int.MaxValue</c> to ensure FST is fully minimal, at cost of more
/// CPU and more RAM during building.
/// </param>
/// <param name="outputs">
/// The output type for each input sequence. Applies only if building an FST. For
/// FSA, use <see cref="NoOutputs.Singleton"/> and its <see cref="Outputs{T}.NoOutput"/> as the
/// singleton output object.
/// </param>
/// <param name="freezeTail">
/// Stored unchanged for later use by the builder. It is not dereferenced by this
/// constructor, so <c>null</c> is accepted here.
/// </param>
/// <param name="doPackFST">
/// Pass true to create a packed FST.
/// </param>
/// <param name="acceptableOverheadRatio">
/// How to trade speed for space when building the FST. This option
/// is only relevant when <paramref name="doPackFST"/> is true.
/// See <c>PackedInts.GetMutable(int, int, float)</c>.
/// </param>
/// <param name="allowArrayArcs">
/// Pass false to disable the array arc optimization
/// while building the FST; this will make the resulting
/// FST smaller but slower to traverse.
/// </param>
/// <param name="bytesPageBits">
/// How many bits wide to make each
/// byte[] block in the BytesStore; if you know the FST
/// will be large then make this larger. For example 15
/// bits = 32768 byte pages.
/// </param>
public Builder(FST<T>.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, bool doShareSuffix, bool doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs, FreezeTail<T> freezeTail, bool doPackFST, float acceptableOverheadRatio, bool allowArrayArcs, int bytesPageBits)
{
    MinSuffixCount1 = minSuffixCount1;
    MinSuffixCount2 = minSuffixCount2;
    FreezeTail_Renamed = freezeTail;
    DoShareNonSingletonNodes = doShareNonSingletonNodes;
    ShareMaxTailLength = shareMaxTailLength;
    DoPackFST = doPackFST;
    AcceptableOverheadRatio = acceptableOverheadRatio;

    Fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs, bytesPageBits);

    // The dedup hash reads back from the FST's own byte store, so it must be
    // created after Fst. It is only needed when suffix sharing is enabled.
    DedupHash = doShareSuffix
        ? new NodeHash<T>(Fst, Fst.Bytes.GetReverseReader(false))
        : null;

    NO_OUTPUT = outputs.NoOutput;

    // Start with 10 pre-allocated frontier nodes, each created with its own index.
    Frontier = new UnCompiledNode<T>[10];
    for (int idx = 0; idx < Frontier.Length; idx++)
    {
        Frontier[idx] = new UnCompiledNode<T>(this, idx);
    }
}