// FST is complete private void VerifyUnPruned(int inputMode, FST <T> fst) { FST <long?> fstLong; ISet <long?> validOutputs; long minLong = long.MaxValue; long maxLong = long.MinValue; if (doReverseLookup) { FST <long?> fstLong0 = fst as FST <long?>; fstLong = fstLong0; validOutputs = new HashSet <long?>(); foreach (InputOutput <T> pair in pairs) { long?output = pair.Output as long?; maxLong = Math.Max(maxLong, output.Value); minLong = Math.Min(minLong, output.Value); validOutputs.Add(output.Value); } } else { fstLong = null; validOutputs = null; } if (pairs.Count == 0) { Assert.IsNull(fst); return; } if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: now verify " + pairs.Count + " terms"); foreach (InputOutput <T> pair in pairs) { Assert.IsNotNull(pair); Assert.IsNotNull(pair.Input); Assert.IsNotNull(pair.Output); Console.WriteLine(" " + InputToString(inputMode, pair.Input) + ": " + outputs.OutputToString(pair.Output)); } } Assert.IsNotNull(fst); // visit valid pairs in order -- make sure all words // are accepted, and FSTEnum's next() steps through // them correctly if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: check valid terms/next()"); } { Int32sRefFSTEnum <T> fstEnum = new Int32sRefFSTEnum <T>(fst); foreach (InputOutput <T> pair in pairs) { Int32sRef term = pair.Input; if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: check term=" + InputToString(inputMode, term) + " output=" + fst.Outputs.OutputToString(pair.Output)); } T output = Run(fst, term, null); Assert.IsNotNull(output, "term " + InputToString(inputMode, term) + " is not accepted"); Assert.IsTrue(OutputsEqual(pair.Output, output)); // verify enum's next Int32sRefFSTEnum.InputOutput <T> t = fstEnum.Next(); Assert.IsNotNull(t); Assert.AreEqual(term, t.Input, "expected input=" + InputToString(inputMode, term) + " but fstEnum returned " + InputToString(inputMode, t.Input)); Assert.IsTrue(OutputsEqual(pair.Output, t.Output)); } Assert.IsNull(fstEnum.Next()); } IDictionary <Int32sRef, T> termsMap = new Dictionary <Int32sRef, T>(); foreach (InputOutput <T> pair in pairs) { termsMap[pair.Input] = pair.Output; } if (doReverseLookup && maxLong > minLong) { // Do random lookups so we test null (output doesn't // exist) case: Assert.IsNull(Util.GetByOutput(fstLong, minLong - 7)); Assert.IsNull(Util.GetByOutput(fstLong, maxLong + 7)); int num = LuceneTestCase.AtLeast(random, 100); for (int iter = 0; iter < num; iter++) { long v = TestUtil.NextInt64(random, minLong, maxLong); Int32sRef input = Util.GetByOutput(fstLong, v); Assert.IsTrue(validOutputs.Contains(v) || input == null); } } // find random matching word and make sure it's valid if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: verify random accepted terms"); } Int32sRef scratch = new Int32sRef(10); int num_ = LuceneTestCase.AtLeast(random, 500); for (int iter = 0; iter < num_; iter++) { T output = RandomAcceptedWord(fst, scratch); Assert.IsTrue(termsMap.ContainsKey(scratch), "accepted word " + InputToString(inputMode, scratch) + " is not valid"); Assert.IsTrue(OutputsEqual(termsMap[scratch], output)); if (doReverseLookup) { //System.out.println("lookup output=" + output + " outs=" + fst.Outputs); Int32sRef input = Util.GetByOutput(fstLong, (output as long?).Value); Assert.IsNotNull(input); //System.out.println(" got " + Util.toBytesRef(input, new BytesRef()).utf8ToString()); Assert.AreEqual(scratch, input); } } // test IntsRefFSTEnum.Seek: if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: verify seek"); } Int32sRefFSTEnum <T> fstEnum_ = new Int32sRefFSTEnum <T>(fst); num_ = LuceneTestCase.AtLeast(random, 100); for (int iter = 0; iter < num_; iter++) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" iter=" + iter); } if (random.NextBoolean()) { // seek to term that doesn't exist: while (true) { Int32sRef term = ToInt32sRef(GetRandomString(random), inputMode); int pos = pairs.BinarySearch(new InputOutput <T>(term, default(T))); if (pos < 0) { pos = -(pos + 1); // ok doesn't exist //System.out.println(" seek " + inputToString(inputMode, term)); Int32sRefFSTEnum.InputOutput <T> seekResult; if (random.Next(3) == 0) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekExact term=" + InputToString(inputMode, term)); } seekResult = fstEnum_.SeekExact(term); pos = -1; } else if (random.NextBoolean()) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekFloor term=" + InputToString(inputMode, term)); } seekResult = fstEnum_.SeekFloor(term); pos--; } else { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekCeil term=" + InputToString(inputMode, term)); } seekResult = fstEnum_.SeekCeil(term); } if (pos != -1 && pos < pairs.Count) { //System.out.println(" got " + inputToString(inputMode,seekResult.input) + " output=" + fst.Outputs.outputToString(seekResult.Output)); Assert.IsNotNull(seekResult, "got null but expected term=" + InputToString(inputMode, pairs[pos].Input)); if (LuceneTestCase.VERBOSE) { Console.WriteLine(" got " + InputToString(inputMode, seekResult.Input)); } Assert.AreEqual(pairs[pos].Input, seekResult.Input, "expected " + InputToString(inputMode, pairs[pos].Input) + " but got " + InputToString(inputMode, seekResult.Input)); Assert.IsTrue(OutputsEqual(pairs[pos].Output, seekResult.Output)); } else { // seeked before start or beyond end //System.out.println("seek=" + seekTerm); Assert.IsNull(seekResult, "expected null but got " + (seekResult == null ? "null" : InputToString(inputMode, seekResult.Input))); if (LuceneTestCase.VERBOSE) { Console.WriteLine(" got null"); } } break; } } } else { // seek to term that does exist: InputOutput <T> pair = pairs[random.Next(pairs.Count)]; Int32sRefFSTEnum.InputOutput <T> seekResult; if (random.Next(3) == 2) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do exists seekExact term=" + InputToString(inputMode, pair.Input)); } seekResult = fstEnum_.SeekExact(pair.Input); } else if (random.NextBoolean()) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do exists seekFloor " + InputToString(inputMode, pair.Input)); } seekResult = fstEnum_.SeekFloor(pair.Input); } else { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do exists seekCeil " + InputToString(inputMode, pair.Input)); } seekResult = fstEnum_.SeekCeil(pair.Input); } Assert.IsNotNull(seekResult); Assert.AreEqual(pair.Input, seekResult.Input, "got " + InputToString(inputMode, seekResult.Input) + " but expected " + InputToString(inputMode, pair.Input)); Assert.IsTrue(OutputsEqual(pair.Output, seekResult.Output)); } } if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: mixed next/seek"); } // test mixed next/seek num_ = LuceneTestCase.AtLeast(random, 100); for (int iter = 0; iter < num_; iter++) { if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: iter " + iter); } // reset: fstEnum_ = new Int32sRefFSTEnum <T>(fst); int upto = -1; while (true) { bool isDone = false; if (upto == pairs.Count - 1 || random.NextBoolean()) { // next upto++; if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do next"); } isDone = fstEnum_.Next() == null; } else if (upto != -1 && upto < 0.75 * pairs.Count && random.NextBoolean()) { int attempt = 0; for (; attempt < 10; attempt++) { Int32sRef term = ToInt32sRef(GetRandomString(random), inputMode); if (!termsMap.ContainsKey(term) && term.CompareTo(pairs[upto].Input) > 0) { int pos = pairs.BinarySearch(new InputOutput <T>(term, default(T))); Debug.Assert(pos < 0); upto = -(pos + 1); if (random.NextBoolean()) { upto--; Assert.IsTrue(upto != -1); if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekFloor(" + InputToString(inputMode, term) + ")"); } isDone = fstEnum_.SeekFloor(term) == null; } else { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekCeil(" + InputToString(inputMode, term) + ")"); } isDone = fstEnum_.SeekCeil(term) == null; } break; } } if (attempt == 10) { continue; } } else { int inc = random.Next(pairs.Count - upto - 1); upto += inc; if (upto == -1) { upto = 0; } if (random.NextBoolean()) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do seekCeil(" + InputToString(inputMode, pairs[upto].Input) + ")"); } isDone = fstEnum_.SeekCeil(pairs[upto].Input) == null; } else { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do seekFloor(" + InputToString(inputMode, pairs[upto].Input) + ")"); } isDone = fstEnum_.SeekFloor(pairs[upto].Input) == null; } } if (LuceneTestCase.VERBOSE) { if (!isDone) { Console.WriteLine(" got " + InputToString(inputMode, fstEnum_.Current.Input)); } else { Console.WriteLine(" got null"); } } if (upto == pairs.Count) { Assert.IsTrue(isDone); break; } else { Assert.IsFalse(isDone); Assert.AreEqual(pairs[upto].Input, fstEnum_.Current.Input); Assert.IsTrue(OutputsEqual(pairs[upto].Output, fstEnum_.Current.Output)); /* * if (upto < pairs.size()-1) { * int tryCount = 0; * while(tryCount < 10) { * final IntsRef t = toIntsRef(getRandomString(), inputMode); * if (pairs.get(upto).input.compareTo(t) < 0) { * final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0; * if (LuceneTestCase.VERBOSE) { * System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected); * } * Assert.AreEqual(expected, fstEnum.beforeNext(t)); * break; * } * tryCount++; * } * } */ } } } }
public virtual void Test() { int[] ints = new int[7]; Int32sRef input = new Int32sRef(ints, 0, ints.Length); int seed = Random.Next(); Directory dir = new MMapDirectory(CreateTempDir("2BFST")); for (int doPackIter = 0; doPackIter < 2; doPackIter++) { bool doPack = doPackIter == 1; // Build FST w/ NoOutputs and stop when nodeCount > 2.2B if (!doPack) { Console.WriteLine("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS"); Outputs <object> outputs = NoOutputs.Singleton; object NO_OUTPUT = outputs.NoOutput; Builder <object> b = new Builder <object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15); int count = 0; Random r = new Random(seed); int[] ints2 = new int[200]; Int32sRef input2 = new Int32sRef(ints2, 0, ints2.Length); while (true) { //System.out.println("add: " + input + " -> " + output); for (int i = 10; i < ints2.Length; i++) { ints2[i] = r.Next(256); } b.Add(input2, NO_OUTPUT); count++; if (count % 100000 == 0) { Console.WriteLine(count + ": " + b.GetFstSizeInBytes() + " bytes; " + b.TotStateCount + " nodes"); } if (b.TotStateCount > int.MaxValue + 100L * 1024 * 1024) { break; } NextInput(r, ints2); } FST <object> fst = b.Finish(); for (int verify = 0; verify < 2; verify++) { Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]"); Arrays.Fill(ints2, 0); r = new Random(seed); for (int i = 0; i < count; i++) { if (i % 1000000 == 0) { Console.WriteLine(i + "...: "); } for (int j = 10; j < ints2.Length; j++) { ints2[j] = r.Next(256); } Assert.AreEqual(NO_OUTPUT, Util.Get(fst, input2)); NextInput(r, ints2); } Console.WriteLine("\nTEST: enum all input/outputs"); Int32sRefFSTEnum <object> fstEnum = new Int32sRefFSTEnum <object>(fst); Arrays.Fill(ints2, 0); r = new Random(seed); int upto = 0; while (true) { Int32sRefFSTEnum.InputOutput <object> pair = fstEnum.Next(); if (pair == null) { break; } for (int j = 10; j < ints2.Length; j++) { ints2[j] = r.Next(256); } Assert.AreEqual(input2, pair.Input); Assert.AreEqual(NO_OUTPUT, pair.Output); upto++; NextInput(r, ints2); } Assert.AreEqual(count, upto); if (verify == 0) { Console.WriteLine("\nTEST: save/load FST and re-verify"); IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT); fst.Save(@out); @out.Dispose(); IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT); fst = new FST <object>(@in, outputs); @in.Dispose(); } else { dir.DeleteFile("fst"); } } } // Build FST w/ ByteSequenceOutputs and stop when FST // size = 3GB { Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes"); Outputs <BytesRef> outputs = ByteSequenceOutputs.Singleton; Builder <BytesRef> b = new Builder <BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15); var outputBytes = new byte[20]; BytesRef output = new BytesRef(outputBytes); Arrays.Fill(ints, 0); int count = 0; Random r = new Random(seed); while (true) { r.NextBytes(outputBytes); //System.out.println("add: " + input + " -> " + output); b.Add(input, BytesRef.DeepCopyOf(output)); count++; if (count % 1000000 == 0) { Console.WriteLine(count + "...: " + b.GetFstSizeInBytes() + " bytes"); } if (b.GetFstSizeInBytes() > LIMIT) { break; } NextInput(r, ints); } FST <BytesRef> fst = b.Finish(); for (int verify = 0; verify < 2; verify++) { Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]"); r = new Random(seed); Arrays.Fill(ints, 0); for (int i = 0; i < count; i++) { if (i % 1000000 == 0) { Console.WriteLine(i + "...: "); } r.NextBytes(outputBytes); Assert.AreEqual(output, Util.Get(fst, input)); NextInput(r, ints); } Console.WriteLine("\nTEST: enum all input/outputs"); Int32sRefFSTEnum <BytesRef> fstEnum = new Int32sRefFSTEnum <BytesRef>(fst); Arrays.Fill(ints, 0); r = new Random(seed); int upto = 0; while (true) { Int32sRefFSTEnum.InputOutput <BytesRef> pair = fstEnum.Next(); if (pair == null) { break; } Assert.AreEqual(input, pair.Input); r.NextBytes(outputBytes); Assert.AreEqual(output, pair.Output); upto++; NextInput(r, ints); } Assert.AreEqual(count, upto); if (verify == 0) { Console.WriteLine("\nTEST: save/load FST and re-verify"); IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT); fst.Save(@out); @out.Dispose(); IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT); fst = new FST <BytesRef>(@in, outputs); @in.Dispose(); } else { dir.DeleteFile("fst"); } } } // Build FST w/ PositiveIntOutputs and stop when FST // size = 3GB { Console.WriteLine("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long"); Outputs <long?> outputs = PositiveInt32Outputs.Singleton; Builder <long?> b = new Builder <long?>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, int.MaxValue, outputs, null, doPack, PackedInt32s.COMPACT, true, 15); long output = 1; Arrays.Fill(ints, 0); int count = 0; Random r = new Random(seed); while (true) { //System.out.println("add: " + input + " -> " + output); b.Add(input, output); output += 1 + r.Next(10); count++; if (count % 1000000 == 0) { Console.WriteLine(count + "...: " + b.GetFstSizeInBytes() + " bytes"); } if (b.GetFstSizeInBytes() > LIMIT) { break; } NextInput(r, ints); } FST <long?> fst = b.Finish(); for (int verify = 0; verify < 2; verify++) { Console.WriteLine("\nTEST: now verify [fst size=" + fst.GetSizeInBytes() + "; nodeCount=" + fst.NodeCount + "; arcCount=" + fst.ArcCount + "]"); Arrays.Fill(ints, 0); output = 1; r = new Random(seed); for (int i = 0; i < count; i++) { if (i % 1000000 == 0) { Console.WriteLine(i + "...: "); } // forward lookup: Assert.AreEqual(output, (long)Util.Get(fst, input)); // reverse lookup: Assert.AreEqual(input, Util.GetByOutput(fst, output)); output += 1 + r.Next(10); NextInput(r, ints); } Console.WriteLine("\nTEST: enum all input/outputs"); Int32sRefFSTEnum <long?> fstEnum = new Int32sRefFSTEnum <long?>(fst); Arrays.Fill(ints, 0); r = new Random(seed); int upto = 0; output = 1; while (true) { Int32sRefFSTEnum.InputOutput <long?> pair = fstEnum.Next(); if (pair == null) { break; } Assert.AreEqual(input, pair.Input); Assert.AreEqual(output, pair.Output.Value); output += 1 + r.Next(10); upto++; NextInput(r, ints); } Assert.AreEqual(count, upto); if (verify == 0) { Console.WriteLine("\nTEST: save/load FST and re-verify"); IndexOutput @out = dir.CreateOutput("fst", IOContext.DEFAULT); fst.Save(@out); @out.Dispose(); IndexInput @in = dir.OpenInput("fst", IOContext.DEFAULT); fst = new FST <long?>(@in, outputs); @in.Dispose(); } else { dir.DeleteFile("fst"); } } } } dir.Dispose(); }