// Prepares the fixtures shared by the tests: feeds every key returned by
// EvalKeys() into a fresh builder, keeps the built completion, and creates a
// second completion over the same FST for the alphabetical-order checks.
public override void SetUp()
{
    base.SetUp();

    FSTCompletionBuilder fstBuilder = new FSTCompletionBuilder();
    foreach (Input entry in EvalKeys())
    {
        // The key's value is truncated to an int before it is handed to the builder.
        int bucket = (int)entry.v;
        fstBuilder.Add(entry.term, bucket);
    }

    completion = fstBuilder.Build();

    // Same automaton as 'completion', but constructed with the flags (false, true).
    completionAlphabetical = new FSTCompletion(completion.FST, false, true);
}
/// <summary>
/// Restores this lookup from <paramref name="input"/>: first the entry count
/// (a variable-length long), then the serialized FST. Both completion views
/// are rebuilt on top of the FST that was read.
/// </summary>
/// <param name="input">Source to read the count and the automaton from.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool Load(DataInput input)
{
    lock (this)
    {
        // The count precedes the automaton in the stream.
        count = input.ReadVLong();

        FST<object> automaton = new FST<object>(input, NoOutputs.Singleton);
        higherWeightsCompletion = new FSTCompletion(automaton);

        // The second view reuses the FST held by the first one.
        normalCompletion = new FSTCompletion(
            higherWeightsCompletion.FST,
            false,
            exactMatchFirst);

        return true;
    }
}
/// <summary>
/// Builds both completion automata from the entries produced by
/// <paramref name="iterator"/>. Entries are spilled to a temporary file with
/// their encoded weight in front, sorted offline, and then assigned to
/// buckets according to their position in the sorted file.
/// </summary>
/// <param name="iterator">Source of terms and weights.</param>
/// <exception cref="System.ArgumentException">
/// If the iterator reports payloads or contexts; neither is supported here.
/// </exception>
public override void Build(IInputIterator iterator)
{
    if (iterator.HasPayloads)
    {
        throw new System.ArgumentException("this suggester doesn't support payloads");
    }
    if (iterator.HasContexts)
    {
        throw new System.ArgumentException("this suggester doesn't support contexts");
    }

    string tempPrefix = typeof(FSTCompletionLookup).Name;
    FileInfo tempInput = FileSupport.CreateTempFile(tempPrefix, ".input", OfflineSorter.DefaultTempDir());
    FileInfo tempSorted = FileSupport.CreateTempFile(tempPrefix, ".sorted", OfflineSorter.DefaultTempDir());

    OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
    OfflineSorter.ByteSequencesReader reader = null;
    ExternalRefSorter sorter = null;

    bool success = false;
    count = 0;
    try
    {
        // Phase 1: write every entry as [encoded weight (int)][term bytes] so
        // that sorting the records orders them by weight first. Weights are
        // assumed to be non-negative; negative values would need extra work
        // to get a usable byte order.
        byte[] buffer = new byte[0];
        ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
        for (BytesRef term = iterator.Next(); term != null; term = iterator.Next())
        {
            int required = term.Length + 4;
            if (buffer.Length <= required)
            {
                buffer = ArrayUtil.Grow(buffer, required);
            }

            output.Reset(buffer);
            output.WriteInt(EncodeWeight(iterator.Weight));
            output.WriteBytes(term.Bytes, term.Offset, term.Length);
            writer.Write(buffer, 0, output.Position);
        }
        writer.Dispose();

        // Phase 2: the score distribution is unknown, so sort the records and
        // split them into equally sized buckets.
        OfflineSorter.SortInfo info = (new OfflineSorter()).Sort(tempInput, tempSorted);
        tempInput.Delete();

        // Assign 'sorter' before constructing the builder so the finally
        // block can close it even if construction fails.
        sorter = new ExternalRefSorter(new OfflineSorter());
        FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter, sharedTailLength);

        int inputLines = info.Lines;
        reader = new OfflineSorter.ByteSequencesReader(tempSorted);

        long line = 0;
        int previousBucket = 0;
        int previousScore = 0;
        ByteArrayDataInput input = new ByteArrayDataInput();
        BytesRef record = new BytesRef();
        BytesRef termOnly = new BytesRef();
        while (reader.Read(record))
        {
            input.Reset(record.Bytes);
            int currentScore = input.ReadInt();

            // Entries with the same score as their predecessor stay in the
            // predecessor's bucket; otherwise the bucket follows the line's
            // relative position in the sorted input.
            bool sameScoreAsPrevious = line > 0 && currentScore == previousScore;
            int bucket = sameScoreAsPrevious
                ? previousBucket
                : (int)(line * buckets / inputLines);

            previousScore = currentScore;
            previousBucket = bucket;

            // Hand only the term to the builder; the leading weight is skipped.
            int weightEnd = input.Position;
            termOnly.Bytes = record.Bytes;
            termOnly.Offset = weightEnd;
            termOnly.Length = record.Length - weightEnd;
            builder.Add(termOnly, bucket);

            line++;
            count++;
        }

        // Both completion views are backed by the same automaton.
        higherWeightsCompletion = builder.Build();
        normalCompletion = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(reader, writer, sorter);
        }
        else
        {
            IOUtils.Close(reader, writer, sorter);
        }

        tempInput.Delete();
        tempSorted.Delete();
    }
}
/// <summary>
/// Creates a lookup on top of an automaton that has already been built.
/// </summary>
/// <param name="completion">
/// The <see cref="FSTCompletion"/> whose FST backs both completion views of this lookup.
/// </param>
/// <param name="exactMatchFirst">
/// When <c>true</c>, exact matches are promoted to the top of the
/// suggestions list. When <c>false</c>, they appear in the order of
/// discretized weight and alphabetically within the bucket.
/// </param>
public FSTCompletionLookup(FSTCompletion completion, bool exactMatchFirst)
    : this(INVALID_BUCKETS_COUNT, exactMatchFirst)
{
    // Both views wrap the same FST; they differ only in the second constructor flag.
    var automaton = completion.FST;
    normalCompletion = new FSTCompletion(automaton, false, exactMatchFirst);
    higherWeightsCompletion = new FSTCompletion(automaton, true, exactMatchFirst);
}
// A completion built from no entries must yield no matches for the empty prefix.
public void TestEmptyInput()
{
    completion = new FSTCompletionBuilder().Build();

    var matches = completion.DoLookup(StringToCharSequence("").ToString(), 10);

    // No expected entries are listed: the result is compared against an empty set.
    AssertMatchEquals(matches);
}
// Checks which suggestions are returned, and in what order, when the number of
// requested results is limited.
public void TestRequestedCount()
{
    // 'one' is promoted after collecting two higher ranking results.
    var oneTopTwo = completion.DoLookup(StringToCharSequence("one").ToString(), 2);
    AssertMatchEquals(oneTopTwo,
        "one/0.0",
        "oneness/1.0");

    // 'four' is collected in a bucket and then again as an exact match.
    var fourTopTwo = completion.DoLookup(StringToCharSequence("four").ToString(), 2);
    AssertMatchEquals(fourTopTwo,
        "four/0.0",
        "fourblah/1.0");

    // Check reordering of exact matches.
    var fourTopFour = completion.DoLookup(StringToCharSequence("four").ToString(), 4);
    AssertMatchEquals(fourTopFour,
        "four/0.0",
        "fourblah/1.0",
        "fourteen/1.0",
        "fourier/0.0");

    // 'one' is at the top after collecting all alphabetical results.
    var oneAlphabetical = completionAlphabetical.DoLookup(StringToCharSequence("one").ToString(), 2);
    AssertMatchEquals(oneAlphabetical,
        "one/0.0",
        "oneness/1.0");

    // 'one' is not promoted after collecting two higher ranking results.
    FSTCompletion withoutPromotion = new FSTCompletion(completion.FST, true, false);
    var oneNotPromoted = withoutPromotion.DoLookup(StringToCharSequence("one").ToString(), 2);
    AssertMatchEquals(oneNotPromoted,
        "oneness/1.0",
        "onerous/1.0");

    // 'one' is at the top after collecting all alphabetical results.
    var oneAlphabeticalAgain = completionAlphabetical.DoLookup(StringToCharSequence("one").ToString(), 2);
    AssertMatchEquals(oneAlphabeticalAgain,
        "one/0.0",
        "oneness/1.0");
}