コード例 #1
0
        // Prepares the two completion fixtures used by the tests: one produced directly by the
        // builder and a second FSTCompletion constructed over the same automaton.
        public override void SetUp()
        {
            base.SetUp();

            FSTCompletionBuilder fstBuilder = new FSTCompletionBuilder();

            // Feed every evaluation key into the builder, using its value (cast to int) as the bucket.
            foreach (Input entry in EvalKeys())
            {
                fstBuilder.Add(entry.term, (int)entry.v);
            }

            completion = fstBuilder.Build();

            // Reuses the automaton built above rather than building a second one.
            completionAlphabetical = new FSTCompletion(completion.FST, false, true);
        }
コード例 #2
0
 /// <summary>
 /// Restores this lookup from <paramref name="input"/>: first the entry count
 /// (read as a variable-length long), then the serialized automaton.
 /// </summary>
 /// <param name="input"> The input to read the count and the automaton from. </param>
 /// <returns> Always <c>true</c>. </returns>
 public override bool Load(DataInput input)
 {
     lock (this)
     {
         // The count precedes the automaton in the stream, so it has to be read first.
         count = input.ReadVLong();

         FST<object> automaton = new FST<object>(input, NoOutputs.Singleton);
         this.higherWeightsCompletion = new FSTCompletion(automaton);

         // The second completion is created over the FST exposed by the first one.
         this.normalCompletion = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);

         return true;
     }
 }
コード例 #3
0
        /// <summary>
        /// Builds both completions from the entries supplied by <paramref name="iterator"/>.
        /// Every entry is written to a temporary file prefixed with its encoded weight, the file
        /// is sorted offline, and the sorted entries are then distributed over <c>buckets</c>
        /// buckets and added to an <see cref="FSTCompletionBuilder"/>.
        /// </summary>
        /// <param name="iterator"> Source of terms and weights; must not report payloads or contexts. </param>
        /// <exception cref="System.ArgumentException">
        /// If <paramref name="iterator"/> reports payloads or contexts. </exception>
        public override void Build(IInputIterator iterator)
        {
            if (iterator.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }
            // Two scratch files in the sorter's default temp directory: the raw input and its sorted copy.
            FileInfo tempInput = FileSupport.CreateTempFile(typeof(FSTCompletionLookup).Name, ".input", OfflineSorter.DefaultTempDir());
            FileInfo tempSorted = FileSupport.CreateTempFile(typeof(FSTCompletionLookup).Name, ".sorted", OfflineSorter.DefaultTempDir());

            OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
            OfflineSorter.ByteSequencesReader reader = null;
            ExternalRefSorter sorter = null;

            // Push floats up front before sequences to sort them. For now, assume they are non-negative.
            // If negative floats are allowed some trickery needs to be done to find their byte order.
            // (In this code the weight is written as an int produced by EncodeWeight.)
            bool success = false;
            count = 0;
            try
            {
                byte[] buffer = new byte[0];
                ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
                BytesRef spare;
                while ((spare = iterator.Next()) != null)
                {
                    // Make room for the term bytes plus the 4 bytes of the int weight prefix.
                    if (spare.Length + 4 >= buffer.Length)
                    {
                        buffer = ArrayUtil.Grow(buffer, spare.Length + 4);
                    }

                    // Reset on every iteration: the buffer may have just been replaced by a larger one.
                    output.Reset(buffer);
                    output.WriteInt(EncodeWeight(iterator.Weight));
                    output.WriteBytes(spare.Bytes, spare.Offset, spare.Length);
                    writer.Write(buffer, 0, output.Position);
                }
                // Presumably required so that everything is flushed to tempInput before it is sorted.
                writer.Dispose();

                // We don't know the distribution of scores and we need to bucket them, so we'll sort
                // and divide into equal buckets.
                OfflineSorter.SortInfo info = (new OfflineSorter()).Sort(tempInput, tempSorted);
                // The unsorted file is not read again after this point.
                tempInput.Delete();
                FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter = new ExternalRefSorter(new OfflineSorter()), sharedTailLength);

                int inputLines = info.Lines;
                reader = new OfflineSorter.ByteSequencesReader(tempSorted);
                long line = 0;
                int previousBucket = 0;
                int previousScore = 0;
                ByteArrayDataInput input = new ByteArrayDataInput();
                BytesRef tmp1 = new BytesRef();
                BytesRef tmp2 = new BytesRef();
                while (reader.Read(tmp1))
                {
                    // Each record starts with the int weight written above.
                    input.Reset(tmp1.Bytes);
                    int currentScore = input.ReadInt();

                    int bucket;
                    // Entries with the same score as the previous entry stay in the previous entry's bucket;
                    // otherwise the bucket is derived from the entry's position in the sorted file.
                    if (line > 0 && currentScore == previousScore)
                    {
                        bucket = previousBucket;
                    }
                    else
                    {
                        bucket = (int)(line * buckets / inputLines);
                    }
                    previousScore = currentScore;
                    previousBucket = bucket;

                    // Only append the input, discard the weight.
                    // tmp2 shares tmp1's byte array and starts right after the weight prefix.
                    tmp2.Bytes = tmp1.Bytes;
                    tmp2.Offset = input.Position;
                    tmp2.Length = tmp1.Length - input.Position;
                    builder.Add(tmp2, bucket);

                    line++;
                    count++;
                }

                // The two FSTCompletions share the same automaton.
                this.higherWeightsCompletion = builder.Build();
                this.normalCompletion = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);

                success = true;
            }
            finally
            {
                // NOTE(review): on the success path the writer was already disposed above and is
                // passed to Close again here - confirm that disposing it twice is harmless.
                if (success)
                {
                    IOUtils.Close(reader, writer, sorter);
                }
                else
                {
                    // Presumably suppresses close failures so the original exception propagates - confirm.
                    IOUtils.CloseWhileHandlingException(reader, writer, sorter);
                }

                // Remove both scratch files regardless of outcome (tempInput may already have been deleted above).
                tempInput.Delete();
                tempSorted.Delete();
            }
        }
コード例 #4
0
 /// <summary>
 /// Creates a lookup on top of an automaton that has already been built.
 /// </summary>
 /// <param name="completion">
 ///          The <see cref="FSTCompletion"/> whose automaton backs both internal completions. </param>
 /// <param name="exactMatchFirst">
 ///          When <c>true</c>, exact matches are moved to the top of the suggestions
 ///          list. When <c>false</c>, suggestions are ordered by discretized weight
 ///          and alphabetically within each bucket. </param>
 public FSTCompletionLookup(FSTCompletion completion, bool exactMatchFirst)
     : this(INVALID_BUCKETS_COUNT, exactMatchFirst)
 {
     // Both completions are constructed over the same automaton instance.
     var sharedAutomaton = completion.FST;

     this.higherWeightsCompletion = new FSTCompletion(sharedAutomaton, true, exactMatchFirst);
     this.normalCompletion = new FSTCompletion(sharedAutomaton, false, exactMatchFirst);
 }
コード例 #5
0
 // Builds a completion from a builder that received no entries and looks up the empty prefix;
 // AssertMatchEquals is called without any expected entries.
 public void TestEmptyInput()
 {
     completion = new FSTCompletionBuilder().Build();

     string emptyPrefix = StringToCharSequence("").ToString();
     AssertMatchEquals(completion.DoLookup(emptyPrefix, 10));
 }
コード例 #6
0
        // Verifies which suggestions are returned, and in what order, for different requested
        // counts, with and without exact-match promotion.
        public void TestRequestedCount()
        {
            string prefix;

            // Limit of two: the exact match 'one' is promoted ahead of the higher-ranking results.
            prefix = StringToCharSequence("one").ToString();
            AssertMatchEquals(completion.DoLookup(prefix, 2),
                "one/0.0",
                "oneness/1.0");

            // 'four' is collected in a bucket and then once more as an exact match.
            prefix = StringToCharSequence("four").ToString();
            AssertMatchEquals(completion.DoLookup(prefix, 2),
                "four/0.0",
                "fourblah/1.0");

            // A larger limit: verify how exact matches are reordered.
            prefix = StringToCharSequence("four").ToString();
            AssertMatchEquals(completion.DoLookup(prefix, 4),
                "four/0.0",
                "fourblah/1.0",
                "fourteen/1.0",
                "fourier/0.0");

            // Alphabetical completion: 'one' comes first once all alphabetical results are collected.
            prefix = StringToCharSequence("one").ToString();
            AssertMatchEquals(completionAlphabetical.DoLookup(prefix, 2),
                "one/0.0",
                "oneness/1.0");

            // Without promotion, 'one' does not make it past the two higher-ranking results.
            FSTCompletion noPromotion = new FSTCompletion(completion.FST, true, false);
            prefix = StringToCharSequence("one").ToString();
            AssertMatchEquals(noPromotion.DoLookup(prefix, 2),
                "oneness/1.0",
                "onerous/1.0");

            // The alphabetical lookup is repeated; the expected result is the same as before.
            prefix = StringToCharSequence("one").ToString();
            AssertMatchEquals(completionAlphabetical.DoLookup(prefix, 2),
                "one/0.0",
                "oneness/1.0");
        }