/// <summary>
/// Restores this lookup from <paramref name="input"/>: first the entry count
/// (a variable-length 64-bit integer), then the serialized automaton.
/// </summary>
/// <param name="input">The source to read the count and the FST from.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool Load(DataInput input)
 {
     lock (this)
     {
         count = input.ReadVInt64();

         // The automaton is read once and wrapped by both completion instances.
         var automaton = new FST<object>(input, NoOutputs.Singleton);
         higherWeightsCompletion = new FSTCompletion(automaton);
         normalCompletion = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);

         return true;
     }
 }
        /// <summary>
        /// Builds the shared fixtures: <c>completion</c> from every entry returned by
        /// <c>EvalKeys()</c>, and <c>completionAlphabetical</c> as a second
        /// <see cref="FSTCompletion"/> over the same automaton.
        /// </summary>
        public override void SetUp()
        {
            base.SetUp();

            var fstBuilder = new FSTCompletionBuilder();
            foreach (Input entry in EvalKeys())
            {
                // The entry's value is truncated to an int before being handed to the builder.
                fstBuilder.Add(entry.term, (int)entry.v);
            }

            completion = fstBuilder.Build();
            completionAlphabetical = new FSTCompletion(completion.FST, false, true);
        }
        /// <summary>
        /// Adds a single key consisting of one supplementary code point (U+24B62,
        /// four bytes in UTF-8) and checks that looking it up yields exactly one result.
        /// </summary>
        public void TestThreeByte()
        {
            // Decode the raw UTF-8 bytes into the key string (the Java original built
            // the string from a signed byte array with an explicit UTF-8 charset).
            byte[] utf8 = new byte[] { 0xF0, 0xA4, 0xAD, 0xA2 };
            string key = Encoding.UTF8.GetString(utf8);

            var builder = new FSTCompletionBuilder();
            builder.Add(new BytesRef(key), 0);
            FSTCompletion lookup = builder.Build();

            string query = StringToCharSequence(key).ToString();
            IEnumerable<FSTCompletion.Completion> result = lookup.DoLookup(query, 1);

            assertEquals(1, result.Count());
        }
 /// <summary>
 /// Restores this lookup from <paramref name="input"/> while holding the monitor on
 /// this instance: first the entry count (a variable-length 64-bit integer), then
 /// the serialized automaton.
 /// </summary>
 /// <param name="input">The source to read the count and the FST from.</param>
 /// <returns>Always <c>true</c>.</returns>
 public override bool Load(DataInput input)
 {
     UninterruptableMonitor.Enter(this);
     try
     {
         count = input.ReadVInt64();

         // The automaton is read once and wrapped by both completion instances.
         var automaton = new FST<object>(input, NoOutputs.Singleton);
         higherWeightsCompletion = new FSTCompletion(automaton);
         normalCompletion = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);

         return true;
     }
     finally
     {
         // Always release the monitor, even when reading fails.
         UninterruptableMonitor.Exit(this);
     }
 }
Exemple #5
0
        // LUCENENET specific - renaming from Main() because we must only have 1 entry point.
        // Not sure why this utility is in a test project anyway - this seems like something that should
        // be in Lucene.Net.Suggest so we can put it into the lucene-cli tool.
        /// <summary>
        /// Reads a dictionary file line by line, adds every line to an
        /// <see cref="FSTCompletionBuilder"/> (bucket = line index modulo the bucket count),
        /// builds the completion automaton and saves it to <c>completion.fst</c>.
        /// </summary>
        /// <param name="args">Command line arguments; not used.</param>
        public static void Main2(string[] args)
        {
            FileInfo input = new FileInfo("/home/dweiss/tmp/shuffled.dict");

            int buckets      = 20;
            int shareMaxTail = 10;

            ExternalRefSorter sorter = new ExternalRefSorter(new OfflineSorter());
            try
            {
                FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter, shareMaxTail);

                BytesRef scratch = new BytesRef();
                int      count   = 0;

                // The dictionary is only read, so request read access explicitly (the two-argument
                // FileStream constructor asks for read/write access) and make sure the reader is
                // disposed even when reading or adding fails.
                using (TextReader reader =
                    new StreamReader(
                        new FileStream(input.FullName, FileMode.Open, FileAccess.Read), Encoding.UTF8))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        scratch.CopyChars(line);
                        builder.Add(scratch, count % buckets);

                        // Progress report every 100000 lines; count is incremented before printing.
                        if ((count++ % 100000) == 0)
                        {
                            Console.WriteLine("Line: " + count);
                        }
                    }
                }

                Console.WriteLine("Building FSTCompletion.");
                FSTCompletion completion = builder.Build();

                FileInfo fstFile = new FileInfo("completion.fst");

                Console.WriteLine("Done. Writing automaton: " + fstFile.FullName);
                completion.FST.Save(fstFile);
            }
            finally
            {
                // Release the sorter on both the success and the failure path.
                sorter.Dispose();
            }
        }
        /// <summary>
        /// Checks which completions are returned, and in which order, when only a
        /// limited number of results is requested from differently configured
        /// <see cref="FSTCompletion"/> instances over the same automaton.
        /// </summary>
        public void TestRequestedCount()
        {
            // Exact match 'one' is moved to the front although two higher ranking results were collected.
            var oneTopTwo = completion.DoLookup(StringToCharSequence("one").ToString(), 2);
            AssertMatchEquals(oneTopTwo, "one/0.0", "oneness/1.0");

            // 'four' shows up while collecting a bucket and once more as the exact match.
            var fourTopTwo = completion.DoLookup(StringToCharSequence("four").ToString(), 2);
            AssertMatchEquals(fourTopTwo, "four/0.0", "fourblah/1.0");

            // Exact matches are reordered to the front.
            var fourTopFour = completion.DoLookup(StringToCharSequence("four").ToString(), 4);
            AssertMatchEquals(fourTopFour, "four/0.0", "fourblah/1.0", "fourteen/1.0", "fourier/0.0");

            // With alphabetical collection 'one' comes first.
            var oneAlphabetical = completionAlphabetical.DoLookup(StringToCharSequence("one").ToString(), 2);
            AssertMatchEquals(oneAlphabetical, "one/0.0", "oneness/1.0");

            // Without exact-match promotion 'one' does not make it into the top two.
            FSTCompletion noPromotion = new FSTCompletion(completion.FST, true, false);
            var oneNoPromotion = noPromotion.DoLookup(StringToCharSequence("one").ToString(), 2);
            AssertMatchEquals(oneNoPromotion, "oneness/1.0", "onerous/1.0");

            // The alphabetical completion still reports 'one' first when queried again.
            var oneAlphabeticalAgain = completionAlphabetical.DoLookup(StringToCharSequence("one").ToString(), 2);
            AssertMatchEquals(oneAlphabeticalAgain, "one/0.0", "oneness/1.0");
        }
 /// <summary>
 /// Builds a completion from a builder that received no entries and checks that
 /// looking up the empty string yields no matches.
 /// </summary>
 public void TestEmptyInput()
 {
     FSTCompletionBuilder emptyBuilder = new FSTCompletionBuilder();
     completion = emptyBuilder.Build();

     var matches = completion.DoLookup(StringToCharSequence("").ToString(), 10);

     // No expected strings are passed: the result is compared against an empty list.
     AssertMatchEquals(matches);
 }
        /// <summary>
        /// Builds both completion automata from the entries of <paramref name="iterator"/>.
        /// <para/>
        /// Every entry is written to a temporary file prefixed with its encoded weight, the
        /// file is sorted, and the sorted entries are distributed over <c>buckets</c> buckets
        /// by their position in the sorted file (entries with the same score as their
        /// predecessor stay in the predecessor's bucket). The weight prefix is stripped
        /// before an entry is handed to the <see cref="FSTCompletionBuilder"/>.
        /// </summary>
        /// <param name="iterator">The entries and weights to build from.</param>
        /// <exception cref="System.ArgumentException">
        /// If <paramref name="iterator"/> reports payloads or contexts.
        /// </exception>
        public override void Build(IInputIterator iterator)
        {
            if (iterator.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }
            FileInfo tempInput  = FileSupport.CreateTempFile(typeof(FSTCompletionLookup).Name, ".input", OfflineSorter.DefaultTempDir());
            FileInfo tempSorted = FileSupport.CreateTempFile(typeof(FSTCompletionLookup).Name, ".sorted", OfflineSorter.DefaultTempDir());

            OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
            OfflineSorter.ByteSequencesReader reader = null;
            ExternalRefSorter sorter = null;

            // Push floats up front before sequences to sort them. For now, assume they are non-negative.
            // If negative floats are allowed some trickery needs to be done to find their byte order.
            bool success = false;

            count = 0;
            try
            {
                byte[] buffer = new byte[0];
                ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
                BytesRef            spare;
                while ((spare = iterator.Next()) != null)
                {
                    // Make room for the 4-byte weight prefix plus the entry bytes.
                    if (spare.Length + 4 >= buffer.Length)
                    {
                        buffer = ArrayUtil.Grow(buffer, spare.Length + 4);
                    }

                    // Record layout: [encoded weight (int32)][entry bytes].
                    output.Reset(buffer);
                    output.WriteInt32(EncodeWeight(iterator.Weight));
                    output.WriteBytes(spare.Bytes, spare.Offset, spare.Length);
                    writer.Write(buffer, 0, output.Position);
                }
                writer.Dispose();

                // We don't know the distribution of scores and we need to bucket them, so we'll sort
                // and divide into equal buckets.
                OfflineSorter.SortInfo info = (new OfflineSorter()).Sort(tempInput, tempSorted);
                tempInput.Delete();
                FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter = new ExternalRefSorter(new OfflineSorter()), sharedTailLength);

                int inputLines = info.Lines;
                reader = new OfflineSorter.ByteSequencesReader(tempSorted);
                long line                = 0;
                int  previousBucket      = 0;
                int  previousScore       = 0;
                ByteArrayDataInput input = new ByteArrayDataInput();
                BytesRef           tmp1  = new BytesRef();
                BytesRef           tmp2  = new BytesRef();
                while (reader.Read(tmp1))
                {
                    // NOTE(review): reading starts at the beginning of tmp1.Bytes, i.e. this
                    // presumably relies on the reader filling tmp1 from offset 0 - confirm.
                    input.Reset(tmp1.Bytes);
                    int currentScore = input.ReadInt32();

                    int bucket;
                    if (line > 0 && currentScore == previousScore)
                    {
                        // Equal scores always share a bucket.
                        bucket = previousBucket;
                    }
                    else
                    {
                        // Otherwise the bucket is proportional to the position in the sorted file.
                        bucket = (int)(line * buckets / inputLines);
                    }
                    previousScore  = currentScore;
                    previousBucket = bucket;

                    // Only append the input, discard the weight.
                    tmp2.Bytes  = tmp1.Bytes;
                    tmp2.Offset = input.Position;
                    tmp2.Length = tmp1.Length - input.Position;
                    builder.Add(tmp2, bucket);

                    line++;
                    count++;
                }

                // The two FSTCompletions share the same automaton.
                this.higherWeightsCompletion = builder.Build();
                this.normalCompletion        = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);

                success = true;
            }
            finally
            {
                try
                {
                    if (success)
                    {
                        IOUtils.Close(reader, writer, sorter);
                    }
                    else
                    {
                        IOUtils.CloseWhileHandlingException(reader, writer, sorter);
                    }
                }
                finally
                {
                    // Remove the temporary files even when closing the resources above throws;
                    // otherwise they would be left behind in the temp directory.
                    tempInput.Delete();
                    tempSorted.Delete();
                }
            }
        }
 /// <summary>
 /// Creates a lookup on top of an automaton that has already been built.
 /// </summary>
 /// <param name="completion">
 ///          The <see cref="FSTCompletion"/> whose automaton backs this lookup. </param>
 /// <param name="exactMatchFirst">
 ///          When <c>true</c>, exact matches are moved to the top of the
 ///          suggestions list. When <c>false</c>, they stay in the order of
 ///          discretized weight, alphabetical within a bucket. </param>
 public FSTCompletionLookup(FSTCompletion completion, bool exactMatchFirst)
     : this(INVALID_BUCKETS_COUNT, exactMatchFirst)
 {
     // Both instances wrap the automaton of the given completion; they differ
     // only in the second constructor argument.
     higherWeightsCompletion = new FSTCompletion(completion.FST, true, exactMatchFirst);
     normalCompletion = new FSTCompletion(completion.FST, false, exactMatchFirst);
 }