public virtual void TestBasic()
        {
            byte[] bytes = new byte[] { 1, 65 };
            ByteArrayDataInput @in = new ByteArrayDataInput(bytes);
            Assert.AreEqual("A", @in.ReadString());

            bytes = new byte[] { 1, 1, 65 };
            @in.Reset(bytes, 1, 2);
            Assert.AreEqual("A", @in.ReadString());
        }
 public SortedSetDocValuesAnonymousInnerClassHelper(Lucene42DocValuesProducer outerInstance, FSTEntry entry, BinaryDocValues docToOrds, FST<long> fst, FST<long>.BytesReader @in, FST<long>.Arc<long> firstArc, FST<long>.Arc<long> scratchArc, IntsRef scratchInts, BytesRefFSTEnum<long> fstEnum, BytesRef @ref, ByteArrayDataInput input)
 {
     this.OuterInstance = outerInstance;
     this.Entry = entry;
     this.DocToOrds = docToOrds;
     this.Fst = fst;
     this.@in = @in;
     this.FirstArc = firstArc;
     this.ScratchArc = scratchArc;
     this.ScratchInts = scratchInts;
     this.FstEnum = fstEnum;
     this.@ref = @ref;
     this.Input = input;
 }
        public override void Build(IInputIterator iterator)
        {
            if (iterator.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }
            FileInfo tempInput = FileSupport.CreateTempFile(typeof(FSTCompletionLookup).Name, ".input", OfflineSorter.DefaultTempDir());
            FileInfo tempSorted = FileSupport.CreateTempFile(typeof(FSTCompletionLookup).Name, ".sorted", OfflineSorter.DefaultTempDir());

            OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
            OfflineSorter.ByteSequencesReader reader = null;
            ExternalRefSorter sorter = null;

            // Push floats up front before sequences to sort them. For now, assume they are non-negative.
            // If negative floats are allowed some trickery needs to be done to find their byte order.
            bool success = false;
            count = 0;
            try
            {
                byte[] buffer = new byte[0];
                ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
                BytesRef spare;
                while ((spare = iterator.Next()) != null)
                {
                    if (spare.Length + 4 >= buffer.Length)
                    {
                        buffer = ArrayUtil.Grow(buffer, spare.Length + 4);
                    }

                    output.Reset(buffer);
                    output.WriteInt(EncodeWeight(iterator.Weight));
                    output.WriteBytes(spare.Bytes, spare.Offset, spare.Length);
                    writer.Write(buffer, 0, output.Position);
                }
                writer.Dispose();

                // We don't know the distribution of scores and we need to bucket them, so we'll sort
                // and divide into equal buckets.
                OfflineSorter.SortInfo info = (new OfflineSorter()).Sort(tempInput, tempSorted);
                tempInput.Delete();
                FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter = new ExternalRefSorter(new OfflineSorter()), sharedTailLength);

                int inputLines = info.Lines;
                reader = new OfflineSorter.ByteSequencesReader(tempSorted);
                long line = 0;
                int previousBucket = 0;
                int previousScore = 0;
                ByteArrayDataInput input = new ByteArrayDataInput();
                BytesRef tmp1 = new BytesRef();
                BytesRef tmp2 = new BytesRef();
                while (reader.Read(tmp1))
                {
                    input.Reset(tmp1.Bytes);
                    int currentScore = input.ReadInt();

                    int bucket;
                    if (line > 0 && currentScore == previousScore)
                    {
                        bucket = previousBucket;
                    }
                    else
                    {
                        bucket = (int)(line * buckets / inputLines);
                    }
                    previousScore = currentScore;
                    previousBucket = bucket;

                    // Only append the input, discard the weight.
                    tmp2.Bytes = tmp1.Bytes;
                    tmp2.Offset = input.Position;
                    tmp2.Length = tmp1.Length - input.Position;
                    builder.Add(tmp2, bucket);

                    line++;
                    count++;
                }

                // The two FSTCompletions share the same automaton.
                this.higherWeightsCompletion = builder.Build();
                this.normalCompletion = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(reader, writer, sorter);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(reader, writer, sorter);
                }

                tempInput.Delete();
                tempSorted.Delete();
            }
        }
        public override SortedSetDocValues GetSortedSet(FieldInfo field)
        {
            FSTEntry entry = Fsts[field.Number];
            if (entry.NumOrds == 0)
            {
                return DocValues.EMPTY_SORTED_SET; // empty FST!
            }
            FST<long> instance;
            lock (this)
            {
                if (!FstInstances.TryGetValue(field.Number, out instance))
                {
                    Data.Seek(entry.Offset);
                    instance = new FST<long>((DataInput)Data, Lucene.Net.Util.Fst.PositiveIntOutputs.Singleton);
                    RamBytesUsed_Renamed.AddAndGet(instance.SizeInBytes());
                    FstInstances[field.Number] = instance;
                }
            }
            BinaryDocValues docToOrds = GetBinary(field);
            FST<long> fst = instance;

            // per-thread resources
            FST<long>.BytesReader @in = fst.BytesReader;
            FST<long>.Arc<long> firstArc = new FST<long>.Arc<long>();
            FST<long>.Arc<long> scratchArc = new FST<long>.Arc<long>();
            IntsRef scratchInts = new IntsRef();
            BytesRefFSTEnum<long> fstEnum = new BytesRefFSTEnum<long>(fst);
            BytesRef @ref = new BytesRef();
            ByteArrayDataInput input = new ByteArrayDataInput();
            return new SortedSetDocValuesAnonymousInnerClassHelper(this, entry, docToOrds, fst, @in, firstArc, scratchArc, scratchInts, fstEnum, @ref, input);
        }
 internal virtual void Reset(int numTerms, int flags, int[] prefixLengths, int[] suffixLengths, int[] termFreqs, int[] positionIndex, int[] positions, int[] startOffsets, int[] lengths, int[] payloadIndex, BytesRef payloads, ByteArrayDataInput @in)
 {
     this.NumTerms = numTerms;
     this.PrefixLengths = prefixLengths;
     this.SuffixLengths = suffixLengths;
     this.TermFreqs = termFreqs;
     this.PositionIndex = positionIndex;
     this.Positions = positions;
     this.StartOffsets = startOffsets;
     this.Lengths = lengths;
     this.PayloadIndex = payloadIndex;
     this.Payloads = payloads;
     this.@in = @in;
     StartPos = @in.Position;
     Reset();
 }
예제 #6
0
 /// <summary>
 /// Constructs a new Stemmer which will use the provided Dictionary to create its stems.
 /// </summary>
 /// <param name="dictionary"> Dictionary that will be used to create the stems </param>
 public Stemmer(Dictionary dictionary)
 {
     this.dictionary = dictionary;
     this.affixReader = new ByteArrayDataInput(dictionary.affixData);
 }
 public SortedSetDocValuesAnonymousInnerClassHelper(FSTEntry fstEntry, BinaryDocValues binaryDocValues, FST<long?> fst1,
     FST.BytesReader @in, FST.Arc<long?> arc, FST.Arc<long?> scratchArc1, IntsRef intsRef, BytesRefFSTEnum<long?> bytesRefFstEnum,
     BytesRef @ref, ByteArrayDataInput byteArrayDataInput)
 {
     entry = fstEntry;
     docToOrds = binaryDocValues;
     fst = fst1;
     this.@in = @in;
     firstArc = arc;
     scratchArc = scratchArc1;
     scratchInts = intsRef;
     fstEnum = bytesRefFstEnum;
     this.@ref = @ref;
     input = byteArrayDataInput;
 }
        public override SortedSetDocValues GetSortedSet(FieldInfo field)
        {
            var entry = fsts[field.Number];
            if (entry.numOrds == 0)
            {
                return DocValues.EMPTY_SORTED_SET; // empty FST!
            }
            FST<long?> instance;
            lock (this)
            {
                instance = fstInstances[field.Number];
                if (instance == null)
                {
                    data.Seek(entry.offset);
                    instance = new FST<long?>(data, PositiveIntOutputs.Singleton);
                    ramBytesUsed.AddAndGet(instance.SizeInBytes());
                    fstInstances[field.Number] = instance;
                }
            }
            var docToOrds = GetBinary(field);
            var fst = instance;

            // per-thread resources
            var @in = fst.BytesReader;
            var firstArc = new FST.Arc<long?>();
            var scratchArc = new FST.Arc<long?>();
            var scratchInts = new IntsRef();
            var fstEnum = new BytesRefFSTEnum<long?>(fst);
            var @ref = new BytesRef();
            var input = new ByteArrayDataInput();
            return new SortedSetDocValuesAnonymousInnerClassHelper(entry, docToOrds, fst, @in, firstArc,
                scratchArc, scratchInts, fstEnum, @ref, input);
        }