/// <summary>
/// Checks that <c>ReadString</c> yields "A" both when the input wraps a whole
/// array and after the input has been re-pointed at a sub-range of a larger array.
/// </summary>
public virtual void TestBasic()
{
    // Whole-array case: the two bytes { 1, 65 } are expected to decode to "A".
    ByteArrayDataInput input = new ByteArrayDataInput(new byte[] { 1, 65 });
    Assert.AreEqual("A", input.ReadString());

    // Sub-range case: same payload preceded by one extra byte; Reset is given
    // offset 1 and length 2 so the leading byte is skipped.
    byte[] padded = new byte[] { 1, 1, 65 };
    input.Reset(padded, 1, 2);
    Assert.AreEqual("A", input.ReadString());
}
/// <summary>
/// Stores the enclosing producer together with the FST and the scratch objects
/// handed in by the caller; nothing is computed here, every argument is simply
/// captured in the member of the matching name.
/// </summary>
public SortedSetDocValuesAnonymousInnerClassHelper(
    Lucene42DocValuesProducer outerInstance,
    FSTEntry entry,
    BinaryDocValues docToOrds,
    FST<long> fst,
    FST<long>.BytesReader @in,
    FST<long>.Arc<long> firstArc,
    FST<long>.Arc<long> scratchArc,
    IntsRef scratchInts,
    BytesRefFSTEnum<long> fstEnum,
    BytesRef @ref,
    ByteArrayDataInput input)
{
    OuterInstance = outerInstance;
    Entry = entry;
    DocToOrds = docToOrds;
    Fst = fst;
    // Member and parameter share the same identifier here, so "this." is required.
    this.@in = @in;
    FirstArc = firstArc;
    ScratchArc = scratchArc;
    ScratchInts = scratchInts;
    FstEnum = fstEnum;
    // Same-name member again.
    this.@ref = @ref;
    Input = input;
}
/// <summary>
/// Builds the completion automata from <paramref name="iterator"/>.
/// <para>
/// Every entry is written to a temporary file as its encoded weight followed by
/// its bytes, the file is sorted offline, and each sorted line is then added to
/// an <c>FSTCompletionBuilder</c> with a bucket derived from its position in the
/// sorted output (lines with the same score as the previous line reuse the
/// previous bucket).
/// </para>
/// </summary>
/// <param name="iterator"> Source of entries and weights; must not expose payloads or contexts. </param>
/// <exception cref="System.ArgumentException"> If the iterator reports payloads or contexts. </exception>
public override void Build(IInputIterator iterator)
{
    if (iterator.HasPayloads)
    {
        throw new System.ArgumentException("this suggester doesn't support payloads");
    }
    if (iterator.HasContexts)
    {
        throw new System.ArgumentException("this suggester doesn't support contexts");
    }

    string tempPrefix = typeof(FSTCompletionLookup).Name;
    FileInfo tempInput = FileSupport.CreateTempFile(tempPrefix, ".input", OfflineSorter.DefaultTempDir());
    FileInfo tempSorted = FileSupport.CreateTempFile(tempPrefix, ".sorted", OfflineSorter.DefaultTempDir());

    OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
    OfflineSorter.ByteSequencesReader reader = null;
    ExternalRefSorter sorter = null;

    // The weight is pushed in front of each sequence so that sorting orders by it.
    // Weights are assumed to be non-negative for now; negative values would need
    // extra work to get a correct byte order.
    bool success = false;
    count = 0;
    try
    {
        byte[] buffer = new byte[0];
        ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
        for (BytesRef term = iterator.Next(); term != null; term = iterator.Next())
        {
            // Room for the term bytes plus the 4 extra bytes reserved for the weight.
            int required = term.Length + 4;
            if (required >= buffer.Length)
            {
                buffer = ArrayUtil.Grow(buffer, required);
            }

            output.Reset(buffer);
            output.WriteInt(EncodeWeight(iterator.Weight));
            output.WriteBytes(term.Bytes, term.Offset, term.Length);
            writer.Write(buffer, 0, output.Position);
        }
        writer.Dispose();

        // The score distribution is unknown and scores have to be bucketed, so
        // sort first and then split the sorted lines into equally sized buckets.
        OfflineSorter.SortInfo info = (new OfflineSorter()).Sort(tempInput, tempSorted);
        tempInput.Delete();

        sorter = new ExternalRefSorter(new OfflineSorter());
        FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter, sharedTailLength);

        int inputLines = info.Lines;
        reader = new OfflineSorter.ByteSequencesReader(tempSorted);

        long line = 0;
        int previousBucket = 0;
        int previousScore = 0;
        ByteArrayDataInput input = new ByteArrayDataInput();
        BytesRef sortedLine = new BytesRef();
        BytesRef termOnly = new BytesRef();
        while (reader.Read(sortedLine))
        {
            input.Reset(sortedLine.Bytes);
            int currentScore = input.ReadInt();

            // Equal consecutive scores stay in the same bucket; otherwise the bucket
            // is proportional to how far into the sorted input this line is.
            int bucket = (line > 0 && currentScore == previousScore)
                ? previousBucket
                : (int)(line * buckets / inputLines);
            previousScore = currentScore;
            previousBucket = bucket;

            // Only append the input, discard the weight.
            int termStart = input.Position;
            termOnly.Bytes = sortedLine.Bytes;
            termOnly.Offset = termStart;
            termOnly.Length = sortedLine.Length - termStart;
            builder.Add(termOnly, bucket);

            line++;
            count++;
        }

        // The two FSTCompletions share the same automaton.
        this.higherWeightsCompletion = builder.Build();
        this.normalCompletion = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);

        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(reader, writer, sorter);
        }
        else
        {
            // Already failing: close without letting a secondary error surface.
            IOUtils.CloseWhileHandlingException(reader, writer, sorter);
        }

        tempInput.Delete();
        tempSorted.Delete();
    }
}
/// <summary>
/// Returns the sorted-set doc values for <paramref name="field"/>.
/// <para>
/// The field's FST is loaded from <c>Data</c> on first use and cached in
/// <c>FstInstances</c>; its reported size is added to the RAM usage counter at
/// that point. Fresh scratch objects are created on every call and handed to the
/// returned helper.
/// </para>
/// </summary>
/// <param name="field"> Field whose <c>Number</c> selects the FST entry. </param>
/// <returns> <c>DocValues.EMPTY_SORTED_SET</c> when the entry has no ords, otherwise a new helper instance. </returns>
public override SortedSetDocValues GetSortedSet(FieldInfo field)
{
    FSTEntry entry = Fsts[field.Number];
    if (entry.NumOrds == 0)
    {
        return DocValues.EMPTY_SORTED_SET; // empty FST!
    }

    FST<long> fst;
    lock (this)
    {
        bool alreadyLoaded = FstInstances.TryGetValue(field.Number, out fst);
        if (!alreadyLoaded)
        {
            Data.Seek(entry.Offset);
            fst = new FST<long>((DataInput)Data, Lucene.Net.Util.Fst.PositiveIntOutputs.Singleton);
            RamBytesUsed_Renamed.AddAndGet(fst.SizeInBytes());
            FstInstances[field.Number] = fst;
        }
    }

    BinaryDocValues docToOrds = GetBinary(field);

    // per-thread resources: everything after "fst" is newly created for this call
    return new SortedSetDocValuesAnonymousInnerClassHelper(
        this,
        entry,
        docToOrds,
        fst,
        fst.BytesReader,
        new FST<long>.Arc<long>(),
        new FST<long>.Arc<long>(),
        new IntsRef(),
        new BytesRefFSTEnum<long>(fst),
        new BytesRef(),
        new ByteArrayDataInput());
}
/// <summary>
/// Re-points this instance at a new set of term data: stores the supplied arrays,
/// payload bytes and input, records the input's current position as the start
/// position, and then calls the parameterless <c>Reset()</c>.
/// </summary>
/// <remarks>
/// <paramref name="flags"/> is accepted but not read by this method.
/// </remarks>
internal virtual void Reset(
    int numTerms,
    int flags,
    int[] prefixLengths,
    int[] suffixLengths,
    int[] termFreqs,
    int[] positionIndex,
    int[] positions,
    int[] startOffsets,
    int[] lengths,
    int[] payloadIndex,
    BytesRef payloads,
    ByteArrayDataInput @in)
{
    NumTerms = numTerms;
    PrefixLengths = prefixLengths;
    SuffixLengths = suffixLengths;
    TermFreqs = termFreqs;
    PositionIndex = positionIndex;
    Positions = positions;
    StartOffsets = startOffsets;
    Lengths = lengths;
    PayloadIndex = payloadIndex;
    Payloads = payloads;
    // Member and parameter share the identifier, so qualify the member.
    this.@in = @in;

    // Remember where the input stood when it was handed over.
    StartPos = @in.Position;

    Reset();
}
/// <summary>
/// Creates a Stemmer backed by the given <see cref="Dictionary"/>. A reader over the
/// dictionary's <c>affixData</c> is created here and kept alongside the dictionary.
/// </summary>
/// <param name="dictionary"> Dictionary that will be used to create the stems </param>
public Stemmer(Dictionary dictionary)
{
    this.dictionary = dictionary;
    affixReader = new ByteArrayDataInput(dictionary.affixData);
}
/// <summary>
/// Captures the FST entry, the per-document ord bytes, the FST itself and the
/// scratch objects supplied by the caller. No work is done beyond storing each
/// argument in its corresponding member.
/// </summary>
public SortedSetDocValuesAnonymousInnerClassHelper(
    FSTEntry fstEntry,
    BinaryDocValues binaryDocValues,
    FST<long?> fst1,
    FST.BytesReader @in,
    FST.Arc<long?> arc,
    FST.Arc<long?> scratchArc1,
    IntsRef intsRef,
    BytesRefFSTEnum<long?> bytesRefFstEnum,
    BytesRef @ref,
    ByteArrayDataInput byteArrayDataInput)
{
    this.entry = fstEntry;
    this.docToOrds = binaryDocValues;
    this.fst = fst1;
    this.@in = @in;
    this.firstArc = arc;
    this.scratchArc = scratchArc1;
    this.scratchInts = intsRef;
    this.fstEnum = bytesRefFstEnum;
    this.@ref = @ref;
    this.input = byteArrayDataInput;
}
/// <summary>
/// Returns the sorted-set doc values for <paramref name="field"/>.
/// <para>
/// The field's FST is read from <c>data</c> the first time it is requested and
/// cached in <c>fstInstances</c>; its reported size is added to <c>ramBytesUsed</c>
/// at that point. Scratch objects are created fresh on every call and passed to
/// the returned helper.
/// </para>
/// </summary>
/// <param name="field"> Field whose <c>Number</c> selects the FST entry. </param>
/// <returns> <c>DocValues.EMPTY_SORTED_SET</c> when the entry has no ords, otherwise a new helper instance. </returns>
public override SortedSetDocValues GetSortedSet(FieldInfo field)
{
    var entry = fsts[field.Number];
    if (entry.numOrds == 0)
    {
        return DocValues.EMPTY_SORTED_SET; // empty FST!
    }

    FST<long?> instance;
    lock (this)
    {
        // Look the cached FST up with TryGetValue: a dictionary indexer throws
        // KeyNotFoundException for a key that has not been stored yet, which would
        // make the lazy-load branch below unreachable on first access. The extra
        // null test keeps the previous behavior for a present-but-null entry.
        if (!fstInstances.TryGetValue(field.Number, out instance) || instance == null)
        {
            data.Seek(entry.offset);
            instance = new FST<long?>(data, PositiveIntOutputs.Singleton);
            ramBytesUsed.AddAndGet(instance.SizeInBytes());
            fstInstances[field.Number] = instance;
        }
    }

    var docToOrds = GetBinary(field);
    var fst = instance;

    // per-thread resources
    var @in = fst.BytesReader;
    var firstArc = new FST.Arc<long?>();
    var scratchArc = new FST.Arc<long?>();
    var scratchInts = new IntsRef();
    var fstEnum = new BytesRefFSTEnum<long?>(fst);
    var @ref = new BytesRef();
    var input = new ByteArrayDataInput();

    return new SortedSetDocValuesAnonymousInnerClassHelper(
        entry, docToOrds, fst, @in, firstArc, scratchArc, scratchInts, fstEnum, @ref, input);
}