/// <summary>
/// Starts an FST-backed terms index for a single field.
/// </summary>
/// <param name="fieldInfo">Field this writer is created for; stored in <c>FieldInfo</c>.</param>
/// <param name="termsFilePointer">
/// Terms-file pointer that is registered as the output of the empty term and
/// remembered as the starting terms-file pointer.
/// </param>
/// <param name="vgtiw">
/// Owning index writer; the current <c>Output.FilePointer</c> of this writer is
/// captured as <c>IndexStart</c>.
/// </param>
public FstFieldWriter(FieldInfo fieldInfo, long termsFilePointer, VariableGapTermsIndexWriter vgtiw)
{
    _vgtiw = vgtiw;
    FieldInfo = fieldInfo;

    PositiveIntOutputs fstOutputs = PositiveIntOutputs.Singleton;
    _fstBuilder = new Builder<long>(FST.INPUT_TYPE.BYTE1, fstOutputs);
    IndexStart = _vgtiw.Output.FilePointer;

    // Always put empty string in
    _fstBuilder.Add(new IntsRef(), termsFilePointer);

    _startTermsFilePointer = termsFilePointer;
}
/// <summary>
/// Initializes the per-field writer state for <paramref name="fieldInfo"/>.
/// </summary>
/// <param name="outerInstance">Enclosing writer whose <c>postingsWriter</c> is told which field is being written.</param>
/// <param name="fieldInfo">Field whose terms this writer will receive.</param>
internal TermsWriter(FSTOrdTermsWriter outerInstance, FieldInfo fieldInfo)
{
    this.outerInstance = outerInstance;
    this.numTerms = 0;
    this.fieldInfo = fieldInfo;

    // NOTE(review): `setField` keeps Java casing - confirm the postings writer
    // does not expose this member as `SetField`.
    this.longsSize = outerInstance.postingsWriter.setField(fieldInfo);

    this.outputs = PositiveIntOutputs.Singleton;

    // C# cannot infer constructor type arguments (no Java-style diamond), so the
    // type argument is spelled out.
    // NOTE(review): `long` is assumed to match the declared type of `builder` - confirm.
    this.builder = new Builder<long>(FST.INPUT_TYPE.BYTE1, outputs);

    this.lastBlockStatsFP = 0;
    this.lastBlockMetaLongsFP = 0;
    this.lastBlockMetaBytesFP = 0;

    // Both scratch arrays are sized by the value returned from the postings writer.
    this.lastBlockLongs = new long[longsSize];
    this.lastLongs = new long[longsSize];
    this.lastMetaBytesFP = 0;
}
/// <summary>
/// Sets up the state needed to write the terms of one field.
/// </summary>
/// <param name="outerInstance">Enclosing writer; its <c>postingsWriter</c> is switched to <paramref name="fieldInfo"/>.</param>
/// <param name="fieldInfo">Field whose terms will be written.</param>
internal TermsWriter(FSTOrdTermsWriter outerInstance, FieldInfo fieldInfo)
{
    // Identity of this writer.
    _outerInstance = outerInstance;
    _numTerms = 0;
    _fieldInfo = fieldInfo;

    // The postings writer reports the size used for the long[] scratch arrays below.
    _longsSize = outerInstance.postingsWriter.SetField(fieldInfo);

    // FST construction.
    _outputs = PositiveIntOutputs.Singleton;
    _builder = new Builder<long>(FST.INPUT_TYPE.BYTE1, _outputs);

    // All "last seen" pointers start at zero.
    _lastBlockStatsFp = 0;
    _lastBlockMetaLongsFp = 0;
    _lastBlockMetaBytesFp = 0;
    _lastMetaBytesFp = 0;

    // Scratch arrays.
    _lastBlockLongs = new long[_longsSize];
    _lastLongs = new long[_longsSize];
}
/// <summary>
/// Builds an FST that maps each value to its position in <paramref name="values"/>
/// and writes it to <c>Data</c>, with a matching entry in <c>Meta</c>.
/// </summary>
/// <param name="field">Field whose number is written to the metadata entry.</param>
/// <param name="values">Values to add, in enumeration order; the n-th value receives output n (starting at 0).</param>
private void WriteFST(FieldInfo field, IEnumerable<BytesRef> values)
{
    // Metadata entry: field number, the FST marker byte, and where the data starts.
    Meta.WriteVInt(field.Number);
    Meta.WriteByte(Lucene42DocValuesProducer.FST);
    Meta.WriteLong(Data.FilePointer);

    PositiveIntOutputs intOutputs = PositiveIntOutputs.Singleton;
    var fstBuilder = new Builder<long>(INPUT_TYPE.BYTE1, intOutputs);
    var intsScratch = new IntsRef();

    long nextOrd = 0;
    foreach (BytesRef value in values)
    {
        fstBuilder.Add(Util.ToIntsRef(value, intsScratch), nextOrd);
        nextOrd++;
    }

    // Finish() may return null, in which case nothing is saved.
    Lucene.Net.Util.Fst.FST<long> built = fstBuilder.Finish();
    if (built != null)
    {
        built.Save(Data);
    }

    // Number of values that were added.
    Meta.WriteVLong(nextOrd);
}
/// <summary>
/// Builds the suggester's FST from the supplied entries.
/// </summary>
/// <param name="iterator">Source of suggestions and weights; it is wrapped in a <c>WFSTInputIterator</c> before being consumed.</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="iterator"/> reports payloads or contexts.
/// </exception>
public override void Build(InputIterator iterator)
{
    if (iterator.HasPayloads)
    {
        throw new ArgumentException("this suggester doesn't support payloads");
    }
    if (iterator.HasContexts)
    {
        throw new ArgumentException("this suggester doesn't support contexts");
    }

    count = 0;
    BytesRef scratch;
    InputIterator iter = new WFSTInputIterator(this, iterator);
    IntsRef scratchInts = new IntsRef();
    BytesRef previous = null;
    PositiveIntOutputs outputs = PositiveIntOutputs.Singleton;
    Builder<long?> builder = new Builder<long?>(FST.INPUT_TYPE.BYTE1, outputs);

    while ((scratch = iter.Next()) != null)
    {
        long cost = iter.Weight;

        if (previous == null)
        {
            previous = new BytesRef();
        }
        else if (scratch.Equals(previous))
        {
            // for duplicate suggestions, the best weight is actually added
            // NOTE(review): only the immediately preceding entry is compared, so this
            // presumably relies on the wrapped iterator returning equal entries
            // adjacently - confirm against WFSTInputIterator.
            continue;
        }

        Util.ToIntsRef(scratch, scratchInts);
        builder.Add(scratchInts, cost);
        previous.CopyBytes(scratch);
        count++;
    }

    fst = builder.Finish();
}
/// <summary>
/// Builds an FST that maps each value to its position in <paramref name="values"/>
/// and writes it to <c>data</c>, with a matching entry in <c>meta</c>.
/// </summary>
/// <param name="field">Field whose number is written to the metadata entry.</param>
/// <param name="values">Values to add, in enumeration order; the n-th value receives output n (starting at 0).</param>
private void WriteFST(FieldInfo field, IEnumerable<BytesRef> values)
{
    // Metadata entry: field number, the FST marker byte, and where the data starts.
    meta.WriteVInt(field.Number);
    meta.WriteByte(FST);
    meta.WriteLong(data.FilePointer);

    PositiveIntOutputs intOutputs = PositiveIntOutputs.Singleton;
    var fstBuilder = new Builder<long?>(INPUT_TYPE.BYTE1, intOutputs);
    var intsScratch = new IntsRef();

    long nextOrd = 0;
    foreach (BytesRef value in values)
    {
        fstBuilder.Add(Util.ToIntsRef(value, intsScratch), nextOrd);
        nextOrd++;
    }

    // Finish() may return null, in which case nothing is saved.
    FST<long?> built = fstBuilder.Finish();
    if (built != null)
    {
        built.Save(data);
    }

    // Number of values that were added.
    meta.WriteVLong(nextOrd);
}
/// <summary>
/// Creates the writer for one field's terms and resets all of its bookkeeping.
/// </summary>
/// <param name="outerInstance">Enclosing writer that owns the postings writer.</param>
/// <param name="fieldInfo">Field that is about to be written.</param>
internal TermsWriter(FSTOrdTermsWriter outerInstance, FieldInfo fieldInfo)
{
    _outerInstance = outerInstance;
    _numTerms = 0;
    _fieldInfo = fieldInfo;

    // Point the postings writer at this field; the returned size is used for
    // the long[] buffers allocated further down.
    _longsSize = outerInstance.postingsWriter.SetField(fieldInfo);

    _outputs = PositiveIntOutputs.Singleton;
    _builder = new Builder<long>(FST.INPUT_TYPE.BYTE1, _outputs);

    // Per-block pointers and buffers.
    _lastBlockStatsFp = 0;
    _lastBlockMetaLongsFp = 0;
    _lastBlockMetaBytesFp = 0;
    _lastBlockLongs = new long[_longsSize];

    // Per-term buffer and pointer.
    _lastLongs = new long[_longsSize];
    _lastMetaBytesFp = 0;
}
/// <summary>
/// Starts an FST-backed terms index for a single field.
/// </summary>
/// <param name="fieldInfo">Field this writer is created for.</param>
/// <param name="termsFilePointer">
/// Terms-file pointer that is registered as the output of the empty term and
/// remembered as the starting terms-file pointer.
/// </param>
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer)
{
    this.fieldInfo = fieldInfo;
    fstOutputs = PositiveIntOutputs.Singleton;

    // C# cannot infer constructor type arguments (no Java-style diamond), so the
    // type argument is spelled out.
    // NOTE(review): `long` is assumed to match the declared type of `fstBuilder` - confirm.
    fstBuilder = new Builder<long>(FST.INPUT_TYPE.BYTE1, fstOutputs);

    // Position of `output` at the time this field's index is started.
    indexStart = output.FilePointer;
    ////System.out.println("VGW: field=" + fieldInfo.name);

    // Always put empty string in
    fstBuilder.Add(new IntsRef(), termsFilePointer);

    startTermsFilePointer = termsFilePointer;
}
/// <summary>
/// Scans this field's section of the input and builds the term FST.
/// </summary>
/// <remarks>
/// Reading starts at <c>termsStart</c> on a clone of the outer input and stops at
/// the first line equal to <c>END</c> or starting with <c>FIELD</c>. Each term is
/// added to the FST with the output (docs start pointer, (docFreq, totalTermFreq)).
/// Along the way the method updates <c>sumDocFreq</c>, <c>sumTotalTermFreq</c> and
/// <c>termCount</c>, and finally sets <c>docCount</c> and <c>fst</c>.
/// NOTE(review): <c>DOC.length</c>, <c>FREQ.length</c>, <c>TERM.length</c> and
/// <c>scratchUTF16.length</c> use lower-case member names while <c>scratch.Length</c>
/// is upper-case - confirm these members exist with this casing.
/// </remarks>
internal virtual void LoadTerms()
{
    PositiveIntOutputs posIntOutputs = PositiveIntOutputs.Singleton;
    Builder<PairOutputs.Pair<long?, PairOutputs.Pair<long?, long?>>> b;
    PairOutputs<long?, long?> outputsInner = new PairOutputs<long?, long?>(posIntOutputs, posIntOutputs);
    PairOutputs<long?, PairOutputs.Pair<long?, long?>> outputs =
        new PairOutputs<long?, PairOutputs.Pair<long?, long?>>(posIntOutputs, outputsInner);

    // The type argument is spelled out: C# has no Java-style diamond inference.
    b = new Builder<PairOutputs.Pair<long?, PairOutputs.Pair<long?, long?>>>(FST.INPUT_TYPE.BYTE1, outputs);

    IndexInput @in = (IndexInput)outerInstance._input.Clone();
    @in.Seek(termsStart);

    BytesRef lastTerm = new BytesRef(10);
    long lastDocsStart = -1; // -1 means no term has been seen yet
    int docFreq = 0;
    long totalTermFreq = 0;
    FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
    IntsRef scratchIntsRef = new IntsRef();

    while (true)
    {
        SimpleTextUtil.ReadLine(@in, scratch);

        if (scratch.Equals(END) || StringHelper.StartsWith(scratch, FIELD))
        {
            // End of this field: flush the pending term, if any.
            if (lastDocsStart != -1)
            {
                b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef),
                      outputs.NewPair(lastDocsStart, outputsInner.NewPair((long)docFreq, totalTermFreq)));
                sumTotalTermFreq += totalTermFreq;
            }
            break;
        }
        else if (StringHelper.StartsWith(scratch, DOC))
        {
            docFreq++;
            sumDocFreq++;
            // Parse the text after the DOC prefix as the document id.
            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + DOC.length, scratch.Length - DOC.length, scratchUTF16);
            int docID = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.length);
            visitedDocs.Set(docID);
        }
        else if (StringHelper.StartsWith(scratch, FREQ))
        {
            // Parse the text after the FREQ prefix and accumulate it for the current term.
            UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + FREQ.length, scratch.Length - FREQ.length, scratchUTF16);
            totalTermFreq += ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.length);
        }
        else if (StringHelper.StartsWith(scratch, TERM))
        {
            // A new term begins: flush the previous one first.
            if (lastDocsStart != -1)
            {
                b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef),
                      outputs.NewPair(lastDocsStart, outputsInner.NewPair((long)docFreq, totalTermFreq)));
            }

            lastDocsStart = @in.FilePointer;

            // Copy the bytes after the TERM prefix into lastTerm.
            int len = scratch.Length - TERM.length;
            if (len > lastTerm.Length)
            {
                lastTerm.Grow(len);
            }
            Array.Copy(scratch.Bytes, TERM.length, lastTerm.Bytes, 0, len);
            lastTerm.Length = len;

            // Reset the per-term counters; the finished term's frequency goes into the field total.
            docFreq = 0;
            sumTotalTermFreq += totalTermFreq;
            totalTermFreq = 0;
            termCount++;
        }
    }

    docCount = visitedDocs.Cardinality();
    fst = b.Finish();
}