/// <summary>
/// Creates the index writer for a single field and seeds its FST builder with the
/// empty term.
/// </summary>
/// <param name="fieldInfo">Field this writer is created for; stored in <c>FieldInfo</c>.</param>
/// <param name="termsFilePointer">Terms-file pointer recorded as the start pointer and
/// used as the output of the empty term.</param>
/// <param name="vgtiw">Owning index writer; its <c>Output</c> supplies the index start position.</param>
public FstFieldWriter(FieldInfo fieldInfo, long termsFilePointer, VariableGapTermsIndexWriter vgtiw)
            {
                // Keep hold of the owner and of the field being written.
                FieldInfo = fieldInfo;
                _vgtiw = vgtiw;

                // Builder configured with the BYTE1 input type and the shared PositiveIntOutputs instance.
                _fstBuilder = new Builder<long>(FST.INPUT_TYPE.BYTE1, PositiveIntOutputs.Singleton);

                // The owner's output file pointer at construction time is this field's index start.
                IndexStart = vgtiw.Output.FilePointer;

                // The empty string is always added first, mapped to the initial terms file pointer.
                _fstBuilder.Add(new IntsRef(), termsFilePointer);
                _startTermsFilePointer = termsFilePointer;
            }
Пример #2
0
            /// <summary>
            /// Creates a terms writer for one field: registers the field with the outer
            /// writer's postings writer, creates the FST builder and zeroes all
            /// "last block" bookkeeping.
            /// </summary>
            /// <param name="outerInstance">Enclosing <c>FSTOrdTermsWriter</c> whose postings writer is used.</param>
            /// <param name="fieldInfo">Field whose terms will be written.</param>
            internal TermsWriter(FSTOrdTermsWriter outerInstance, FieldInfo fieldInfo)
            {
                this.outerInstance = outerInstance;
                this.numTerms      = 0;
                this.fieldInfo     = fieldInfo;
                // NOTE(review): lower-case setField looks like a leftover from the Java source;
                // confirm the actual member name on the postings writer.
                this.longsSize     = outerInstance.postingsWriter.setField(fieldInfo);
                this.outputs       = PositiveIntOutputs.Singleton;
                // C# has no diamond operator, so the type argument must be written out.
                // Assumes the builder field is declared as Builder<long> - TODO confirm.
                this.builder       = new Builder<long>(FST.INPUT_TYPE.BYTE1, outputs);

                // No block has been written yet, so every "last block" pointer starts at zero.
                this.lastBlockStatsFP     = 0;
                this.lastBlockMetaLongsFP = 0;
                this.lastBlockMetaBytesFP = 0;
                this.lastBlockLongs       = new long[longsSize];

                // Scratch state sized by the value returned from the postings writer.
                this.lastLongs       = new long[longsSize];
                this.lastMetaBytesFP = 0;
            }
Пример #3
0
            /// <summary>
            /// Sets up a terms writer for a single field.
            /// </summary>
            /// <param name="outerInstance">Enclosing writer; its postings writer is told which field is being written.</param>
            /// <param name="fieldInfo">Field whose terms this instance handles.</param>
            internal TermsWriter(FSTOrdTermsWriter outerInstance, FieldInfo fieldInfo)
            {
                // Identity of this writer.
                _outerInstance = outerInstance;
                _fieldInfo = fieldInfo;
                _numTerms = 0;

                // Registering the field with the postings writer yields the size used for the long[] buffers below.
                _longsSize = outerInstance.postingsWriter.SetField(fieldInfo);

                // FST builder with BYTE1 input type over the shared PositiveIntOutputs instance.
                _outputs = PositiveIntOutputs.Singleton;
                _builder = new Builder<long>(FST.INPUT_TYPE.BYTE1, _outputs);

                // Per-block bookkeeping: all pointers start at zero.
                _lastBlockStatsFp = 0;
                _lastBlockMetaLongsFp = 0;
                _lastBlockMetaBytesFp = 0;
                _lastMetaBytesFp = 0;

                // Buffers holding the previously seen longs (per block and per term).
                _lastBlockLongs = new long[_longsSize];
                _lastLongs = new long[_longsSize];
            }
        /// <summary>
        /// Writes an FST that maps each value to its ordinal (its position in
        /// <paramref name="values"/>), preceded by a metadata entry for the field.
        /// </summary>
        /// <param name="field">Field being written; its number goes into the metadata.</param>
        /// <param name="values">Values to add, in the order their ordinals are assigned.</param>
        private void WriteFST(FieldInfo field, IEnumerable <BytesRef> values)
        {
            // Metadata header: field number, FST type marker, and where the FST starts in the data output.
            Meta.WriteVInt(field.Number);
            Meta.WriteByte(Lucene42DocValuesProducer.FST);
            Meta.WriteLong(Data.FilePointer);

            var fstBuilder = new Builder <long>(INPUT_TYPE.BYTE1, PositiveIntOutputs.Singleton);
            var reusableInts = new IntsRef();
            long nextOrd = 0;

            foreach (BytesRef value in values)
            {
                // The current counter value is the ordinal of this entry; advance it afterwards.
                fstBuilder.Add(Util.ToIntsRef(value, reusableInts), nextOrd);
                nextOrd++;
            }

            // Finish() may yield null, in which case nothing is saved.
            var built = fstBuilder.Finish();
            if (built != null)
            {
                built.Save(Data);
            }

            // Trailer: total number of values added.
            Meta.WriteVLong(nextOrd);
        }
Пример #5
0
        /// <summary>
        /// Builds the suggester's FST from the entries of <paramref name="iterator"/>.
        /// The iterator is wrapped in a <c>WFSTInputIterator</c>; each distinct entry is
        /// added to the FST with the wrapper's weight as output, and <c>count</c> is
        /// set to the number of entries added.
        /// </summary>
        /// <param name="iterator">Source of suggestion entries.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="iterator"/> reports payloads or contexts, which are not supported.
        /// </exception>
        public override void Build(InputIterator iterator)
        {
            if (iterator.HasPayloads)
            {
                throw new ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }

            count = 0;
            InputIterator      iter        = new WFSTInputIterator(this, iterator);
            IntsRef            scratchInts = new IntsRef();
            BytesRef           previous    = null;
            PositiveIntOutputs outputs     = PositiveIntOutputs.Singleton;
            Builder <long?>    builder     = new Builder <long?>(FST.INPUT_TYPE.BYTE1, outputs);

            // Assigned by iter.Next() before every use, so no up-front allocation is needed.
            BytesRef scratch;

            while ((scratch = iter.Next()) != null)
            {
                long cost = iter.Weight;

                if (previous == null)
                {
                    // First entry: allocate the buffer that remembers the last added entry.
                    previous = new BytesRef();
                }
                else if (scratch.Equals(previous))
                {
                    // for duplicate suggestions, the best weight is actually added
                    continue;
                }

                Util.ToIntsRef(scratch, scratchInts);
                builder.Add(scratchInts, cost);
                previous.CopyBytes(scratch);
                count++;
            }

            fst = builder.Finish();
        }
        /// <summary>
        /// Writes the field's metadata entry followed by an FST that maps every value to
        /// its ordinal, i.e. its zero-based position in <paramref name="values"/>.
        /// </summary>
        /// <param name="field">Field being written; its number is recorded in the metadata.</param>
        /// <param name="values">Values to add, enumerated once in ordinal order.</param>
        private void WriteFST(FieldInfo field, IEnumerable <BytesRef> values)
        {
            // Header: field number, FST marker, start position of the FST in the data output.
            meta.WriteVInt(field.Number);
            meta.WriteByte(FST);
            meta.WriteLong(data.FilePointer);

            Builder <long?> fstBuilder = new Builder <long?>(INPUT_TYPE.BYTE1, PositiveIntOutputs.Singleton);
            IntsRef intsScratch = new IntsRef();
            long valueCount = 0;

            foreach (BytesRef current in values)
            {
                // The running count doubles as the ordinal of the value being added.
                fstBuilder.Add(Util.ToIntsRef(current, intsScratch), valueCount);
                valueCount++;
            }

            // Only a non-null result from Finish() is saved.
            FST <long?> result = fstBuilder.Finish();
            if (result != null)
            {
                result.Save(data);
            }

            // Trailer: how many values were added.
            meta.WriteVLong(valueCount);
        }
Пример #7
0
            /// <summary>
            /// Initializes the writer state for one field: links it to the enclosing writer,
            /// registers the field with the postings writer, creates the FST builder and
            /// resets all block bookkeeping.
            /// </summary>
            /// <param name="outerInstance">Enclosing <c>FSTOrdTermsWriter</c>.</param>
            /// <param name="fieldInfo">Field this writer is responsible for.</param>
            internal TermsWriter(FSTOrdTermsWriter outerInstance, FieldInfo fieldInfo)
            {
                _outerInstance = outerInstance;
                _fieldInfo = fieldInfo;
                _numTerms = 0;

                // The postings writer reports the size used for both long[] buffers.
                _longsSize = outerInstance.postingsWriter.SetField(fieldInfo);
                _lastLongs = new long[_longsSize];
                _lastBlockLongs = new long[_longsSize];

                // Builder with BYTE1 input type and the shared PositiveIntOutputs instance.
                _outputs = PositiveIntOutputs.Singleton;
                _builder = new Builder<long>(FST.INPUT_TYPE.BYTE1, _outputs);

                // Nothing has been written yet: every remembered pointer is zero.
                _lastMetaBytesFp = 0;
                _lastBlockStatsFp = 0;
                _lastBlockMetaLongsFp = 0;
                _lastBlockMetaBytesFp = 0;
            }
        /// <summary>
        /// Creates the index writer for a single field and seeds its FST builder with the
        /// empty term.
        /// </summary>
        /// <param name="fieldInfo">Field this writer is created for.</param>
        /// <param name="termsFilePointer">Terms-file pointer recorded as the start pointer and
        /// used as the output of the empty term.</param>
        public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer)
        {
            this.fieldInfo = fieldInfo;
            fstOutputs = PositiveIntOutputs.Singleton;
            // C# has no diamond operator, so the type argument must be written out.
            // Assumes fstBuilder is declared as Builder<long> - TODO confirm.
            fstBuilder = new Builder<long>(FST.INPUT_TYPE.BYTE1, fstOutputs);
            // The output's file pointer at construction time is this field's index start.
            indexStart = output.FilePointer;

            // Always put empty string in
            fstBuilder.Add(new IntsRef(), termsFilePointer);
            startTermsFilePointer = termsFilePointer;
        }
Пример #9
0
            /// <summary>
            /// Reads this field's entries line by line from a clone of the outer reader's
            /// input, starting at <c>termsStart</c>, and builds the term FST stored in <c>fst</c>.
            /// Each term is added with the output pair
            /// (file pointer after its TERM line, (docFreq, totalTermFreq)).
            /// Also updates <c>termCount</c>, <c>sumDocFreq</c>, <c>sumTotalTermFreq</c> and <c>docCount</c>.
            /// </summary>
            internal virtual void LoadTerms()
            {
                PositiveIntOutputs posIntOutputs = PositiveIntOutputs.Singleton;
                PairOutputs <long?, long?> outputsInner = new PairOutputs <long?, long?>(posIntOutputs, posIntOutputs);
                PairOutputs <long?, PairOutputs.Pair <long?, long?> > outputs =
                    new PairOutputs <long?, PairOutputs.Pair <long?, long?> >(posIntOutputs, outputsInner);

                // C# has no diamond operator, so the builder's type argument is written out in full.
                Builder <PairOutputs.Pair <long?, PairOutputs.Pair <long?, long?> > > b =
                    new Builder <PairOutputs.Pair <long?, PairOutputs.Pair <long?, long?> > >(FST.INPUT_TYPE.BYTE1, outputs);

                // Work on a clone so the shared input's position is left alone.
                IndexInput @in = (IndexInput)outerInstance._input.Clone();

                @in.Seek(termsStart);

                BytesRef    lastTerm      = new BytesRef(10);
                long        lastDocsStart = -1; // -1 means no term has been seen yet
                int         docFreq       = 0;
                long        totalTermFreq = 0;
                FixedBitSet visitedDocs   = new FixedBitSet(maxDoc);

                IntsRef scratchIntsRef = new IntsRef();

                while (true)
                {
                    SimpleTextUtil.ReadLine(@in, scratch);
                    if (scratch.Equals(END) || StringHelper.StartsWith(scratch, FIELD))
                    {
                        // END line or next FIELD line: flush the pending term (if any) and stop.
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair((long)docFreq, totalTermFreq)));
                            sumTotalTermFreq += totalTermFreq;
                        }
                        break;
                    }
                    else if (StringHelper.StartsWith(scratch, DOC))
                    {
                        // DOC line: count it and mark the parsed document id as visited.
                        docFreq++;
                        sumDocFreq++;
                        UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + DOC.Length, scratch.Length - DOC.Length,
                                                scratchUTF16);
                        int docID = ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.length);
                        visitedDocs.Set(docID);
                    }
                    else if (StringHelper.StartsWith(scratch, FREQ))
                    {
                        // FREQ line: add the parsed value to the current term's total.
                        UnicodeUtil.UTF8toUTF16(scratch.Bytes, scratch.Offset + FREQ.Length,
                                                scratch.Length - FREQ.Length, scratchUTF16);
                        totalTermFreq += ArrayUtil.ParseInt(scratchUTF16.Chars, 0, scratchUTF16.length);
                    }
                    else if (StringHelper.StartsWith(scratch, TERM))
                    {
                        // TERM line: flush the previous term before starting the new one.
                        if (lastDocsStart != -1)
                        {
                            b.Add(Util.ToIntsRef(lastTerm, scratchIntsRef),
                                  outputs.NewPair(lastDocsStart, outputsInner.NewPair((long)docFreq, totalTermFreq)));
                        }
                        // Position right after the TERM line; stored as the new term's first output.
                        lastDocsStart = @in.FilePointer;
                        int len = scratch.Length - TERM.Length;
                        if (len > lastTerm.Length)
                        {
                            lastTerm.Grow(len);
                        }
                        // Copy the term bytes that follow the TERM prefix.
                        Array.Copy(scratch.Bytes, TERM.Length, lastTerm.Bytes, 0, len);
                        lastTerm.Length   = len;
                        docFreq           = 0;
                        sumTotalTermFreq += totalTermFreq;
                        totalTermFreq     = 0;
                        termCount++;
                    }
                }
                docCount = visitedDocs.Cardinality();
                fst      = b.Finish();
            }