public SimpleTextFieldsReader(SegmentReadState state)
        {
            _maxDoc     = state.SegmentInfo.DocCount;
            _fieldInfos = state.FieldInfos;
            _input      =
                state.Directory.OpenInput(
                    SimpleTextPostingsFormat.GetPostingsFileName(state.SegmentInfo.Name, state.SegmentSuffix),
                    state.Context);
            bool success = false;

            try
            {
                _fields = ReadFields((IndexInput)_input.Clone());
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(this); // close the reader (and its input) quietly; the original exception still propagates
                }
            }
        }
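
Every example on this page follows the same resource-safety idiom: open the resources, set a success flag as the very last statement of the try block, and close quietly in finally only when the flag is still false, so the exception already in flight is never masked by a secondary failure during cleanup. The helper below is a minimal, self-contained sketch of that idiom, not the actual Lucene.NET IOUtils implementation:

using System;

public static partial class IOUtilsSketch
{
    // Illustrative stand-in for IOUtils.CloseWhileHandlingException():
    // dispose each resource, swallowing secondary exceptions so the
    // primary exception already propagating out of the caller wins.
    public static void CloseWhileHandlingException(params IDisposable[] resources)
    {
        foreach (IDisposable resource in resources)
        {
            try
            {
                resource?.Dispose();
            }
            catch
            {
                // suppressed: the caller's original exception takes priority
            }
        }
    }
}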
Example #2
        private readonly FieldInfos fieldInfos; // unread

        public FixedGapTermsIndexWriter(SegmentWriteState state)
        {
            string indexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_INDEX_EXTENSION);

            termIndexInterval = state.TermIndexInterval;
            m_output          = state.Directory.CreateOutput(indexFileName, state.Context);
            bool success = false;

            try
            {
                fieldInfos = state.FieldInfos;
                WriteHeader(m_output);
                m_output.WriteInt32(termIndexInterval);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(m_output);
                }
            }
        }
        /// <summary>
        /// expert: Creates a new writer </summary>
        public Lucene45DocValuesConsumer(SegmentWriteState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
        {
            bool success = false;

            try
            {
                string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
                data = state.Directory.CreateOutput(dataName, state.Context);
                CodecUtil.WriteHeader(data, dataCodec, Lucene45DocValuesFormat.VERSION_CURRENT);
                string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
                meta = state.Directory.CreateOutput(metaName, state.Context);
                CodecUtil.WriteHeader(meta, metaCodec, Lucene45DocValuesFormat.VERSION_CURRENT);
                maxDoc  = state.SegmentInfo.DocCount;
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(this);
                }
            }
        }
        private BinaryDocValues LoadBytesVarStraight(FieldInfo field)
        {
            string     dataName  = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat");
            string     indexName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "idx");
            IndexInput data      = null;
            IndexInput index     = null;
            bool       success   = false;

            try
            {
                data = Dir.OpenInput(dataName, State.Context);
                CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
                index = Dir.OpenInput(indexName, State.Context);
                CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_START, Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
                long       totalBytes = index.ReadVLong();
                PagedBytes bytes      = new PagedBytes(16);
                bytes.Copy(data, totalBytes);
                PagedBytes.Reader bytesReader = bytes.Freeze(true);
                PackedInts.Reader reader      = PackedInts.GetReader(index);
                CodecUtil.CheckEOF(data);
                CodecUtil.CheckEOF(index);
                success = true;
                RamBytesUsed_Renamed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed());
                return(new BinaryDocValuesAnonymousInnerClassHelper2(this, bytesReader, reader));
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(data, index);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(data, index);
                }
            }
        }
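
On the success path the examples switch to IOUtils.Close, which must not swallow anything: a failure while closing is then the only failure there is and has to propagate. A sketch of that counterpart, under the same illustrative assumptions as above:

using System;

public static partial class IOUtilsSketch
{
    // Illustrative counterpart to IOUtils.Close(): dispose everything,
    // remember the first failure, and rethrow it once every resource has
    // been given a chance to close.
    public static void Close(params IDisposable[] resources)
    {
        Exception first = null;
        foreach (IDisposable resource in resources)
        {
            try
            {
                resource?.Dispose();
            }
            catch (Exception e)
            {
                if (first == null)
                {
                    first = e;
                }
            }
        }
        if (first != null)
        {
            throw first;
        }
    }
}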
Example #5
        /// <summary>
        /// Save a single segment's info. </summary>
        public override void Write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext)
        {
            string fileName = IndexFileNames.SegmentFileName(si.Name, "", Lucene40SegmentInfoFormat.SI_EXTENSION);

            si.AddFile(fileName);

            IndexOutput output = dir.CreateOutput(fileName, ioContext);

            bool success = false;

            try
            {
                CodecUtil.WriteHeader(output, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT);
                // Write the Lucene version that created this segment, since 3.1
                output.WriteString(si.Version);
                output.WriteInt32(si.DocCount);

                output.WriteByte((byte)(sbyte)(si.UseCompoundFile ? SegmentInfo.YES : SegmentInfo.NO));
                output.WriteStringStringMap(si.Diagnostics);
                output.WriteStringStringMap(Collections.EmptyMap <string, string>());
                output.WriteStringSet(si.GetFiles());

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(output);
                    si.Dir.DeleteFile(fileName);
                }
                else
                {
                    output.Dispose();
                }
            }
        }
        public virtual Query GetQuery(XmlElement e)
        {
            string fieldName = DOMUtils.GetAttributeWithInheritanceOrFail(e, "fieldName");
            string text      = DOMUtils.GetNonBlankTextOrFail(e);

            BooleanQuery bq = new BooleanQuery(DOMUtils.GetAttribute(e, "disableCoord", false));

            bq.MinimumNumberShouldMatch = DOMUtils.GetAttribute(e, "minimumNumberShouldMatch", 0);
            TokenStream ts = null;

            try
            {
                ts = analyzer.TokenStream(fieldName, text);
                ITermToBytesRefAttribute termAtt = ts.AddAttribute <ITermToBytesRefAttribute>();
                Term     term  = null;
                BytesRef bytes = termAtt.BytesRef;
                ts.Reset();
                while (ts.IncrementToken())
                {
                    termAtt.FillBytesRef();
                    term = new Term(fieldName, BytesRef.DeepCopyOf(bytes));
                    bq.Add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
                }
                ts.End();
            }
            catch (IOException ioe)
            {
                throw new Exception("Error constructing terms from index:" + ioe);
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(ts);
            }

            bq.Boost = DOMUtils.GetAttribute(e, "boost", 1.0f);
            return(bq);
        }
Example #7
        public override SpanQuery GetSpanQuery(XmlElement e)
        {
            string fieldName = DOMUtils.GetAttributeWithInheritanceOrFail(e, "fieldName");
            string value     = DOMUtils.GetNonBlankTextOrFail(e);

            List <SpanQuery> clausesList = new List <SpanQuery>();

            TokenStream ts = null;

            try
            {
                ts = analyzer.TokenStream(fieldName, value);
                ITermToBytesRefAttribute termAtt = ts.AddAttribute <ITermToBytesRefAttribute>();
                BytesRef bytes = termAtt.BytesRef;
                ts.Reset();
                while (ts.IncrementToken())
                {
                    termAtt.FillBytesRef();
                    SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.DeepCopyOf(bytes)));
                    clausesList.Add(stq);
                }
                ts.End();
                SpanOrQuery soq = new SpanOrQuery(clausesList.ToArray(/*new SpanQuery[clausesList.size()]*/));
                soq.Boost = DOMUtils.GetAttribute(e, "boost", 1.0f);
                return(soq);
            }
#pragma warning disable 168
            catch (IOException ioe)
#pragma warning restore 168
            {
                throw new ParserException("IOException parsing value:" + value);
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(ts);
            }
        }
Example #8
        public virtual void TestStopList()
        {
            CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, new string[] { "good", "test", "analyzer" }, false);
            StopAnalyzer newStop      = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
            TokenStream  stream       = newStop.TokenStream("test", "This is a good test of the english stop analyzer");

            try
            {
                assertNotNull(stream);
                ICharTermAttribute termAtt = stream.GetAttribute <ICharTermAttribute>();

                stream.Reset();
                while (stream.IncrementToken())
                {
                    string text = termAtt.ToString();
                    assertFalse(stopWordsSet.contains(text));
                }
                stream.End();
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(stream);
            }
        }
Example #9
        internal Lucene42NormsConsumer(SegmentWriteState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension, float acceptableOverheadRatio)
        {
            this.AcceptableOverheadRatio = acceptableOverheadRatio;
            MaxDoc = state.SegmentInfo.DocCount;
            bool success = false;

            try
            {
                string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
                Data = state.Directory.CreateOutput(dataName, state.Context);
                CodecUtil.WriteHeader(Data, dataCodec, Lucene42DocValuesProducer.VERSION_CURRENT);
                string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
                Meta = state.Directory.CreateOutput(metaName, state.Context);
                CodecUtil.WriteHeader(Meta, metaCodec, Lucene42DocValuesProducer.VERSION_CURRENT);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(this);
                }
            }
        }
Example #10
 private IndexOutput GetOutput()
 {
     lock (this)
     {
         if (DataOut == null)
         {
             bool success = false;
             try
             {
                 DataOut = Directory.CreateOutput(DataFileName, IOContext.DEFAULT);
                 CodecUtil.WriteHeader(DataOut, DATA_CODEC, VERSION_CURRENT);
                 success = true;
             }
             finally
             {
                 if (!success)
                 {
                     IOUtils.CloseWhileHandlingException((IDisposable)DataOut);
                 }
             }
         }
         return(DataOut);
     }
 }
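
The same clean-up-on-failure discipline applies when a resource is created lazily under a lock: if writing the header throws, the half-created output is closed quietly so nothing leaks. A generic sketch of the idea follows; the names and the File-based output are illustrative, not Lucene.NET API, and the sketch publishes the field only after the header write succeeds so a later call can retry:

using System.IO;

public sealed class LazyOutput
{
    private readonly object syncRoot = new object();
    private readonly string path;
    private Stream dataOut; // created on first use

    public LazyOutput(string path)
    {
        this.path = path;
    }

    public Stream GetOutput()
    {
        lock (syncRoot)
        {
            if (dataOut == null)
            {
                Stream output = File.Create(path);
                bool success = false;
                try
                {
                    output.WriteByte(0x42); // stand-in for writing a codec header
                    success = true;
                    dataOut = output;       // publish only once fully initialized
                }
                finally
                {
                    if (!success)
                    {
                        try { output.Dispose(); } catch { /* keep primary exception */ }
                    }
                }
            }
            return dataOut;
        }
    }
}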
Example #11
        public virtual void Split()
        {
            bool            success = false;
            DirectoryReader reader  = DirectoryReader.Open(input);

            try
            {
                // pass an individual config in here since one config can not be reused!
                CreateIndex(config1, dir1, reader, docsInFirstIndex, false);
                CreateIndex(config2, dir2, reader, docsInFirstIndex, true);
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(reader);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(reader);
                }
            }
        }
Example #12
        private void Initialize(Directory directory, string segment, FieldInfos fis, int interval, bool isi)
        {
            IndexInterval = interval;
            FieldInfos    = fis;
            IsIndex       = isi;
            Output        = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", (IsIndex ? Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION : Lucene3xPostingsFormat.TERMS_EXTENSION)), IOContext.DEFAULT);
            bool success = false;

            try
            {
                Output.WriteInt32(FORMAT_CURRENT); // write format
                Output.WriteInt64(0);              // leave space for size
                Output.WriteInt32(IndexInterval);  // write indexInterval
                Output.WriteInt32(SkipInterval);   // write skipInterval
                Output.WriteInt32(MaxSkipLevels);  // write maxSkipLevels
                Debug.Assert(InitUTF16Results());
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(Output);

                    try
                    {
                        directory.DeleteFile(IndexFileNames.SegmentFileName(segment, "", (IsIndex ? Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION : Lucene3xPostingsFormat.TERMS_EXTENSION)));
                    }
#pragma warning disable 168
                    catch (IOException ignored)
#pragma warning restore 168
                    {
                    }
                }
            }
        }
Example #13
 public override void Run()
 {
     try
     {
         foreach (KeyValuePair <string, BytesRef> mapping in Map)
         {
             string      term           = mapping.Key;
             BytesRef    expected       = mapping.Value;
             IOException priorException = null;
             TokenStream ts             = Analyzer.TokenStream("fake", new StreamReader(term));
             try
             {
                 ITermToBytesRefAttribute termAtt = ts.AddAttribute <ITermToBytesRefAttribute>();
                 BytesRef bytes = termAtt.BytesRef;
                 ts.Reset();
                 Assert.IsTrue(ts.IncrementToken());
                 termAtt.FillBytesRef();
                 Assert.AreEqual(expected, bytes);
                 Assert.IsFalse(ts.IncrementToken());
                 ts.End();
             }
             catch (IOException e)
             {
                 priorException = e;
             }
             finally
             {
                 IOUtils.CloseWhileHandlingException(priorException, ts);
             }
         }
     }
      catch (IOException)
      {
          throw; // rethrow as-is, preserving the original stack trace
      }
 }
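
The Run() example above uses the two-argument overload, which threads an exception that is already in flight through the cleanup: the token stream is closed quietly, and the prior exception, if any, is rethrown so the original failure wins. A sketch of those semantics, continuing the illustrative helper from above:

using System;

public static partial class IOUtilsSketch
{
    // Illustrative sketch of the priorException overload: close everything,
    // rethrow the caller's prior exception if one exists, otherwise surface
    // the first failure encountered while closing.
    public static void CloseWhileHandlingException(Exception priorException, params IDisposable[] resources)
    {
        Exception firstCloseFailure = null;
        foreach (IDisposable resource in resources)
        {
            try
            {
                resource?.Dispose();
            }
            catch (Exception e)
            {
                if (firstCloseFailure == null)
                {
                    firstCloseFailure = e;
                }
            }
        }
        if (priorException != null)
        {
            throw priorException;
        }
        if (firstCloseFailure != null)
        {
            throw firstCloseFailure;
        }
    }
}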
Example #14
 protected override void Dispose(bool disposing)
 {
     if (disposing)
     {
         // TODO: add a finish() at least to PushBase? DV too...?
         bool success = false;
         try
         {
             if (DocOut != null)
             {
                 CodecUtil.WriteFooter(DocOut);
             }
             if (PosOut != null)
             {
                 CodecUtil.WriteFooter(PosOut);
             }
             if (PayOut != null)
             {
                 CodecUtil.WriteFooter(PayOut);
             }
             success = true;
         }
         finally
         {
             if (success)
             {
                 IOUtils.Close(DocOut, PosOut, PayOut);
             }
             else
             {
                 IOUtils.CloseWhileHandlingException(DocOut, PosOut, PayOut);
             }
             DocOut = PosOut = PayOut = null;
         }
     }
 }
Example #15
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
        {
            string      fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
            IndexOutput output   = directory.CreateOutput(fileName, context);
            bool        success  = false;

            try
            {
                CodecUtil.WriteHeader(output, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_CURRENT);
                output.WriteVInt(infos.Size());
                foreach (FieldInfo fi in infos)
                {
                    FieldInfo.IndexOptions? indexOptions = fi.FieldIndexOptions;
                    sbyte bits = 0x0;
                    if (fi.HasVectors())
                    {
                        bits |= Lucene42FieldInfosFormat.STORE_TERMVECTOR;
                    }
                    if (fi.OmitsNorms())
                    {
                        bits |= Lucene42FieldInfosFormat.OMIT_NORMS;
                    }
                    if (fi.HasPayloads())
                    {
                        bits |= Lucene42FieldInfosFormat.STORE_PAYLOADS;
                    }
                    if (fi.Indexed)
                    {
                        bits |= Lucene42FieldInfosFormat.IS_INDEXED;
                        Debug.Assert(indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads());
                        if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
                        {
                            bits |= Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                        }
                        else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                        {
                            bits |= Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                        }
                        else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS)
                        {
                            bits |= Lucene42FieldInfosFormat.OMIT_POSITIONS;
                        }
                    }
                    output.WriteString(fi.Name);
                    output.WriteVInt(fi.Number);
                    output.WriteByte(bits);

                    // pack the DV types in one byte
                    sbyte dv  = DocValuesByte(fi.DocValuesType);
                    sbyte nrm = DocValuesByte(fi.NormType);
                    Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
                    sbyte val = unchecked ((sbyte)(0xff & ((nrm << 4) | dv)));
                    output.WriteByte(val);
                    output.WriteStringStringMap(fi.Attributes());
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(output);
                }
            }
        }
Example #16
        // Writes field updates (new _X_N updates files) to the directory
        public virtual void WriteFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates)
        {
            lock (this)
            {
                //Debug.Assert(Thread.holdsLock(Writer));
                //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

                Debug.Assert(dvUpdates.Any());

                // Do this so we can delete any created files on
                // exception; this saves all codecs from having to do
                // it:
                TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

                FieldInfos fieldInfos = null;
                bool       success    = false;
                try
                {
                    Codec codec = Info.Info.Codec;

                    // reader could be null e.g. for a just merged segment (from
                    // IndexWriter.commitMergedDeletes).
                    SegmentReader reader = this.Reader == null ? new SegmentReader(Info, Writer.Config.ReaderTermsIndexDivisor, IOContext.READONCE) : this.Reader;
                    try
                    {
                        // clone FieldInfos so that we can update their dvGen separately from
                        // the reader's infos and write them to a new fieldInfos_gen file
                        FieldInfos.Builder builder = new FieldInfos.Builder(Writer.GlobalFieldNumberMap);
                        // cannot use builder.add(reader.getFieldInfos()) because it does not
                        // clone FI.attributes as well FI.dvGen
                        foreach (FieldInfo fi in reader.FieldInfos)
                        {
                            FieldInfo clone = builder.Add(fi);
                            // copy the stuff FieldInfos.Builder doesn't copy
                            if (fi.Attributes() != null)
                            {
                                foreach (KeyValuePair <string, string> e in fi.Attributes())
                                {
                                    clone.PutAttribute(e.Key, e.Value);
                                }
                            }
                            clone.DocValuesGen = fi.DocValuesGen;
                        }
                        // create new fields or update existing ones to have NumericDV type
                        foreach (string f in dvUpdates.NumericDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, NumericDocValuesField.TYPE);
                        }
                        // create new fields or update existing ones to have BinaryDV type
                        foreach (string f in dvUpdates.BinaryDVUpdates.Keys)
                        {
                            builder.AddOrUpdate(f, BinaryDocValuesField.fType);
                        }

                        fieldInfos = builder.Finish();
                        long              nextFieldInfosGen     = Info.NextFieldInfosGen;
                        string            segmentSuffix         = nextFieldInfosGen.ToString(CultureInfo.InvariantCulture);//Convert.ToString(nextFieldInfosGen, Character.MAX_RADIX));
                        SegmentWriteState state                 = new SegmentWriteState(null, trackingDir, Info.Info, fieldInfos, Writer.Config.TermIndexInterval, null, IOContext.DEFAULT, segmentSuffix);
                        DocValuesFormat   docValuesFormat       = codec.DocValuesFormat();
                        DocValuesConsumer fieldsConsumer        = docValuesFormat.FieldsConsumer(state);
                        bool              fieldsConsumerSuccess = false;
                        try
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
                            foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates)
                            {
                                string field = e.Key;
                                NumericDocValuesFieldUpdates fieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                Debug.Assert(fieldInfo != null);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddNumericField(fieldInfo, GetLongEnumerable(reader, field, fieldUpdates));
                            }

                            //        System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
                            foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates)
                            {
                                string field = e.Key;
                                BinaryDocValuesFieldUpdates dvFieldUpdates = e.Value;
                                FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
                                Debug.Assert(fieldInfo != null);

                                //          System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " f=" + dvFieldUpdates + ", updates=" + dvFieldUpdates);

                                fieldInfo.DocValuesGen = nextFieldInfosGen;
                                // write the numeric updates to a new gen'd docvalues file
                                fieldsConsumer.AddBinaryField(fieldInfo, GetBytesRefEnumerable(reader, field, dvFieldUpdates));
                            }

                            codec.FieldInfosFormat().FieldInfosWriter.Write(trackingDir, Info.Info.Name, segmentSuffix, fieldInfos, IOContext.DEFAULT);
                            fieldsConsumerSuccess = true;
                        }
                        finally
                        {
                            if (fieldsConsumerSuccess)
                            {
                                fieldsConsumer.Dispose();
                            }
                            else
                            {
                                IOUtils.CloseWhileHandlingException(fieldsConsumer);
                            }
                        }
                    }
                    finally
                    {
                        if (reader != this.Reader)
                        {
                            //          System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
                            reader.Dispose();
                        }
                    }

                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        // Advance only the nextWriteDocValuesGen so that a 2nd
                        // attempt to write will write to a new file
                        Info.AdvanceNextWriteFieldInfosGen();

                        // Delete any partially created file(s):
                        foreach (string fileName in trackingDir.CreatedFiles)
                        {
                            try
                            {
                                dir.DeleteFile(fileName);
                            }
                            catch (Exception)
                            {
                                // Ignore so we throw only the first exc
                            }
                        }
                    }
                }

                Info.AdvanceFieldInfosGen();
                // copy all the updates to mergingUpdates, so they can later be applied to the merged segment
                if (IsMerging)
                {
                    foreach (KeyValuePair <string, NumericDocValuesFieldUpdates> e in dvUpdates.NumericDVUpdates)
                    {
                        DocValuesFieldUpdates updates;
                        if (!MergingDVUpdates.TryGetValue(e.Key, out updates))
                        {
                            MergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                    foreach (KeyValuePair <string, BinaryDocValuesFieldUpdates> e in dvUpdates.BinaryDVUpdates)
                    {
                        DocValuesFieldUpdates updates;
                        if (!MergingDVUpdates.TryGetValue(e.Key, out updates))
                        {
                            MergingDVUpdates[e.Key] = e.Value;
                        }
                        else
                        {
                            updates.Merge(e.Value);
                        }
                    }
                }

                // create a new map, keeping only the gens that are in use
                IDictionary <long, ISet <string> > genUpdatesFiles    = Info.UpdatesFiles;
                IDictionary <long, ISet <string> > newGenUpdatesFiles = new Dictionary <long, ISet <string> >();
                long fieldInfosGen = Info.FieldInfosGen;
                foreach (FieldInfo fi in fieldInfos)
                {
                    long dvGen = fi.DocValuesGen;
                    if (dvGen != -1 && !newGenUpdatesFiles.ContainsKey(dvGen))
                    {
                        if (dvGen == fieldInfosGen)
                        {
                            newGenUpdatesFiles[fieldInfosGen] = trackingDir.CreatedFiles;
                        }
                        else
                        {
                            newGenUpdatesFiles[dvGen] = genUpdatesFiles[dvGen];
                        }
                    }
                }

                Info.GenUpdatesFiles = newGenUpdatesFiles;

                // wrote new files, should checkpoint()
                Writer.Checkpoint();

                // if there is a reader open, reopen it to reflect the updates
                if (Reader != null)
                {
                    SegmentReader newReader = new SegmentReader(Info, Reader, LiveDocs_Renamed, Info.Info.DocCount - Info.DelCount - PendingDeleteCount_Renamed);
                    bool          reopened  = false;
                    try
                    {
                        Reader.DecRef();
                        Reader   = newReader;
                        reopened = true;
                    }
                    finally
                    {
                        if (!reopened)
                        {
                            newReader.DecRef();
                        }
                    }
                }
            }
        }
Example #17
        //static final boolean TEST = false;

        public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
        {
            string termsIndexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
            string termsBlockFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);

            this.postingsReader = postingsReader;
            ChecksumIndexInput indexIn = null;
            IndexInput         blockIn = null;
            bool success = false;

            try
            {
                indexIn = state.Directory.OpenChecksumInput(termsIndexFileName, state.Context);
                blockIn = state.Directory.OpenInput(termsBlockFileName, state.Context);
                version = ReadHeader(indexIn);
                ReadHeader(blockIn);
                if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
                {
                    CodecUtil.ChecksumEntireFile(blockIn);
                }

                this.postingsReader.Init(blockIn);
                SeekDir(blockIn);

                FieldInfos fieldInfos = state.FieldInfos;
                int        numFields  = blockIn.ReadVInt();
                for (int i = 0; i < numFields; i++)
                {
                    FieldInfo fieldInfo        = fieldInfos.FieldInfo(blockIn.ReadVInt());
                    bool      hasFreq          = fieldInfo.IndexOptions != FieldInfo.IndexOptions.DOCS_ONLY;
                    long      numTerms         = blockIn.ReadVLong();
                    long      sumTotalTermFreq = hasFreq ? blockIn.ReadVLong() : -1;
                    long      sumDocFreq       = blockIn.ReadVLong();
                    int       docCount         = blockIn.ReadVInt();
                    int       longsSize        = blockIn.ReadVInt();
                    var       index            = new FST <long>(indexIn, PositiveIntOutputs.Singleton);

                    var current  = new TermsReader(fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
                    TermsReader previous; // any reader already registered for this field (a duplicate means a corrupt index)
                    fields.TryGetValue(fieldInfo.Name, out previous);
                    fields[fieldInfo.Name] = current;
                    CheckFieldSummary(state.SegmentInfo, indexIn, blockIn, current, previous);
                }
                if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
                {
                    CodecUtil.CheckFooter(indexIn);
                }
                else
                {
                    CodecUtil.CheckEOF(indexIn);
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(indexIn, blockIn);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(indexIn, blockIn);
                }
            }
        }
        /// <summary>
        /// Retrieve suggestions, specifying whether all terms
        ///  must match (<paramref name="allTermsRequired"/>) and whether the hits
        ///  should be highlighted (<paramref name="doHighlight"/>).
        /// </summary>
        public virtual IList <LookupResult> DoLookup(string key, IEnumerable <BytesRef> contexts, int num, bool allTermsRequired, bool doHighlight)
        {
            if (m_searcherMgr == null)
            {
                throw new InvalidOperationException("suggester was not built");
            }

            Occur occur;

            if (allTermsRequired)
            {
                occur = Occur.MUST;
            }
            else
            {
                occur = Occur.SHOULD;
            }

            TokenStream  ts = null;
            BooleanQuery query;
            var          matchedTokens = new HashSet <string>();
            string       prefixToken   = null;

            try
            {
                ts = m_queryAnalyzer.GetTokenStream("", new StringReader(key));

                //long t0 = System.currentTimeMillis();
                ts.Reset();
                var    termAtt   = ts.AddAttribute <ICharTermAttribute>();
                var    offsetAtt = ts.AddAttribute <IOffsetAttribute>();
                string lastToken = null;
                query = new BooleanQuery();
                int maxEndOffset = -1;
                matchedTokens = new HashSet <string>();
                while (ts.IncrementToken())
                {
                    if (lastToken != null)
                    {
                        matchedTokens.Add(lastToken);
                        query.Add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
                    }
                    lastToken = termAtt.ToString();
                    if (lastToken != null)
                    {
                        maxEndOffset = Math.Max(maxEndOffset, offsetAtt.EndOffset);
                    }
                }
                ts.End();

                if (lastToken != null)
                {
                    Query lastQuery;
                    if (maxEndOffset == offsetAtt.EndOffset)
                    {
                        // Use PrefixQuery (or the ngram equivalent) when
                        // there was no trailing discarded chars in the
                        // string (e.g. whitespace), so that if query does
                        // not end with a space we show prefix matches for
                        // that token:
                        lastQuery   = GetLastTokenQuery(lastToken);
                        prefixToken = lastToken;
                    }
                    else
                    {
                        // Use TermQuery for an exact match if there were
                        // trailing discarded chars (e.g. whitespace), so
                        // that if query ends with a space we only show
                        // exact matches for that term:
                        matchedTokens.Add(lastToken);
                        lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
                    }
                    if (lastQuery != null)
                    {
                        query.Add(lastQuery, occur);
                    }
                }

                if (contexts != null)
                {
                    BooleanQuery sub = new BooleanQuery();
                    query.Add(sub, Occur.MUST);
                    foreach (BytesRef context in contexts)
                    {
                        // NOTE: we "should" wrap this in
                        // ConstantScoreQuery, or maybe send this as a
                        // Filter instead to search, but since all of
                        // these are MUST'd, the change to the score won't
                        // affect the overall ranking.  Since we indexed
                        // as DOCS_ONLY, the perf should be the same
                        // either way (no freq int[] blocks to decode):

                        // TODO: if we had a BinaryTermField we could fix
                        // this "must be valid ut8f" limitation:
                        sub.Add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.Utf8ToString())), Occur.SHOULD);
                    }
                }
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(ts);
            }

            // TODO: we could allow blended sort here, combining
            // weight w/ score.  Now we ignore score and sort only
            // by weight:

            Query finalQuery = FinishQuery(query, allTermsRequired);

            //System.out.println("finalQuery=" + query);

            // Sort by weight, descending:
            TopFieldCollector c = TopFieldCollector.Create(SORT, num, true, false, false, false);

            // We sorted postings by weight during indexing, so we
            // only retrieve the first num hits now:
            ICollector           c2       = new EarlyTerminatingSortingCollector(c, SORT, num);
            IndexSearcher        searcher = m_searcherMgr.Acquire();
            IList <LookupResult> results  = null;

            try
            {
                //System.out.println("got searcher=" + searcher);
                searcher.Search(finalQuery, c2);

                TopFieldDocs hits = (TopFieldDocs)c.GetTopDocs();

                // Slower way if postings are not pre-sorted by weight:
                // hits = searcher.search(query, null, num, SORT);
                results = CreateResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
            }
            finally
            {
                m_searcherMgr.Release(searcher);
            }

            //System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
            //System.out.println(results);

            return(results);
        }
Example #19
        public override void Build(IInputIterator iterator)
        {
            if (iterator.HasPayloads)
            {
                throw new System.ArgumentException("this suggester doesn't support payloads");
            }
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }
            FileInfo tempInput  = FileSupport.CreateTempFile(typeof(FSTCompletionLookup).Name, ".input", OfflineSorter.DefaultTempDir());
            FileInfo tempSorted = FileSupport.CreateTempFile(typeof(FSTCompletionLookup).Name, ".sorted", OfflineSorter.DefaultTempDir());

            OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
            OfflineSorter.ByteSequencesReader reader = null;
            ExternalRefSorter sorter = null;

            // Push floats up front before sequences to sort them. For now, assume they are non-negative.
            // If negative floats are allowed some trickery needs to be done to find their byte order.
            bool success = false;

            count = 0;
            try
            {
                byte[] buffer = new byte[0];
                ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
                BytesRef            spare;
                while ((spare = iterator.Next()) != null)
                {
                    if (spare.Length + 4 >= buffer.Length)
                    {
                        buffer = ArrayUtil.Grow(buffer, spare.Length + 4);
                    }

                    output.Reset(buffer);
                    output.WriteInt32(EncodeWeight(iterator.Weight));
                    output.WriteBytes(spare.Bytes, spare.Offset, spare.Length);
                    writer.Write(buffer, 0, output.Position);
                }
                writer.Dispose();

                // We don't know the distribution of scores and we need to bucket them, so we'll sort
                // and divide into equal buckets.
                OfflineSorter.SortInfo info = (new OfflineSorter()).Sort(tempInput, tempSorted);
                tempInput.Delete();
                FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter = new ExternalRefSorter(new OfflineSorter()), sharedTailLength);

                int inputLines = info.Lines;
                reader = new OfflineSorter.ByteSequencesReader(tempSorted);
                long line                = 0;
                int  previousBucket      = 0;
                int  previousScore       = 0;
                ByteArrayDataInput input = new ByteArrayDataInput();
                BytesRef           tmp1  = new BytesRef();
                BytesRef           tmp2  = new BytesRef();
                while (reader.Read(tmp1))
                {
                    input.Reset(tmp1.Bytes);
                    int currentScore = input.ReadInt32();

                    int bucket;
                    if (line > 0 && currentScore == previousScore)
                    {
                        bucket = previousBucket;
                    }
                    else
                    {
                        bucket = (int)(line * buckets / inputLines);
                    }
                    previousScore  = currentScore;
                    previousBucket = bucket;

                    // Only append the input, discard the weight.
                    tmp2.Bytes  = tmp1.Bytes;
                    tmp2.Offset = input.Position;
                    tmp2.Length = tmp1.Length - input.Position;
                    builder.Add(tmp2, bucket);

                    line++;
                    count++;
                }

                // The two FSTCompletions share the same automaton.
                this.higherWeightsCompletion = builder.Build();
                this.normalCompletion        = new FSTCompletion(higherWeightsCompletion.FST, false, exactMatchFirst);

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(reader, writer, sorter);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(reader, writer, sorter);
                }

                tempInput.Delete();
                tempSorted.Delete();
            }
        }
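
The bucket assignment in the loop above is plain proportional scaling over the sorted input: record number line out of inputLines lands in bucket line * buckets / inputLines, and ties on score are pinned to the previous record's bucket so equal weights never straddle a boundary. A tiny self-contained demonstration:

using System;

class BucketDemo
{
    static void Main()
    {
        // Proportional bucketing as used above: 10 buckets over 1000 lines.
        int buckets = 10, inputLines = 1000;
        foreach (long line in new long[] { 0, 99, 100, 999 })
        {
            Console.WriteLine($"line {line} -> bucket {(int)(line * buckets / inputLines)}");
        }
        // prints buckets 0, 0, 1, 9
    }
}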
        public override NumericDocValues GetNumeric(FieldInfo field)
        {
            lock (this)
            {
                NumericDocValues instance;
                if (!NumericInstances.TryGetValue(field.Number, out instance))
                {
                    string     fileName = IndexFileNames.SegmentFileName(State.SegmentInfo.Name + "_" + Convert.ToString(field.Number), SegmentSuffix, "dat");
                    IndexInput input    = Dir.OpenInput(fileName, State.Context);
                    bool       success  = false;
                    try
                    {
                        var type = LegacyDocValuesType.ValueOf(field.GetAttribute(LegacyKey));

                        //switch (Enum.Parse(typeof(LegacyDocValuesType), field.GetAttribute(LegacyKey)))
                        //{
                        if (type == LegacyDocValuesType.VAR_INTS)
                        {
                            instance = LoadVarIntsField(field, input);
                        }
                        else if (type == LegacyDocValuesType.FIXED_INTS_8)
                        {
                            instance = LoadByteField(field, input);
                        }
                        else if (type == LegacyDocValuesType.FIXED_INTS_16)
                        {
                            instance = LoadShortField(field, input);
                        }
                        else if (type == LegacyDocValuesType.FIXED_INTS_32)
                        {
                            instance = LoadIntField(field, input);
                        }
                        else if (type == LegacyDocValuesType.FIXED_INTS_64)
                        {
                            instance = LoadLongField(field, input);
                        }
                        else if (type == LegacyDocValuesType.FLOAT_32)
                        {
                            instance = LoadFloatField(field, input);
                        }
                        else if (type == LegacyDocValuesType.FLOAT_64)
                        {
                            instance = LoadDoubleField(field, input);
                        }
                        else
                        {
                            throw new InvalidOperationException();
                        }

                        CodecUtil.CheckEOF(input);
                        success = true;
                    }
                    finally
                    {
                        if (success)
                        {
                            IOUtils.Close(input);
                        }
                        else
                        {
                            IOUtils.CloseWhileHandlingException(input);
                        }
                    }
                    NumericInstances[field.Number] = instance;
                }
                return(instance);
            }
        }
Example #21
        /// <summary>
        /// Creates a postings writer with the specified PackedInts overhead ratio </summary>
        // TODO: does this ctor even make sense?
        public Lucene41PostingsWriter(SegmentWriteState state, float acceptableOverheadRatio)
            : base()
        {
            DocOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), state.Context);
            IndexOutput posOut  = null;
            IndexOutput payOut  = null;
            bool        success = false;

            try
            {
                CodecUtil.WriteHeader(DocOut, DOC_CODEC, VERSION_CURRENT);
                ForUtil = new ForUtil(acceptableOverheadRatio, DocOut);
                if (state.FieldInfos.HasProx())
                {
                    PosDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE];
                    posOut         = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), state.Context);
                    CodecUtil.WriteHeader(posOut, POS_CODEC, VERSION_CURRENT);

                    if (state.FieldInfos.HasPayloads())
                    {
                        PayloadBytes        = new sbyte[128];
                        PayloadLengthBuffer = new int[ForUtil.MAX_DATA_SIZE];
                    }
                    else
                    {
                        PayloadBytes        = null;
                        PayloadLengthBuffer = null;
                    }

                    if (state.FieldInfos.HasOffsets())
                    {
                        OffsetStartDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE];
                        OffsetLengthBuffer     = new int[ForUtil.MAX_DATA_SIZE];
                    }
                    else
                    {
                        OffsetStartDeltaBuffer = null;
                        OffsetLengthBuffer     = null;
                    }

                    if (state.FieldInfos.HasPayloads() || state.FieldInfos.HasOffsets())
                    {
                        payOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), state.Context);
                        CodecUtil.WriteHeader(payOut, PAY_CODEC, VERSION_CURRENT);
                    }
                }
                else
                {
                    PosDeltaBuffer         = null;
                    PayloadLengthBuffer    = null;
                    OffsetStartDeltaBuffer = null;
                    OffsetLengthBuffer     = null;
                    PayloadBytes           = null;
                }
                this.PayOut = payOut;
                this.PosOut = posOut;
                success     = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(DocOut, posOut, payOut);
                }
            }

            DocDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE];
            FreqBuffer     = new int[ForUtil.MAX_DATA_SIZE];

            // TODO: should we try skipping every 2/4 blocks...?
            SkipWriter = new Lucene41SkipWriter(MaxSkipLevels, Lucene41PostingsFormat.BLOCK_SIZE, state.SegmentInfo.DocCount, DocOut, posOut, payOut);

            Encoded = new sbyte[ForUtil.MAX_ENCODED_SIZE];
        }
Example #22
        public override void Build(IInputIterator iterator)
        {
            if (iterator.HasContexts)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }
            string prefix     = this.GetType().Name;
            var    directory  = OfflineSorter.DefaultTempDir();
            var    tempInput  = FileSupport.CreateTempFile(prefix, ".input", directory);
            var    tempSorted = FileSupport.CreateTempFile(prefix, ".sorted", directory);

            hasPayloads = iterator.HasPayloads;

            var writer = new OfflineSorter.ByteSequencesWriter(tempInput);

            OfflineSorter.ByteSequencesReader reader = null;
            var scratch = new BytesRef();

            TokenStreamToAutomaton ts2a = GetTokenStreamToAutomaton();

            bool success = false;

            count = 0;
            byte[] buffer = new byte[8];
            try
            {
                var      output = new ByteArrayDataOutput(buffer);
                BytesRef surfaceForm;

                while ((surfaceForm = iterator.Next()) != null)
                {
                    ISet <Int32sRef> paths = ToFiniteStrings(surfaceForm, ts2a);

                    maxAnalyzedPathsForOneInput = Math.Max(maxAnalyzedPathsForOneInput, paths.Count);

                    foreach (Int32sRef path in paths)
                    {
                        Util.Fst.Util.ToBytesRef(path, scratch);

                        // length of the analyzed text (FST input)
                        if (scratch.Length > ushort.MaxValue - 2)
                        {
                            throw new System.ArgumentException("cannot handle analyzed forms > " + (ushort.MaxValue - 2) +
                                                               " in length (got " + scratch.Length + ")");
                        }
                        ushort analyzedLength = (ushort)scratch.Length;

                        // compute the required length:
                        // analyzed sequence + weight (4) + surface + analyzedLength (short)
                        int requiredLength = analyzedLength + 4 + surfaceForm.Length + 2;

                        BytesRef payload;

                        if (hasPayloads)
                        {
                            if (surfaceForm.Length > (ushort.MaxValue - 2))
                            {
                                throw new ArgumentException("cannot handle surface form > " + (ushort.MaxValue - 2) +
                                                            " in length (got " + surfaceForm.Length + ")");
                            }
                            payload = iterator.Payload;
                            // payload + surfaceLength (short)
                            requiredLength += payload.Length + 2;
                        }
                        else
                        {
                            payload = null;
                        }

                        buffer = ArrayUtil.Grow(buffer, requiredLength);

                        output.Reset(buffer);

                        output.WriteInt16((short)analyzedLength);

                        output.WriteBytes(scratch.Bytes, scratch.Offset, scratch.Length);

                        output.WriteInt32(EncodeWeight(iterator.Weight));

                        if (hasPayloads)
                        {
                            for (int i = 0; i < surfaceForm.Length; i++)
                            {
                                if (surfaceForm.Bytes[i] == PAYLOAD_SEP)
                                {
                                    throw new ArgumentException(
                                              "surface form cannot contain unit separator character U+001F; this character is reserved");
                                }
                            }
                            output.WriteInt16((short)surfaceForm.Length);
                            output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length);
                            output.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
                        }
                        else
                        {
                            output.WriteBytes(surfaceForm.Bytes, surfaceForm.Offset, surfaceForm.Length);
                        }

                        Debug.Assert(output.Position == requiredLength, output.Position + " vs " + requiredLength);

                        writer.Write(buffer, 0, output.Position);
                    }
                    count++;
                }
                writer.Dispose();

                // Sort all input/output pairs (required by FST.Builder):
                (new OfflineSorter(new AnalyzingComparer(hasPayloads))).Sort(tempInput, tempSorted);

                // Free disk space:
                tempInput.Delete();

                reader = new OfflineSorter.ByteSequencesReader(tempSorted);

                var outputs = new PairOutputs <long?, BytesRef>(PositiveInt32Outputs.Singleton,
                                                                ByteSequenceOutputs.Singleton);
                var builder = new Builder <PairOutputs <long?, BytesRef> .Pair>(FST.INPUT_TYPE.BYTE1, outputs);

                // Build FST:
                BytesRef  previousAnalyzed = null;
                BytesRef  analyzed         = new BytesRef();
                BytesRef  surface          = new BytesRef();
                Int32sRef scratchInts      = new Int32sRef();
                var       input            = new ByteArrayDataInput();

                // Used to remove duplicate surface forms (but we
                // still index the highest-weight one).  We clear
                // this when we see a new analyzed form, so it cannot
                // grow unbounded (at most 256 entries):
                var seenSurfaceForms = new HashSet <BytesRef>();

                var dedup = 0;
                while (reader.Read(scratch))
                {
                    input.Reset(scratch.Bytes, scratch.Offset, scratch.Length);
                    ushort analyzedLength = (ushort)input.ReadInt16();
                    analyzed.Grow(analyzedLength + 2);
                    input.ReadBytes(analyzed.Bytes, 0, analyzedLength);
                    analyzed.Length = analyzedLength;

                    long cost = input.ReadInt32();

                    surface.Bytes = scratch.Bytes;
                    if (hasPayloads)
                    {
                        surface.Length = (ushort)input.ReadInt16();
                        surface.Offset = input.Position;
                    }
                    else
                    {
                        surface.Offset = input.Position;
                        surface.Length = scratch.Length - surface.Offset;
                    }

                    if (previousAnalyzed == null)
                    {
                        previousAnalyzed = new BytesRef();
                        previousAnalyzed.CopyBytes(analyzed);
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }
                    else if (analyzed.Equals(previousAnalyzed))
                    {
                        dedup++;
                        if (dedup >= maxSurfaceFormsPerAnalyzedForm)
                        {
                            // More than maxSurfaceFormsPerAnalyzedForm
                            // dups: skip the rest:
                            continue;
                        }
                        if (seenSurfaceForms.Contains(surface))
                        {
                            continue;
                        }
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }
                    else
                    {
                        dedup = 0;
                        previousAnalyzed.CopyBytes(analyzed);
                        seenSurfaceForms.Clear();
                        seenSurfaceForms.Add(BytesRef.DeepCopyOf(surface));
                    }

                    // TODO: I think we can avoid the extra 2 bytes when
                    // there is no dup (dedup==0), but we'd have to fix
                    // the exactFirst logic ... which would be sort of
                    // hairy because we'd need to special case the two
                    // (dup/not dup)...

                    // NOTE: must be byte 0 so we sort before whatever
                    // is next
                    analyzed.Bytes[analyzed.Offset + analyzed.Length]     = 0;
                    analyzed.Bytes[analyzed.Offset + analyzed.Length + 1] = (byte)dedup;
                    analyzed.Length += 2;

                    Util.Fst.Util.ToInt32sRef(analyzed, scratchInts);
                    //System.out.println("ADD: " + scratchInts + " -> " + cost + ": " + surface.utf8ToString());
                    if (!hasPayloads)
                    {
                        builder.Add(scratchInts, outputs.NewPair(cost, BytesRef.DeepCopyOf(surface)));
                    }
                    else
                    {
                        int      payloadOffset = input.Position + surface.Length;
                        int      payloadLength = scratch.Length - payloadOffset;
                        BytesRef br            = new BytesRef(surface.Length + 1 + payloadLength);
                        Array.Copy(surface.Bytes, surface.Offset, br.Bytes, 0, surface.Length);
                        br.Bytes[surface.Length] = PAYLOAD_SEP;
                        Array.Copy(scratch.Bytes, payloadOffset, br.Bytes, surface.Length + 1, payloadLength);
                        br.Length = br.Bytes.Length;
                        builder.Add(scratchInts, outputs.NewPair(cost, br));
                    }
                }
                fst = builder.Finish();

                //Util.dotToFile(fst, "/tmp/suggest.dot");

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(reader, writer);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(reader, writer);
                }

                tempInput.Delete();
                tempSorted.Delete();
            }
        }
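A minimal sketch of the weight inversion this method relies on: EncodeWeight must map higher weights to lower costs so that both the byte-sorted temp file and the min-cost FST outputs surface the highest-weight suggestion first. The helper below is inferred from the cost arithmetic above, not necessarily the library's exact code.

        using System;

        internal static class WeightCodec // hypothetical helper, for illustration
        {
            // Invert the weight so a larger weight yields a smaller cost.
            // Assumption: weights are non-negative and fit in 32 bits, as
            // the int-sized cost written above implies.
            public static int EncodeWeight(long weight)
            {
                if (weight < 0 || weight > int.MaxValue)
                {
                    throw new NotSupportedException("cannot encode value: " + weight);
                }
                return int.MaxValue - (int)weight;
            }

            // Undo the inversion when a cost is read back out of the FST.
            public static long DecodeWeight(long cost)
            {
                return int.MaxValue - cost;
            }
        }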
 public override void Abort()
 {
     IOUtils.CloseWhileHandlingException(this);
     IOUtils.DeleteFilesIgnoringExceptions(Directory, IndexFileNames.SegmentFileName(Segment, SegmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION), IndexFileNames.SegmentFileName(Segment, SegmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
 }
Example #24
        // note: just like SegmentReader in 3.x, we open up all the files here (including separate norms) up front.
        // but we just don't do any seeks or reading yet.
        public Lucene3xNormsProducer(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context)
        {
            Directory separateNormsDir = info.Dir; // separate norms are never inside CFS

            Maxdoc = info.DocCount;
            string segmentName = info.Name;
            bool   success     = false;

            try
            {
                long nextNormSeek = NORMS_HEADER.Length; //skip header (header unused for now)
                foreach (FieldInfo fi in fields)
                {
                    if (fi.HasNorms())
                    {
                        string    fileName = GetNormFilename(info, fi.Number);
                        Directory d        = HasSeparateNorms(info, fi.Number) ? separateNormsDir : dir;

                        // singleNormFile means multiple norms share this file
                        bool       singleNormFile = IndexFileNames.MatchesExtension(fileName, NORMS_EXTENSION);
                        IndexInput normInput      = null;
                        long       normSeek;

                        if (singleNormFile)
                        {
                            normSeek = nextNormSeek;
                            if (SingleNormStream == null)
                            {
                                SingleNormStream = d.OpenInput(fileName, context);
                                OpenFiles.Add(SingleNormStream);
                            }
                            // All norms in the .nrm file can share a single IndexInput since
                            // they are only used in a synchronized context.
                            // If this were to change in the future, a clone could be done here.
                            normInput = SingleNormStream;
                        }
                        else
                        {
                            normInput = d.OpenInput(fileName, context);
                            OpenFiles.Add(normInput);
                            // if the segment was created in 3.2 or after, we wrote the header for sure,
                            // and don't need to do the sketchy file size check. otherwise, we check
                            // if the size is exactly equal to maxDoc to detect a headerless file.
                            // NOTE: remove this check in Lucene 5.0!
                            string version       = info.Version;
                            bool   isUnversioned = (version == null || StringHelper.VersionComparator.Compare(version, "3.2") < 0) && normInput.Length() == Maxdoc;
                            if (isUnversioned)
                            {
                                normSeek = 0;
                            }
                            else
                            {
                                normSeek = NORMS_HEADER.Length;
                            }
                        }
                        NormsDocValues norm = new NormsDocValues(this, normInput, normSeek);
                        Norms[fi.Name] = norm;
                        nextNormSeek  += Maxdoc; // increment also if some norms are separate
                    }
                }
                // TODO: change to a real check? see LUCENE-3619
                Debug.Assert(SingleNormStream == null || nextNormSeek == SingleNormStream.Length(), SingleNormStream != null ? "len: " + SingleNormStream.Length() + " expected: " + nextNormSeek : "null");
                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(OpenFiles);
                }
            }
            ramBytesUsed = new AtomicLong();
        }
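Given the layout established above, where every field with norms occupies exactly Maxdoc consecutive bytes starting at its normSeek, fetching one document's norm is a seek plus a single byte read. A minimal sketch (hypothetical helper; the lock mirrors the synchronized-use note in the comments above):

        using Lucene.Net.Store;

        internal static class NormsSketch
        {
            // Fetch the similarity byte for docId from a field whose norms
            // begin at normSeek within normInput.
            public static byte ReadNormByte(IndexInput normInput, long normSeek, int docId)
            {
                lock (normInput) // a shared .nrm stream is only used under synchronization
                {
                    normInput.Seek(normSeek + docId);
                    return normInput.ReadByte();
                }
            }
        }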
        /// <summary>
        /// expert: instantiates a new reader </summary>
        protected internal Lucene45DocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
        {
            string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
            // read in the entries from the metadata file.
            ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);

            this.maxDoc = state.SegmentInfo.DocCount;
            bool success = false;

            try
            {
                version    = CodecUtil.CheckHeader(@in, metaCodec, Lucene45DocValuesFormat.VERSION_START, Lucene45DocValuesFormat.VERSION_CURRENT);
                numerics   = new Dictionary <int, NumericEntry>();
                ords       = new Dictionary <int, NumericEntry>();
                ordIndexes = new Dictionary <int, NumericEntry>();
                binaries   = new Dictionary <int, BinaryEntry>();
                sortedSets = new Dictionary <int, SortedSetEntry>();
                ReadFields(@in, state.FieldInfos);

                if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM)
                {
                    CodecUtil.CheckFooter(@in);
                }
                else
                {
#pragma warning disable 612, 618
                    CodecUtil.CheckEOF(@in);
#pragma warning restore 612, 618
                }

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(@in);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(@in);
                }
            }

            success = false;
            try
            {
                string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
                data = state.Directory.OpenInput(dataName, state.Context);
                int version2 = CodecUtil.CheckHeader(data, dataCodec, Lucene45DocValuesFormat.VERSION_START, Lucene45DocValuesFormat.VERSION_CURRENT);
                if (version != version2)
                {
                    throw new Exception("Format versions mismatch");
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(this.data);
                }
            }

            ramBytesUsed = new AtomicInt64(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
        }
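The two try blocks above perform a version handshake: the metadata (.dvm) header and the data (.dvd) header must report the same format version, or the segment is rejected. A condensed sketch of that handshake, using only the CodecUtil calls already shown (names are illustrative):

        using System;
        using Lucene.Net.Codecs;
        using Lucene.Net.Store;

        internal static class VersionHandshake
        {
            // Both streams must begin with a codec header in the supported
            // range, and the two versions must agree.
            public static int CheckPairedHeaders(DataInput meta, string metaCodec,
                                                 DataInput data, string dataCodec,
                                                 int minVersion, int maxVersion)
            {
                int metaVersion = CodecUtil.CheckHeader(meta, metaCodec, minVersion, maxVersion);
                int dataVersion = CodecUtil.CheckHeader(data, dataCodec, minVersion, maxVersion);
                if (metaVersion != dataVersion)
                {
                    throw new Exception("Format versions mismatch: meta=" + metaVersion + ", data=" + dataVersion);
                }
                return metaVersion;
            }
        }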
Example #26
        public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
        {
            string     fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
            IndexInput input    = directory.OpenInput(fileName, iocontext);

            bool success = false;

            try
            {
                CodecUtil.CheckHeader(input, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_START, Lucene40FieldInfosFormat.FORMAT_CURRENT);

                int         size  = input.ReadVInt(); //read in the size
                FieldInfo[] infos = new FieldInfo[size];

                for (int i = 0; i < size; i++)
                {
                    string name            = input.ReadString();
                    int    fieldNumber     = input.ReadVInt();
                    byte   bits            = input.ReadByte();
                    bool   isIndexed       = (bits & Lucene40FieldInfosFormat.IS_INDEXED) != 0;
                    bool   storeTermVector = (bits & Lucene40FieldInfosFormat.STORE_TERMVECTOR) != 0;
                    bool   omitNorms       = (bits & Lucene40FieldInfosFormat.OMIT_NORMS) != 0;
                    bool   storePayloads   = (bits & Lucene40FieldInfosFormat.STORE_PAYLOADS) != 0;
                    FieldInfo.IndexOptions indexOptions;
                    if (!isIndexed)
                    {
                        indexOptions = default(FieldInfo.IndexOptions);
                    }
                    else if ((bits & Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                    }
                    else if ((bits & Lucene40FieldInfosFormat.OMIT_POSITIONS) != 0)
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
                    }
                    else if ((bits & Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0)
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                    }
                    else
                    {
                        indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                    }

                    // LUCENE-3027: past indices were able to write
                    // storePayloads=true when omitTFAP is also true,
                    // which is invalid.  We correct that, here:
                    if (isIndexed && indexOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0)
                    {
                        storePayloads = false;
                    }
                    // DV Types are packed in one byte
                    byte val = input.ReadByte();
                    LegacyDocValuesType          oldValuesType = GetDocValuesType((sbyte)(val & 0x0F));
                    LegacyDocValuesType          oldNormsType  = GetDocValuesType((sbyte)(((int)((uint)val >> 4)) & 0x0F));
                    IDictionary <string, string> attributes    = input.ReadStringStringMap();
                    if (oldValuesType.Mapping != null)
                    {
                        attributes[LEGACY_DV_TYPE_KEY] = oldValuesType.Name;
                    }
                    if (oldNormsType.Mapping != null)
                    {
                        if (oldNormsType.Mapping != FieldInfo.DocValuesType_e.NUMERIC)
                        {
                            throw new CorruptIndexException("invalid norm type: " + oldNormsType + " (resource=" + input + ")");
                        }
                        attributes[LEGACY_NORM_TYPE_KEY] = oldNormsType.Name;
                    }
                    infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, oldValuesType.Mapping, oldNormsType.Mapping, attributes);
                }
                CodecUtil.CheckEOF(input);
                FieldInfos fieldInfos = new FieldInfos(infos);
                success = true;
                return(fieldInfos);
            }
            finally
            {
                if (success)
                {
                    input.Dispose();
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(input);
                }
            }
        }
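The flags byte decoded above packs several booleans into one byte; below is a small self-contained demo of the same masking. The mask values here are assumptions for illustration; the authoritative constants live on Lucene40FieldInfosFormat.

        using System;

        internal static class FieldBitsDemo
        {
            // Assumed masks, for illustration only.
            private const byte IS_INDEXED = 0x1;
            private const byte STORE_TERMVECTOR = 0x2;
            private const byte OMIT_NORMS = 0x10;
            private const byte STORE_PAYLOADS = 0x20;

            public static void Main()
            {
                const byte bits = IS_INDEXED | STORE_PAYLOADS; // as a matching writer would pack it
                Console.WriteLine("indexed={0} vectors={1} omitNorms={2} payloads={3}",
                                  (bits & IS_INDEXED) != 0,
                                  (bits & STORE_TERMVECTOR) != 0,
                                  (bits & OMIT_NORMS) != 0,
                                  (bits & STORE_PAYLOADS) != 0);
            }
        }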
Example #27
 public override void Abort()
 {
     IOUtils.CloseWhileHandlingException(this);
     IOUtils.DeleteFilesIgnoringExceptions(Directory, IndexFileNames.SegmentFileName(Segment, SegmentSuffix, VECTORS_EXTENSION), IndexFileNames.SegmentFileName(Segment, SegmentSuffix, VECTORS_INDEX_EXTENSION));
 }
Example #28
        protected override IQueryNode PostProcessNode(IQueryNode node)
        {
            if (node is ITextableQueryNode &&
                !(node is WildcardQueryNode) &&
                !(node is FuzzyQueryNode) &&
                !(node is RegexpQueryNode) &&
                !(node.Parent is IRangeQueryNode))
            {
                FieldQueryNode fieldNode = ((FieldQueryNode)node);
                string         text      = fieldNode.GetTextAsString();
                string         field     = fieldNode.GetFieldAsString();

                CachingTokenFilter          buffer     = null;
                IPositionIncrementAttribute posIncrAtt = null;
                int  numTokens     = 0;
                int  positionCount = 0;
                bool severalTokensAtSamePosition = false;

                TokenStream source = null;
                try
                {
                    source = this.analyzer.TokenStream(field, text);
                    source.Reset();
                    buffer = new CachingTokenFilter(source);

                    if (buffer.HasAttribute <IPositionIncrementAttribute>())
                    {
                        posIncrAtt = buffer.GetAttribute <IPositionIncrementAttribute>();
                    }

                    try
                    {
                        while (buffer.IncrementToken())
                        {
                            numTokens++;
                            int positionIncrement = (posIncrAtt != null) ? posIncrAtt
                                                    .PositionIncrement : 1;
                            if (positionIncrement != 0)
                            {
                                positionCount += positionIncrement;
                            }
                            else
                            {
                                severalTokensAtSamePosition = true;
                            }
                        }
                    }
#pragma warning disable 168
                    catch (IOException e)
#pragma warning restore 168
                    {
                        // ignore
                    }
                }
                catch (IOException e)
                {
                    throw new Exception(e.Message, e);
                }
                finally
                {
                    IOUtils.CloseWhileHandlingException(source);
                }

                // rewind the buffer stream
                buffer.Reset();

                if (!buffer.HasAttribute <ICharTermAttribute>())
                {
                    return(new NoTokenFoundQueryNode());
                }

                ICharTermAttribute termAtt = buffer.GetAttribute <ICharTermAttribute>();

                if (numTokens == 0)
                {
                    return(new NoTokenFoundQueryNode());
                }
                else if (numTokens == 1)
                {
                    string term = null;
                    try
                    {
                        bool hasNext = buffer.IncrementToken();
                        Debug.Assert(hasNext == true);
                        term = termAtt.ToString();
                    }
#pragma warning disable 168
                    catch (IOException e)
#pragma warning restore 168
                    {
                        // safe to ignore, because we know the number of tokens
                    }

                    fieldNode.Text = term.ToCharSequence();

                    return(fieldNode);
                }
                else if (severalTokensAtSamePosition || !(node is QuotedFieldQueryNode))
                {
                    if (positionCount == 1 || !(node is QuotedFieldQueryNode))
                    {
                        // no phrase query:

                        if (positionCount == 1)
                        {
                            // simple case: only one position, with synonyms
                            List <IQueryNode> children = new List <IQueryNode>();

                            for (int i = 0; i < numTokens; i++)
                            {
                                string term = null;
                                try
                                {
                                    bool hasNext = buffer.IncrementToken();
                                    Debug.Assert(hasNext == true);
                                    term = termAtt.ToString();
                                }
#pragma warning disable 168
                                catch (IOException e)
#pragma warning restore 168
                                {
                                    // safe to ignore, because we know the number of tokens
                                }

                                children.Add(new FieldQueryNode(field, term, -1, -1));
                            }
                            return(new GroupQueryNode(
                                       new StandardBooleanQueryNode(children, positionCount == 1)));
                        }
                        else
                        {
                            // multiple positions
                            IQueryNode q            = new StandardBooleanQueryNode(new List <IQueryNode>(), false);
                            IQueryNode currentQuery = null;
                            for (int i = 0; i < numTokens; i++)
                            {
                                string term = null;
                                try
                                {
                                    bool hasNext = buffer.IncrementToken();
                                    Debug.Assert(hasNext == true);
                                    term = termAtt.ToString();
                                }
#pragma warning disable 168
                                catch (IOException e)
#pragma warning restore 168
                                {
                                    // safe to ignore, because we know the number of tokens
                                }
                                if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
                                {
                                    if (!(currentQuery is BooleanQueryNode))
                                    {
                                        IQueryNode t = currentQuery;
                                        currentQuery = new StandardBooleanQueryNode(new List <IQueryNode>(), true);
                                        ((BooleanQueryNode)currentQuery).Add(t);
                                    }
                                    ((BooleanQueryNode)currentQuery).Add(new FieldQueryNode(field, term, -1, -1));
                                }
                                else
                                {
                                    if (currentQuery != null)
                                    {
                                        if (this.defaultOperator == Operator.OR)
                                        {
                                            q.Add(currentQuery);
                                        }
                                        else
                                        {
                                            q.Add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
                                        }
                                    }
                                    currentQuery = new FieldQueryNode(field, term, -1, -1);
                                }
                            }
                            if (this.defaultOperator == Operator.OR)
                            {
                                q.Add(currentQuery);
                            }
                            else
                            {
                                q.Add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
                            }

                            if (q is BooleanQueryNode)
                            {
                                q = new GroupQueryNode(q);
                            }
                            return(q);
                        }
                    }
                    else
                    {
                        // phrase query:
                        MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();

                        List <FieldQueryNode> multiTerms = new List <FieldQueryNode>();
                        int position       = -1;
                        int i              = 0;
                        int termGroupCount = 0;
                        for (; i < numTokens; i++)
                        {
                            string term = null;
                            int    positionIncrement = 1;
                            try
                            {
                                bool hasNext = buffer.IncrementToken();
                                Debug.Assert(hasNext == true);
                                term = termAtt.ToString();
                                if (posIncrAtt != null)
                                {
                                    positionIncrement = posIncrAtt.PositionIncrement;
                                }
                            }
#pragma warning disable 168
                            catch (IOException e)
#pragma warning restore 168
                            {
                                // safe to ignore, because we know the number of tokens
                            }

                            if (positionIncrement > 0 && multiTerms.Count > 0)
                            {
                                foreach (FieldQueryNode termNode in multiTerms)
                                {
                                    if (this.positionIncrementsEnabled)
                                    {
                                        termNode.PositionIncrement = position;
                                    }
                                    else
                                    {
                                        termNode.PositionIncrement = termGroupCount;
                                    }

                                    mpq.Add(termNode);
                                }

                                // Only increment once for each "group" of
                                // terms that were in the same position:
                                termGroupCount++;

                                multiTerms.Clear();
                            }

                            position += positionIncrement;
                            multiTerms.Add(new FieldQueryNode(field, term, -1, -1));
                        }

                        foreach (FieldQueryNode termNode in multiTerms)
                        {
                            if (this.positionIncrementsEnabled)
                            {
                                termNode.PositionIncrement = position;
                            }
                            else
                            {
                                termNode.PositionIncrement = termGroupCount;
                            }

                            mpq.Add(termNode);
                        }

                        return(mpq);
                    }
                }
                else
                {
                    TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();

                    int position = -1;

                    for (int i = 0; i < numTokens; i++)
                    {
                        string term = null;
                        int    positionIncrement = 1;

                        try
                        {
                            bool hasNext = buffer.IncrementToken();
                            Debug.Assert(hasNext == true);
                            term = termAtt.ToString();

                            if (posIncrAtt != null)
                            {
                                positionIncrement = posIncrAtt.PositionIncrement;
                            }
                        }
#pragma warning disable 168
                        catch (IOException e)
#pragma warning restore 168
                        {
                            // safe to ignore, because we know the number of tokens
                        }

                        FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);

                        if (this.positionIncrementsEnabled)
                        {
                            position += positionIncrement;
                            newFieldNode.PositionIncrement = position;
                        }
                        else
                        {
                            newFieldNode.PositionIncrement = i;
                        }

                        pq.Add(newFieldNode);
                    }

                    return(pq);
                }
            }

            return(node);
        }
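The first try block above is essentially a token-and-position census that decides which node type to build: one token keeps the original FieldQueryNode, several tokens at one position become a boolean of synonyms, and several positions become a phrase node. A standalone sketch of that census, reusing the same TokenStream and attribute calls as the method above (namespaces can differ between Lucene.NET versions):

        using Lucene.Net.Analysis;
        using Lucene.Net.Analysis.TokenAttributes;
        using Lucene.Net.Util;

        internal static class TokenCensus
        {
            // Count tokens and distinct positions for the analyzed text.
            public static void Count(Analyzer analyzer, string field, string text,
                                     out int numTokens, out int positionCount)
            {
                numTokens = 0;
                positionCount = 0;
                TokenStream source = analyzer.TokenStream(field, text);
                try
                {
                    source.Reset();
                    var posIncrAtt = source.HasAttribute<IPositionIncrementAttribute>()
                        ? source.GetAttribute<IPositionIncrementAttribute>()
                        : null;
                    while (source.IncrementToken())
                    {
                        numTokens++;
                        // a zero increment means another token at the same position
                        positionCount += (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                    }
                }
                finally
                {
                    IOUtils.CloseWhileHandlingException(source);
                }
            }
        }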
Example #29
        public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
        {
            string      fileName = IndexFileNames.SegmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
            IndexOutput output   = directory.CreateOutput(fileName, context);
            bool        success  = false;

            try
            {
                output.WriteVInt(FORMAT_PREFLEX_RW);
                output.WriteVInt(infos.Size());
                foreach (FieldInfo fi in infos)
                {
                    sbyte bits = 0x0;
                    if (fi.HasVectors())
                    {
                        bits |= STORE_TERMVECTOR;
                    }
                    if (fi.OmitsNorms())
                    {
                        bits |= OMIT_NORMS;
                    }
                    if (fi.HasPayloads())
                    {
                        bits |= STORE_PAYLOADS;
                    }
                    if (fi.Indexed)
                    {
                        bits |= IS_INDEXED;
                        Debug.Assert(fi.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads());
                        if (fi.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
                        {
                            bits |= OMIT_TERM_FREQ_AND_POSITIONS;
                        }
                        else if (fi.FieldIndexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS)
                        {
                            bits |= OMIT_POSITIONS;
                        }
                    }
                    output.WriteString(fi.Name);

                    /*
                     * we need to write the field number since IW tries
                     * to stabilize the field numbers across segments, so the
                     * FI ordinal is not necessarily equivalent to the field number
                     */
                    output.WriteInt(fi.Number);
                    output.WriteByte(bits);
                    if (fi.Indexed && !fi.OmitsNorms())
                    {
                        // to allow null norm types we need to indicate if norms are written
                        // only in RW case
                        output.WriteByte((sbyte)(fi.NormType == null ? 0 : 1));
                    }
                    Debug.Assert(fi.Attributes() == null); // not used or supported
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    output.Dispose();
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(output);
                }
            }
        }
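Almost every example on this page repeats the open/try/finally idiom this writer uses: on success, dispose normally so any flush error surfaces; on failure, close with suppression so the original exception is not masked by a secondary one. A generic sketch of that idiom (hypothetical helper):

        using System;
        using Lucene.Net.Store;
        using Lucene.Net.Util;

        internal static class SafeIO
        {
            // Run `body` against a freshly created output, cleaning up
            // differently depending on whether `body` completed.
            public static void WriteSafely(Directory directory, string fileName,
                                           IOContext context, Action<IndexOutput> body)
            {
                IndexOutput output = directory.CreateOutput(fileName, context);
                bool success = false;
                try
                {
                    body(output);
                    success = true;
                }
                finally
                {
                    if (success)
                    {
                        output.Dispose(); // normal path: let any close error propagate
                    }
                    else
                    {
                        IOUtils.CloseWhileHandlingException(output); // failure path: suppress
                    }
                }
            }
        }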
        /// <summary>
        /// Implements the opening of a new <seealso cref="DirectoryTaxonomyReader"/> instance if
        /// the taxonomy has changed.
        ///
        /// <para>
        /// <b>NOTE:</b> the returned <seealso cref="DirectoryTaxonomyReader"/> shares the
        /// ordinal and category caches with this reader. This is not expected to cause
        /// any issues, unless both instances remain in use. The reader
        /// guarantees that the two instances cannot affect each other's cache
        /// correctness; however, if the cache size is changed through
        /// <seealso cref="#setCacheSize(int)"/>, it will affect both reader instances.
        /// </para>
        /// </summary>
        protected override TaxonomyReader DoOpenIfChanged()
        {
            EnsureOpen();

            // This works for both NRT and non-NRT readers (i.e. an NRT reader remains NRT).
            var r2 = DirectoryReader.OpenIfChanged(indexReader);

            if (r2 == null)
            {
                return(null); // no changes, nothing to do
            }

            // check if the taxonomy was recreated
            bool success = false;

            try
            {
                bool recreated = false;
                if (taxoWriter == null)
                {
                    // not NRT, check epoch from commit data
                    string t1 = indexReader.IndexCommit.UserData[DirectoryTaxonomyWriter.INDEX_EPOCH];
                    string t2 = r2.IndexCommit.UserData[DirectoryTaxonomyWriter.INDEX_EPOCH];
                    if (t1 == null)
                    {
                        if (t2 != null)
                        {
                            recreated = true;
                        }
                    }
                    else if (!t1.Equals(t2))
                    {
                        // t1 != null and t2 cannot be null b/c DirTaxoWriter always puts the commit data.
                        // it's ok to use String.equals because we require the two epoch values to be the same.
                        recreated = true;
                    }
                }
                else
                {
                    // NRT, compare current taxoWriter.epoch() vs the one that was given at construction
                    if (taxoEpoch != taxoWriter.TaxonomyEpoch)
                    {
                        recreated = true;
                    }
                }

                DirectoryTaxonomyReader newtr;
                if (recreated)
                {
                    // if recreated, do not reuse anything from this instance. the information
                    // will be lazily computed by the new instance when needed.
                    newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
                }
                else
                {
                    newtr = new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, taxoArrays);
                }

                success = true;
                return(newtr);
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(r2);
                }
            }
        }
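A hedged usage sketch for the refresh pattern this method backs, assuming the static TaxonomyReader.OpenIfChanged helper mirrors the instance logic above: a null result means nothing changed; otherwise the caller swaps readers and disposes the old one (safe, since the instances share caches but not lifetime):

        using Lucene.Net.Facet.Taxonomy;
        using Lucene.Net.Facet.Taxonomy.Directory;

        internal static class TaxonomyRefresh
        {
            // Returns the reader the caller should keep using.
            public static DirectoryTaxonomyReader RefreshIfNeeded(DirectoryTaxonomyReader current)
            {
                var newer = TaxonomyReader.OpenIfChanged(current);
                if (newer == null)
                {
                    return current; // no taxonomy changes committed since `current` opened
                }
                current.Dispose();
                return newer;
            }
        }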