Exemplo n.º 1
0
        /// <summary>
        /// Builds a string-valued field from its name, its value and the options that
        /// decide how it is stored, indexed and term-vectored.
        /// </summary>
        /// <param name="name">The name of the field.</param>
        /// <param name="internName">When <c>true</c> the name is passed through
        /// <c>StringHelper.Intern</c> before it is kept; otherwise it is kept as given.</param>
        /// <param name="value">The string value of the field.</param>
        /// <param name="store">Whether <c>value</c> should be stored in the index.</param>
        /// <param name="index">Whether the field should be indexed, and if so, whether it
        /// should be tokenized before indexing.</param>
        /// <param name="termVector">Whether term vectors should be stored.</param>
        /// <exception cref="System.NullReferenceException">
        /// <c>name</c> or <c>value</c> is <c>null</c>.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// Thrown in any of the following situations:
        /// <list type="bullet">
        /// <item><description><c>name</c> and <c>value</c> are both empty</description></item>
        /// <item><description>the field is neither stored nor indexed</description></item>
        /// <item><description>the field is not indexed but <c>termVector</c> is not <c>TermVector.NO</c></description></item>
        /// </list>
        /// </exception>
        public Field(System.String name, bool internName, System.String value, Store store, Index index, TermVector termVector)
        {
            if (name == null)
            {
                throw new System.NullReferenceException("name cannot be null");
            }
            if (value == null)
            {
                throw new System.NullReferenceException("value cannot be null");
            }
            if (name.Length == 0 && value.Length == 0)
            {
                throw new System.ArgumentException("name and value cannot both be empty");
            }

            // Both remaining sanity checks only apply to fields that are not indexed.
            bool notIndexed = index == Index.NO;
            if (notIndexed)
            {
                if (store == Store.NO)
                {
                    throw new System.ArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
                }
                if (termVector != TermVector.NO)
                {
                    throw new System.ArgumentException("cannot store term vector information " + "for a field that is not indexed");
                }
            }

            // field names are optionally interned
            internalName = internName ? StringHelper.Intern(name) : name;
            fieldsData   = value;

            internalIsStored    = store.IsStored();
            internalIsIndexed   = index.IsIndexed();
            internalIsTokenized = index.IsAnalyzed();
            internalOmitNorms   = index.OmitNorms();

            if (notIndexed)
            {
                internalOmitTermFreqAndPositions = false;
            }

            internalIsBinary = false;

            SetStoreTermVector(termVector);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Picks a sort type for a field by looking at the first term the reader
        /// returns when positioned at that field.
        /// </summary>
        /// <param name="reader">Reader whose term enumeration is inspected.</param>
        /// <param name="fieldKey">Name of the field; it is interned before use.</param>
        /// <returns>
        /// <c>SortField.INT</c>, <c>SortField.LONG</c>, <c>SortField.FLOAT</c> or
        /// <c>SortField.STRING</c>: the trimmed term text is tried as a 32-bit integer,
        /// then a 64-bit integer, then a float, and falls back to string.
        /// </returns>
        /// <exception cref="System.SystemException">
        /// The enumeration has no current term, or its first term belongs to a different field.
        /// </exception>
        internal static int DetectFieldType(IndexReader reader, System.String fieldKey)
        {
            System.String internedField = StringHelper.Intern(fieldKey);
            TermEnum      termEnum      = reader.Terms(new Term(internedField));

            try
            {
                Term firstTerm = termEnum.Term();
                if (firstTerm == null)
                {
                    throw new System.SystemException("no terms in field " + internedField + " - cannot determine sort type");
                }

                // Reference comparison against the interned name; presumably term field
                // names are interned as well, so identity is enough here.
                if ((System.Object)firstTerm.Field() != (System.Object)internedField)
                {
                    throw new System.SystemException("field \"" + internedField + "\" does not appear to be indexed");
                }

                System.String text = firstTerm.Text().Trim();

                // Narrowest numeric type first; the parsed values themselves are not needed.
                int parsedInt32;
                if (System.Int32.TryParse(text, out parsedInt32))
                {
                    return SortField.INT;
                }

                long parsedInt64;
                if (System.Int64.TryParse(text, out parsedInt64))
                {
                    return SortField.LONG;
                }

                float parsedSingle;
                if (SupportClass.Single.TryParse(text, out parsedSingle))
                {
                    return SortField.FLOAT;
                }

                return SortField.STRING;
            }
            finally
            {
                // Runs on every exit path, including the throws above.
                termEnum.Close();
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Initializes the shared field state from a name and the store, index and
        /// term-vector options.
        /// </summary>
        /// <param name="name">The name of the field; it is interned.</param>
        /// <param name="store">Supplies the stored flag.</param>
        /// <param name="index">Supplies the indexed, tokenized and omit-norms flags.</param>
        /// <param name="termVector">Forwarded to <c>SetStoreTermVector</c>.</param>
        /// <exception cref="System.NullReferenceException"><c>name</c> is <c>null</c>.</exception>
        protected internal AbstractField(System.String name, Field.Store store, Field.Index index, Field.TermVector termVector)
        {
            if (name == null)
            {
                throw new System.NullReferenceException("name cannot be null");
            }

            // field names are interned
            internalName = StringHelper.Intern(name);

            // flag coming from the store option
            internalIsStored = store.IsStored();

            // flags coming from the index option
            internalIsIndexed   = index.IsIndexed();
            internalIsTokenized = index.IsAnalyzed();
            internalOmitNorms   = index.OmitNorms();

            // this constructor never produces a binary field
            internalIsBinary = false;

            SetStoreTermVector(termVector);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Creates a stored, non-indexed field holding a binary value. The value may
        /// optionally be marked as compressed.
        /// </summary>
        /// <param name="name">The name of the field; it is interned.</param>
        /// <param name="value_Renamed">The binary value.</param>
        /// <param name="offset">Starting offset in <c>value_Renamed</c> where this field's bytes are.</param>
        /// <param name="length">Number of bytes to use for this field, starting at <c>offset</c>.</param>
        /// <param name="store">How the value should be stored (compressed or not).</param>
        /// <exception cref="System.ArgumentException">
        /// <c>name</c> or <c>value_Renamed</c> is <c>null</c>, <c>store</c> is
        /// <c>Store.NO</c>, or <c>store</c> is not a recognized value.
        /// </exception>
        public Field(System.String name, byte[] value_Renamed, int offset, int length, Store store)
        {
            if (name == null)
            {
                throw new System.ArgumentException("name cannot be null");
            }
            if (value_Renamed == null)
            {
                throw new System.ArgumentException("value cannot be null");
            }

            // field names are interned
            this.name  = StringHelper.Intern(name);
            fieldsData = value_Renamed;

            // Only the two "stored" modes are acceptable for binary content.
            bool plain    = store == Store.YES;
            bool compress = !plain && store == Store.COMPRESS;
            if (!plain && !compress)
            {
                if (store == Store.NO)
                {
                    throw new System.ArgumentException("binary values can't be unstored");
                }
                throw new System.ArgumentException("unknown store parameter " + store);
            }
            isStored     = true;
            isCompressed = compress;

            // binary fields are never indexed
            isIndexed   = false;
            isTokenized = false;
            omitTermFreqAndPositions = false;
            omitNorms = true;

            isBinary     = true;
            binaryLength = length;
            binaryOffset = offset;

            SetStoreTermVector(TermVector.NO);
        }
Exemplo n.º 5
0
            /// <summary>
            /// Creates a stored field around an existing value, taking the name and the
            /// indexing / term-vector flags from the supplied <c>FieldInfo</c>.
            /// </summary>
            /// <param name="value_Renamed">The field value; cast to <c>byte[]</c> when <c>binary</c> is set.</param>
            /// <param name="fi">Source of the name and of the indexing and term-vector flags.</param>
            /// <param name="binary">Whether the value is binary.</param>
            /// <param name="compressed">Whether the value is compressed.</param>
            /// <param name="tokenize">Whether the field is tokenized.</param>
            public FieldForMerge(System.Object value_Renamed, FieldInfo fi, bool binary, bool compressed, bool tokenize)
            {
                // state taken directly from the arguments
                isStored     = true;
                fieldsData   = value_Renamed;
                isCompressed = compressed;
                isBinary     = binary;
                isTokenized  = tokenize;

                if (binary)
                {
                    byte[] raw = (byte[])value_Renamed;
                    binaryLength = raw.Length;
                }

                // state copied from the field info
                name                        = StringHelper.Intern(fi.name);
                isIndexed                   = fi.isIndexed;
                omitNorms                   = fi.omitNorms;
                omitTermFreqAndPositions    = fi.omitTermFreqAndPositions;
                storeTermVector             = fi.storeTermVector;
                storeOffsetWithTermVector   = fi.storeOffsetWithTermVector;
                storePositionWithTermVector = fi.storePositionWithTermVector;
            }
Exemplo n.º 6
0
        /// <summary>
        /// Stores the range parameters and selects the initial rewrite method from the
        /// value size and precision step.
        /// </summary>
        /// <param name="field">Field name; it is interned.</param>
        /// <param name="precisionStep">Precision step; must be at least 1.</param>
        /// <param name="valSize">Value size in bits; must be 32 or 64.</param>
        /// <param name="min">Lower bound, or <c>null</c>.</param>
        /// <param name="max">Upper bound, or <c>null</c>.</param>
        /// <param name="minInclusive">Whether the lower bound is inclusive.</param>
        /// <param name="maxInclusive">Whether the upper bound is inclusive.</param>
        /// <exception cref="System.ArgumentException">
        /// <c>precisionStep</c> is below 1, or <c>valSize</c> is neither 32 nor 64.
        /// </exception>
        internal NumericRangeQuery(string field, int precisionStep, int valSize, T? min, T? max, bool minInclusive, bool maxInclusive)
        {
            System.Diagnostics.Debug.Assert(valSize == 32 || valSize == 64);
            if (precisionStep < 1)
            {
                throw new System.ArgumentException("precisionStep must be >=1");
            }

            this.field         = StringHelper.Intern(field);
            this.precisionStep = precisionStep;
            this.valSize       = valSize;
            this.min           = min;
            this.max           = max;
            this.minInclusive  = minInclusive;
            this.maxInclusive  = maxInclusive;

            // For bigger precisionSteps this query likely hits too many terms, so it goes
            // to CONSTANT_SCORE_FILTER right off (the FilteredTermEnum is costly if wasted
            // only for AUTO tests because it creates new enums from IndexReader for each
            // sub-range). The cut-over point depends on the value size.
            int filterThreshold;
            if (valSize == 64)
            {
                filterThreshold = 6;
            }
            else if (valSize == 32)
            {
                filterThreshold = 8;
            }
            else
            {
                // should never happen
                throw new System.ArgumentException("valSize must be 32 or 64");
            }
            RewriteMethod = (precisionStep > filterThreshold) ? CONSTANT_SCORE_FILTER_REWRITE : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;

            // shortcut if upper bound == lower bound
            if (min.HasValue && min.Equals(max))
            {
                RewriteMethod = CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE;
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Creates a tokenized, indexed field that is not stored, taking its content
        /// from a reader and optionally storing term vectors. The reader is read only
        /// when the document is added to the index, i.e. you may not close the reader
        /// until <see cref="IndexWriter.AddDocument(Document)" /> has been called.
        /// </summary>
        /// <param name="name">The name of the field; it is interned.</param>
        /// <param name="reader">The reader with the content.</param>
        /// <param name="termVector">Whether term vectors should be stored.</param>
        /// <exception cref="System.NullReferenceException">
        /// <c>name</c> or <c>reader</c> is <c>null</c>.
        /// </exception>
        public Field(System.String name, System.IO.TextReader reader, TermVector termVector)
        {
            if (name == null)
            {
                throw new System.NullReferenceException("name cannot be null");
            }
            if (reader == null)
            {
                throw new System.NullReferenceException("reader cannot be null");
            }

            // field names are interned
            internalName = StringHelper.Intern(name);
            fieldsData   = reader;

            // reader-backed fields: indexed and tokenized, never stored or binary
            internalIsStored    = false;
            internalIsIndexed   = true;
            internalIsTokenized = true;
            internalIsBinary    = false;

            SetStoreTermVector(termVector);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Creates a tokenized, indexed field that is not stored, taking its content
        /// from a token stream and optionally storing term vectors. This is useful for
        /// pre-analyzed fields. The TokenStream is read only when the document is added
        /// to the index, i.e. you may not close the TokenStream until
        /// <see cref="IndexWriter.AddDocument(Document)" /> has been called.
        /// </summary>
        /// <param name="name">The name of the field; it is interned.</param>
        /// <param name="tokenStream">The TokenStream with the content.</param>
        /// <param name="termVector">Whether term vectors should be stored.</param>
        /// <exception cref="System.NullReferenceException">
        /// <c>name</c> or <c>tokenStream</c> is <c>null</c>.
        /// </exception>
        public Field(System.String name, TokenStream tokenStream, TermVector termVector)
        {
            if (name == null)
            {
                throw new System.NullReferenceException("name cannot be null");
            }
            if (tokenStream == null)
            {
                throw new System.NullReferenceException("tokenStream cannot be null");
            }

            // field names are interned
            internalName = StringHelper.Intern(name);

            // the content lives in the token stream, so there is no field data
            fieldsData       = null;
            this.tokenStream = tokenStream;

            // stream-backed fields: indexed and tokenized, never stored or binary
            internalIsStored    = false;
            internalIsIndexed   = true;
            internalIsTokenized = true;
            internalIsBinary    = false;

            SetStoreTermVector(termVector);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Builds a string-valued field from its name, its value and the options that
        /// decide how it is stored, indexed and term-vectored.
        /// </summary>
        /// <param name="name">The name of the field.</param>
        /// <param name="internName">When <c>true</c> the name is passed through
        /// <c>StringHelper.Intern</c> before it is kept; otherwise it is kept as given.</param>
        /// <param name="value_Renamed">The string value of the field.</param>
        /// <param name="store">Whether the value should be stored in the index, and whether compressed.</param>
        /// <param name="index">Whether the field should be indexed, and if so, whether it
        /// should be tokenized before indexing.</param>
        /// <param name="termVector">Whether term vectors should be stored.</param>
        /// <exception cref="System.NullReferenceException">
        /// <c>name</c> or <c>value_Renamed</c> is <c>null</c>.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// Thrown in any of the following situations:
        /// <list type="bullet">
        /// <item><description><c>name</c> and <c>value_Renamed</c> are both empty</description></item>
        /// <item><description>the field is neither stored nor indexed</description></item>
        /// <item><description>the field is not indexed but <c>termVector</c> is not <c>TermVector.NO</c></description></item>
        /// <item><description><c>store</c> or <c>index</c> is not one of the recognized values</description></item>
        /// </list>
        /// </exception>
        public Field(System.String name, bool internName, System.String value_Renamed, Store store, Index index, TermVector termVector)
        {
            if (name == null)
            {
                throw new System.NullReferenceException("name cannot be null");
            }
            if (value_Renamed == null)
            {
                throw new System.NullReferenceException("value cannot be null");
            }
            if (name.Length == 0 && value_Renamed.Length == 0)
            {
                throw new System.ArgumentException("name and value cannot both be empty");
            }

            // Both remaining sanity checks only apply to fields that are not indexed.
            bool notIndexed = index == Index.NO;
            if (notIndexed)
            {
                if (store == Store.NO)
                {
                    throw new System.ArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
                }
                if (termVector != TermVector.NO)
                {
                    throw new System.ArgumentException("cannot store term vector information " + "for a field that is not indexed");
                }
            }

            // field names are optionally interned
            this.name  = internName ? StringHelper.Intern(name) : name;
            fieldsData = value_Renamed;

            // Store option: YES and COMPRESS both store, NO does not, anything else is rejected.
            bool plainStore      = store == Store.YES;
            bool compressedStore = !plainStore && store == Store.COMPRESS;
            if (!plainStore && !compressedStore && store != Store.NO)
            {
                throw new System.ArgumentException("unknown store parameter " + store);
            }
            isStored     = plainStore || compressedStore;
            isCompressed = compressedStore;

            // Index option.
            if (notIndexed)
            {
                isIndexed   = false;
                isTokenized = false;
                omitTermFreqAndPositions = false;
                omitNorms = true;
            }
            else
            {
                bool analyzed     = index == Index.ANALYZED || index == Index.ANALYZED_NO_NORMS;
                bool withoutNorms = index == Index.NOT_ANALYZED_NO_NORMS || index == Index.ANALYZED_NO_NORMS;
                if (!analyzed && !withoutNorms && index != Index.NOT_ANALYZED)
                {
                    throw new System.ArgumentException("unknown index parameter " + index);
                }

                isIndexed   = true;
                isTokenized = analyzed;
                if (withoutNorms)
                {
                    // only the *_NO_NORMS options touch omitNorms
                    omitNorms = true;
                }
            }

            isBinary = false;

            SetStoreTermVector(termVector);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Initializes the shared field state from a name and the store, index and
        /// term-vector options.
        /// </summary>
        /// <param name="name">The name of the field; it is interned.</param>
        /// <param name="store">Decides the stored and compressed flags.</param>
        /// <param name="index">Decides the indexed and tokenized flags, and sets omit-norms
        /// for the two <c>*_NO_NORMS</c> options.</param>
        /// <param name="termVector">Forwarded to <c>SetStoreTermVector</c>.</param>
        /// <exception cref="System.NullReferenceException"><c>name</c> is <c>null</c>.</exception>
        /// <exception cref="System.ArgumentException">
        /// <c>store</c> or <c>index</c> is not one of the recognized values.
        /// </exception>
        protected internal AbstractField(System.String name, Field.Store store, Field.Index index, Field.TermVector termVector)
        {
            if (name == null)
            {
                throw new System.NullReferenceException("name cannot be null");
            }

            // field names are interned
            this.name = StringHelper.Intern(name);

            // Store option: YES and COMPRESS both store, NO does not, anything else is rejected.
            bool plainStore      = store == Field.Store.YES;
            bool compressedStore = !plainStore && store == Field.Store.COMPRESS;
            if (!plainStore && !compressedStore && store != Field.Store.NO)
            {
                throw new System.ArgumentException("unknown store parameter " + store);
            }
            isStored     = plainStore || compressedStore;
            isCompressed = compressedStore;

            // Index option.
            if (index == Field.Index.NO)
            {
                isIndexed   = false;
                isTokenized = false;
            }
            else
            {
                bool analyzed     = index == Field.Index.ANALYZED || index == Field.Index.ANALYZED_NO_NORMS;
                bool withoutNorms = index == Field.Index.NOT_ANALYZED_NO_NORMS || index == Field.Index.ANALYZED_NO_NORMS;
                if (!analyzed && !withoutNorms && index != Field.Index.NOT_ANALYZED)
                {
                    throw new System.ArgumentException("unknown index parameter " + index);
                }

                isIndexed   = true;
                isTokenized = analyzed;
                if (withoutNorms)
                {
                    // only the *_NO_NORMS options touch omitNorms
                    omitNorms = true;
                }
            }

            isBinary = false;

            SetStoreTermVector(termVector);
        }
Exemplo n.º 11
0
            /// <summary>The custom comparator or parser this entry refers to.</summary>
            internal object custom;

            /// <summary>
            /// Creates an entry for a custom comparator/parser.
            /// </summary>
            /// <param name="field">Field name; it is interned before being kept.</param>
            /// <param name="custom">The custom comparator or parser.</param>
            internal Entry(string field, object custom)
            {
                string internedField = StringHelper.Intern(field);

                this.field  = internedField;
                this.custom = custom;
            }
Exemplo n.º 12
0
        /// <summary>
        /// Adds a complete document specified by all its term vectors. The current
        /// tvd and tvf file pointers are always written to tvx; if the document has no
        /// term vectors (<c>vectors</c> is <c>null</c>) a field count of zero is written
        /// to tvd.
        /// </summary>
        /// <param name="vectors">
        /// The term vectors of the document, one per field, or <c>null</c> when the
        /// document has none. An empty array is written as a document with zero fields.
        /// </param>
        /// <exception cref="System.SystemException">
        /// Positions or offsets are being stored for a field but are <c>null</c> for one of its terms.
        /// </exception>
        /// <exception cref="System.IO.IOException">Presumably raised by the underlying outputs on write failure.</exception>
        public void  AddAllDocVectors(TermFreqVector[] vectors)
        {
            tvx.WriteLong(tvd.GetFilePointer());
            tvx.WriteLong(tvf.GetFilePointer());

            if (vectors != null)
            {
                int numFields = vectors.Length;
                tvd.WriteVInt(numFields);

                long[] fieldPointers = new long[numFields];

                for (int i = 0; i < numFields; i++)
                {
                    // remember where this field starts in tvf for the 2nd pass
                    fieldPointers[i] = tvf.GetFilePointer();

                    int fieldNumber = fieldInfos.FieldNumber(vectors[i].GetField());

                    // 1st pass: write field numbers to tvd
                    tvd.WriteVInt(fieldNumber);

                    int numTerms = vectors[i].Size();
                    tvf.WriteVInt(numTerms);

                    TermPositionVector tpVector;

                    byte bits;
                    bool storePositions;
                    bool storeOffsets;

                    if (vectors[i] is TermPositionVector)
                    {
                        // May have positions & offsets; the first term decides for the whole field
                        tpVector       = (TermPositionVector)vectors[i];
                        storePositions = tpVector.Size() > 0 && tpVector.GetTermPositions(0) != null;
                        storeOffsets   = tpVector.Size() > 0 && tpVector.GetOffsets(0) != null;
                        bits           = (byte)((storePositions ? TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR : (byte)0) + (storeOffsets ? TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR : (byte)0));
                    }
                    else
                    {
                        tpVector       = null;
                        bits           = 0;
                        storePositions = false;
                        storeOffsets   = false;
                    }

                    tvf.WriteVInt(bits);

                    string[] terms = vectors[i].GetTerms();
                    int[]    freqs = vectors[i].GetTermFrequencies();

                    // Two UTF-8 buffers are used alternately: one holds the current term,
                    // the other the previous one. Resetting slot 1 makes the first term of
                    // the field compare against an empty previous term.
                    int utf8Upto = 0;
                    utf8Results[1].length = 0;

                    for (int j = 0; j < numTerms; j++)
                    {
                        UnicodeUtil.UTF16toUTF8(terms[j], 0, terms[j].Length, utf8Results[utf8Upto]);

                        int start = StringHelper.bytesDifference(
                            utf8Results[1 - utf8Upto].result, utf8Results[1 - utf8Upto].length, utf8Results[utf8Upto].result, utf8Results[utf8Upto].length);

                        int length = utf8Results[utf8Upto].length - start;
                        tvf.WriteVInt(start);                                        // write shared prefix length
                        tvf.WriteVInt(length);                                       // write delta length
                        tvf.WriteBytes(utf8Results[utf8Upto].result, start, length); // write delta bytes
                        utf8Upto = 1 - utf8Upto;

                        int termFreq = freqs[j];

                        tvf.WriteVInt(termFreq);

                        if (storePositions)
                        {
                            int[] positions = tpVector.GetTermPositions(j);
                            if (positions == null)
                            {
                                throw new System.SystemException("Trying to write positions that are null!");
                            }
                            System.Diagnostics.Debug.Assert(positions.Length == termFreq);

                            // use delta encoding for positions
                            int lastPosition = 0;
                            for (int k = 0; k < positions.Length; k++)
                            {
                                int position = positions[k];
                                tvf.WriteVInt(position - lastPosition);
                                lastPosition = position;
                            }
                        }

                        if (storeOffsets)
                        {
                            TermVectorOffsetInfo[] offsets = tpVector.GetOffsets(j);
                            if (offsets == null)
                            {
                                throw new System.SystemException("Trying to write offsets that are null!");
                            }
                            System.Diagnostics.Debug.Assert(offsets.Length == termFreq);

                            // use delta encoding for offsets
                            int lastEndOffset = 0;
                            for (int k = 0; k < offsets.Length; k++)
                            {
                                int startOffset = offsets[k].GetStartOffset();
                                int endOffset   = offsets[k].GetEndOffset();
                                tvf.WriteVInt(startOffset - lastEndOffset);
                                tvf.WriteVInt(endOffset - startOffset);
                                lastEndOffset = endOffset;
                            }
                        }
                    }
                }

                // 2nd pass: write field pointers to tvd as deltas from the previous field.
                // The first field's pointer is not written here, so there is only something
                // to do with two or more fields. The guard also keeps an empty vectors array
                // from indexing fieldPointers[0] out of range.
                if (numFields > 1)
                {
                    long lastFieldPointer = fieldPointers[0];
                    for (int i = 1; i < numFields; i++)
                    {
                        long fieldPointer = fieldPointers[i];
                        tvd.WriteVLong(fieldPointer - lastFieldPointer);
                        lastFieldPointer = fieldPointer;
                    }
                }
            }
            else
            {
                tvd.WriteVInt(0);
            }
        }
Exemplo n.º 13
0
 /// <summary>
 /// Constructs a Term with the given field and text. The field name is interned;
 /// the text is kept as given.
 /// <para>Note that a null field or null text value results in undefined
 /// behavior for most Lucene APIs that accept a Term parameter.</para>
 /// </summary>
 /// <param name="fld">The field name.</param>
 /// <param name="txt">The term text.</param>
 public Term(System.String fld, System.String txt)
 {
     System.String internedField = StringHelper.Intern(fld);

     field = internedField;
     text  = txt;
 }
Exemplo n.º 14
0
 /// <summary>
 /// Constructs a Term with the given field and text. The field name is interned;
 /// the text is kept as given.
 /// <para>Note that a null field or null text value results in undefined
 /// behavior for most Lucene APIs that accept a Term parameter.</para>
 /// </summary>
 /// <param name="fld">The field name.</param>
 /// <param name="txt">The term text.</param>
 public Term(string fld, string txt)
 {
     string internedField = StringHelper.Intern(fld);

     field = internedField;
     text  = txt;
 }
Exemplo n.º 15
0
 /// <summary>
 /// Deserialization constructor: restores the term from the "text" and "field"
 /// entries of the serialization info, interning the field name.
 /// </summary>
 /// <param name="info">Source of the serialized "text" and "field" values.</param>
 /// <param name="context">Not used by this constructor.</param>
 private Term(System.Runtime.Serialization.SerializationInfo info, System.Runtime.Serialization.StreamingContext context)
 {
     // read both entries in the same order as before: text first, then field
     string storedText  = (string)info.GetValue("text", typeof(string));
     string storedField = (string)info.GetValue("field", typeof(string));

     text  = storedText;
     field = StringHelper.Intern(storedField);
 }
Exemplo n.º 16
0
            /// <summary>
            /// Builds a <c>StringIndex</c> for the field named by <c>entryKey</c>: an
            /// array mapping each document to a term number, plus the array of term
            /// texts in enumeration order. Term number 0 is reserved for documents that
            /// have no term in the field.
            /// </summary>
            /// <param name="reader">Reader supplying the terms and their documents.</param>
            /// <param name="entryKey">Entry whose <c>field</c> names the field to load.</param>
            /// <returns>The populated <c>StringIndex</c>.</returns>
            protected internal override object CreateValue(IndexReader reader, Entry entryKey)
            {
                string fieldName = StringHelper.Intern(entryKey.field);

                int[]    docToTerm    = new int[reader.MaxDoc];
                string[] termTexts    = new string[reader.MaxDoc + 1];
                TermDocs docs         = reader.TermDocs();
                TermEnum termsOfField = reader.Terms(new Term(fieldName));

                // an entry for documents that have no terms in this field
                // should a document with no terms be at top or bottom?
                // this puts them at the top - if it is changed, FieldDocSortedHitQueue
                // needs to change as well.
                int termNumber = 0; // current term number
                termTexts[termNumber++] = null;

                try
                {
                    // The enumeration is already positioned on its first term, so the
                    // body runs once before Next() is consulted.
                    for (bool hasMore = true; hasMore; hasMore = termsOfField.Next())
                    {
                        Term current = termsOfField.Term;
                        if (current == null || current.Field != fieldName || termNumber >= termTexts.Length)
                        {
                            break;
                        }

                        // store term text
                        termTexts[termNumber] = current.Text;

                        // tag every document containing this term with its number
                        docs.Seek(termsOfField);
                        while (docs.Next())
                        {
                            docToTerm[docs.Doc] = termNumber;
                        }

                        termNumber++;
                    }
                }
                finally
                {
                    docs.Close();
                    termsOfField.Close();
                }

                if (termNumber == 0)
                {
                    // if there are no terms, make the term array have a single null entry
                    // (termNumber is already 1 after reserving slot 0 above, so this branch
                    // is not reached from the code in this method)
                    termTexts = new string[1];
                }
                else if (termNumber < termTexts.Length)
                {
                    // if there are less terms than documents,
                    // trim off the dead array space
                    string[] trimmed = new string[termNumber];
                    Array.Copy(termTexts, trimmed, termNumber);
                    termTexts = trimmed;
                }

                return new StringIndex(docToTerm, termTexts);
            }
Exemplo n.º 17
0
 /// <summary>
 /// Replaces the current field name with its interned instance. Judging by the
 /// name and signature this is meant to run after deserialization; how it is
 /// hooked up is not visible here.
 /// </summary>
 /// <param name="context">Not used by this method.</param>
 internal void OnDeserialized(System.Runtime.Serialization.StreamingContext context)
 {
     string restoredField = field;
     field = StringHelper.Intern(restoredField);
 }
Exemplo n.º 18
0
        /// <summary>
        /// Writes the collected terms of the current field to tvf: the term count, a
        /// flags byte saying whether positions and/or offsets follow, and then each
        /// term as a shared-prefix delta against the previous term, followed by its
        /// frequency and, when enabled, its delta-encoded positions and offsets.
        /// The tvf position at which the field starts is recorded on the current field.
        /// </summary>
        /// <exception cref="System.SystemException">
        /// Positions or offsets are enabled for the field but are <c>null</c> on a term.
        /// </exception>
        private void  WriteField()
        {
            // remember where this field is written
            currentField.tvfPointer = tvf.GetFilePointer();

            int termCount = terms.Count;
            tvf.WriteVInt(termCount);

            bool withPositions = currentField.storePositions;
            bool withOffsets   = currentField.storeOffsets;

            byte flags = (byte)(0x0);
            if (withPositions)
            {
                flags |= STORE_POSITIONS_WITH_TERMVECTOR;
            }
            if (withOffsets)
            {
                flags |= STORE_OFFSET_WITH_TERMVECTOR;
            }
            tvf.WriteByte(flags);

            System.String previousText = "";
            for (int termIndex = 0; termIndex < termCount; termIndex++)
            {
                TVTerm current = (TVTerm)terms[termIndex];

                int sharedPrefix = StringHelper.StringDifference(previousText, current.termText);
                int suffixLength = current.termText.Length - sharedPrefix;

                tvf.WriteVInt(sharedPrefix);                                   // write shared prefix length
                tvf.WriteVInt(suffixLength);                                   // write delta length
                tvf.WriteChars(current.termText, sharedPrefix, suffixLength);  // write delta chars
                tvf.WriteVInt(current.freq);
                previousText = current.termText;

                if (withPositions)
                {
                    if (current.positions == null)
                    {
                        throw new System.SystemException("Trying to write positions that are null!");
                    }

                    // use delta encoding for positions
                    int previousPosition = 0;
                    for (int n = 0; n < current.freq; n++)
                    {
                        int position = current.positions[n];
                        tvf.WriteVInt(position - previousPosition);
                        previousPosition = position;
                    }
                }

                if (withOffsets)
                {
                    if (current.offsets == null)
                    {
                        throw new System.SystemException("Trying to write offsets that are null!");
                    }

                    // use delta encoding for offsets: start relative to the previous end,
                    // then the span between start and end
                    int previousEnd = 0;
                    for (int n = 0; n < current.freq; n++)
                    {
                        int startOffset = current.offsets[n].GetStartOffset();
                        int endOffset   = current.offsets[n].GetEndOffset();
                        tvf.WriteVInt(startOffset - previousEnd);
                        tvf.WriteVInt(endOffset - startOffset);
                        previousEnd = endOffset;
                    }
                }
            }
        }
Exemplo n.º 19
0
 /// <summary>
 /// Creates a term from a field name and a text value, optionally interning
 /// the field name.
 /// </summary>
 /// <param name="fld">The field name.</param>
 /// <param name="txt">The term text; stored as given.</param>
 /// <param name="intern">
 /// When <c>true</c> the field name is passed through <c>StringHelper.Intern</c>;
 /// when <c>false</c> it is stored as given.
 /// </param>
 internal Term(System.String fld, System.String txt, bool intern)
 {
     text = txt;

     // Field names are interned unless already known to be.
     if (intern)
     {
         field = StringHelper.Intern(fld);
     }
     else
     {
         field = fld;
     }
 }
Exemplo n.º 20
0
        /// <summary>
        /// Asserts that two readers expose equivalent content: the same number of
        /// live documents and, for documents paired through the terms of
        /// <c>idField</c>, equivalent stored fields, term vectors and postings.
        /// </summary>
        /// <param name="r1">The first reader; its doc ids are the reference id space.</param>
        /// <param name="r2">The second reader; its doc ids are mapped onto those of <c>r1</c>.</param>
        /// <param name="idField">
        /// Name of the field whose terms pair documents across the two readers.
        /// Each term of this field is asserted to match at most one document in
        /// <c>r1</c>, and exactly one in <c>r2</c> whenever <c>r1</c> has one.
        /// </param>
        public static void VerifyEquals(IndexReader r1, IndexReader r2, System.String idField)
        {
            Assert.AreEqual(r1.NumDocs(), r2.NumDocs());

            // False only when both readers report the same MaxDoc and r1's NumDocs equals its MaxDoc.
            bool hasDeletes = !(r1.MaxDoc() == r2.MaxDoc() && r1.NumDocs() == r1.MaxDoc());

            int[] r2r1 = new int[r2.MaxDoc()]; // r2 id to r1 id mapping

            TermDocs termDocs1 = r1.TermDocs();
            TermDocs termDocs2 = r2.TermDocs();

            try
            {
                // create mapping from id2 space to id1 space based on idField
                idField = StringHelper.Intern(idField);
                TermEnum termEnum = r1.Terms(new Term(idField, ""));

                try
                {
                    do
                    {
                        Term term = termEnum.Term();

                        // Reference comparison against the interned idField;
                        // presumably Term.Field() returns an interned string as well.
                        if (term == null || (System.Object)term.Field() != (System.Object)idField)
                        {
                            break;
                        }

                        termDocs1.Seek(termEnum);
                        if (!termDocs1.Next())
                        {
                            // This doc is deleted and wasn't replaced
                            termDocs2.Seek(termEnum);
                            Assert.IsFalse(termDocs2.Next());
                            continue;
                        }

                        int id1 = termDocs1.Doc();
                        Assert.IsFalse(termDocs1.Next());

                        termDocs2.Seek(termEnum);
                        Assert.IsTrue(termDocs2.Next());
                        int id2 = termDocs2.Doc();
                        Assert.IsFalse(termDocs2.Next());

                        r2r1[id2] = id1;

                        // verify stored fields are equivalent
                        try
                        {
                            VerifyEquals(r1.Document(id1), r2.Document(id2));
                        }
                        catch (System.Exception)
                        {
                            System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                            System.Console.Out.WriteLine("  d1=" + r1.Document(id1));
                            System.Console.Out.WriteLine("  d2=" + r2.Document(id2));

                            // Bare rethrow keeps the original stack trace.
                            throw;
                        }

                        try
                        {
                            // verify term vectors are equivalent
                            VerifyEquals(r1.GetTermFreqVectors(id1), r2.GetTermFreqVectors(id2));
                        }
                        catch (System.Exception)
                        {
                            System.Console.Out.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                            TermFreqVector[] tv1 = r1.GetTermFreqVectors(id1);
                            System.Console.Out.WriteLine("  d1=" + tv1);
                            if (tv1 != null)
                            {
                                for (int i = 0; i < tv1.Length; i++)
                                {
                                    System.Console.Out.WriteLine("    " + i + ": " + tv1[i]);
                                }
                            }

                            TermFreqVector[] tv2 = r2.GetTermFreqVectors(id2);
                            System.Console.Out.WriteLine("  d2=" + tv2);
                            if (tv2 != null)
                            {
                                for (int i = 0; i < tv2.Length; i++)
                                {
                                    System.Console.Out.WriteLine("    " + i + ": " + tv2[i]);
                                }
                            }

                            // Bare rethrow keeps the original stack trace.
                            throw;
                        }
                    }while (termEnum.Next());
                }
                finally
                {
                    termEnum.Close();
                }

                // Verify postings
                TermEnum termEnum1 = r1.Terms(new Term("", ""));
                TermEnum termEnum2 = r2.Terms(new Term("", ""));

                try
                {
                    // pack both doc and freq into single element for easy sorting
                    long[] info1 = new long[r1.NumDocs()];
                    long[] info2 = new long[r2.NumDocs()];

                    for (; ;)
                    {
                        Term term1, term2;

                        // iterate until we get some docs
                        int len1;
                        for (; ;)
                        {
                            len1  = 0;
                            term1 = termEnum1.Term();
                            if (term1 == null)
                            {
                                break;
                            }
                            termDocs1.Seek(termEnum1);
                            while (termDocs1.Next())
                            {
                                int d1 = termDocs1.Doc();
                                int f1 = termDocs1.Freq();
                                info1[len1] = (((long)d1) << 32) | f1;
                                len1++;
                            }
                            if (len1 > 0)
                            {
                                break;
                            }
                            if (!termEnum1.Next())
                            {
                                break;
                            }
                        }

                        // iterate until we get some docs
                        int len2;
                        for (; ;)
                        {
                            len2  = 0;
                            term2 = termEnum2.Term();
                            if (term2 == null)
                            {
                                break;
                            }
                            termDocs2.Seek(termEnum2);
                            while (termDocs2.Next())
                            {
                                int d2 = termDocs2.Doc();
                                int f2 = termDocs2.Freq();

                                // Translate the r2 doc id into r1's id space before packing.
                                info2[len2] = (((long)r2r1[d2]) << 32) | f2;
                                len2++;
                            }
                            if (len2 > 0)
                            {
                                break;
                            }
                            if (!termEnum2.Next())
                            {
                                break;
                            }
                        }

                        if (!hasDeletes)
                        {
                            Assert.AreEqual(termEnum1.DocFreq(), termEnum2.DocFreq());
                        }

                        Assert.AreEqual(len1, len2);
                        if (len1 == 0)
                        {
                            break; // no more terms
                        }
                        Assert.AreEqual(term1, term2);

                        // sort info2 to get it into ascending docid
                        System.Array.Sort(info2, 0, len2);

                        // now compare
                        for (int i = 0; i < len1; i++)
                        {
                            Assert.AreEqual(info1[i], info2[i]);
                        }

                        termEnum1.Next();
                        termEnum2.Next();
                    }
                }
                finally
                {
                    termEnum1.Close();
                    termEnum2.Close();
                }
            }
            finally
            {
                // Release the postings cursors even when an assertion fails.
                termDocs1.Close();
                termDocs2.Close();
            }
        }
Exemplo n.º 21
0
            /// <summary>
            /// Builds a <c>StringIndex</c> for the field named by <c>entryKey</c>:
            /// an array of the field's term texts in enumeration order, plus a
            /// per-document array holding the ordinal of the term found in that document.
            /// Ordinal 0 is a <c>null</c> placeholder used for documents that have no
            /// term in the field.
            /// </summary>
            /// <param name="reader">The reader whose terms and postings are scanned.</param>
            /// <param name="entryKey">The cache key; its <c>field</c> member names the field.</param>
            /// <returns>A <c>StringIndex</c> wrapping the ordinal array and the term array.</returns>
            /// <throws>  SystemException if the field has more terms than the reader has documents </throws>
            protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey)
            {
                System.String   field    = StringHelper.Intern((System.String)entryKey.field);
                int             maxDoc   = reader.MaxDoc();
                int[]           retArray = new int[maxDoc];
                System.String[] mterms   = new System.String[maxDoc + 1];
                int             t        = 0; // current term number

                // an entry for documents that have no terms in this field
                // should a document with no terms be at top or bottom?
                // this puts them at the top - if it is changed, FieldDocSortedHitQueue
                // needs to change as well.
                mterms[t++] = null;

                TermDocs termDocs = reader.TermDocs();
                TermEnum termEnum = null;

                try
                {
                    // Acquired inside the try so termDocs is still closed if this call throws.
                    termEnum = reader.Terms(new Term(field));

                    do
                    {
                        Term term = termEnum.Term();

                        // Reference comparison against the interned field name;
                        // presumably Term.Field() returns an interned string as well.
                        if (term == null || (System.Object)term.Field() != (System.Object)field)
                        {
                            break;
                        }

                        // store term text
                        // we expect that there is at most one term per document
                        if (t >= mterms.Length)
                        {
                            throw new System.SystemException("there are more terms than " + "documents in field \"" + field + "\", but it's impossible to sort on " + "tokenized fields");
                        }
                        mterms[t] = term.Text();

                        termDocs.Seek(termEnum);
                        while (termDocs.Next())
                        {
                            retArray[termDocs.Doc()] = t;
                        }

                        t++;
                    }while (termEnum.Next());
                }
                finally
                {
                    termDocs.Close();
                    if (termEnum != null)
                    {
                        termEnum.Close();
                    }
                }

                // t is always at least 1 here because of the null placeholder, so the
                // only adjustment needed is trimming unused slots when there are
                // fewer terms than documents.
                if (t < mterms.Length)
                {
                    System.String[] terms = new System.String[t];
                    System.Array.Copy(mterms, 0, terms, 0, t);
                    mterms = terms;
                }

                return new StringIndex(retArray, mterms);
            }
Exemplo n.º 22
0
            /// <summary>Which custom comparator or parser this entry refers to.</summary>
            internal System.Object custom;

            /// <summary>
            /// Creates one of these objects for a custom comparator/parser.
            /// </summary>
            /// <param name="field">The field name; stored after passing through <c>StringHelper.Intern</c>.</param>
            /// <param name="custom">The custom comparator or parser; stored as given.</param>
            internal Entry(System.String field, System.Object custom)
            {
                this.custom = custom;
                this.field  = StringHelper.Intern(field);
            }