Example #1
 public static void CheckNorms(IndexReader reader)
 {
     // test omit norms
     for (int i = 0; i < DocHelper.fields.Length; i++)
     {
         Fieldable f = DocHelper.fields[i];
         if (f.IsIndexed())
         {
             Assert.AreEqual(reader.HasNorms(f.Name()), !f.GetOmitNorms());
             Assert.AreEqual(reader.HasNorms(f.Name()), !DocHelper.noNorms.Contains(f.Name()));
             if (!reader.HasNorms(f.Name()))
             {
                 // test for fake norms of 1.0 or null depending on the flag
                 byte[] norms = reader.Norms(f.Name());
                 byte   norm1 = DefaultSimilarity.EncodeNorm(1.0f);
                 if (reader.GetDisableFakeNorms())
                 {
                     Assert.IsNull(norms);
                 }
                 else
                 {
                     Assert.AreEqual(norms.Length, reader.MaxDoc());
                     for (int j = 0; j < reader.MaxDoc(); j++)
                     {
                         Assert.AreEqual(norms[j], norm1);
                     }
                 }
                 norms = new byte[reader.MaxDoc()];
                 reader.Norms(f.Name(), norms, 0);
                 for (int j = 0; j < reader.MaxDoc(); j++)
                 {
                     Assert.AreEqual(norms[j], norm1);
                 }
             }
         }
     }
 }
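
A minimal driving sketch, assuming the usual test setup around this helper (the directory/writer wiring and DocHelper.SetupDoc are assumptions, not part of the example; types come from Lucene.Net.Store, Lucene.Net.Analysis, Lucene.Net.Documents, and Lucene.Net.Index):

     // Hypothetical test driver: build a one-document index, then verify norms.
     Directory dir = new RAMDirectory();
     IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
     Document doc = new Document();
     DocHelper.SetupDoc(doc);   // assumed helper that adds DocHelper.fields to the document
     writer.AddDocument(doc);
     writer.Close();

     IndexReader reader = IndexReader.Open(dir);
     CheckNorms(reader);
     reader.Close();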
Example #2
 /// <summary>Adds field info for a Document. </summary>
 public void Add(Document doc)
 {
     lock (this)
     {
         System.Collections.IList       fields        = doc.GetFields();
         System.Collections.IEnumerator fieldIterator = fields.GetEnumerator();
         while (fieldIterator.MoveNext())
         {
             Fieldable field = (Fieldable)fieldIterator.Current;
             Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
         }
     }
 }
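
A hedged usage sketch. FieldInfos collects per-field flags, so adding a document merges each field's settings; note that in the library FieldInfos instances are normally created internally during indexing, so constructing one directly here is an assumption for illustration:

     FieldInfos fieldInfos = new FieldInfos();   // assumes the no-arg constructor is accessible
     Document doc = new Document();
     doc.Add(new Field("title", "a stored, analyzed field", Field.Store.YES, Field.Index.ANALYZED));
     fieldInfos.Add(doc);   // records the field's indexed/term-vector/norms flags under the lock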
Example #3
        static DocHelper()
        {
            textField1           = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
            textField2           = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            compressedTextField2 = new Field(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            textField3           = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
            textField3.SetOmitNorms(true);
            keyField     = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED);
            noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
            noTFField    = new Field(NO_TF_KEY, NO_TF_TEXT, Field.Store.YES, Field.Index.ANALYZED);
            noTFField.SetOmitTermFreqAndPositions(true);
            unIndField     = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, Field.Store.YES, Field.Index.NO);
            unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
            unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
            lazyField      = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
            textUtfField1  = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
            textUtfField2  = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
            fields         = new Field[] { textField1, textField2, textField3, compressedTextField2, keyField, noNormsField, noTFField, unIndField, unStoredField1, unStoredField2, textUtfField1, textUtfField2, lazyField, lazyFieldBinary, largeLazyField };
            {
                //Initialize the large Lazy Field
                System.Text.StringBuilder buffer = new System.Text.StringBuilder();
                for (int i = 0; i < 10000; i++)
                {
                    buffer.Append("Lazily loading lengths of language in lieu of laughing ");
                }

                try
                {
                    LAZY_FIELD_BINARY_BYTES = System.Text.Encoding.UTF8.GetBytes("These are some binary field bytes");
                }
                catch (System.IO.IOException)
                {
                    // Ignored: carried over from the Java original; in .NET,
                    // Encoding.UTF8.GetBytes does not actually throw IOException.
                }
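                // Fill in the two placeholder slots reserved for lazyFieldBinary
                // and largeLazyField at the end of the fields array above.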
                lazyFieldBinary           = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES);
                fields[fields.Length - 2] = lazyFieldBinary;
                LARGE_LAZY_FIELD_TEXT     = buffer.ToString();
                largeLazyField            = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);

                fields[fields.Length - 1] = largeLazyField;
                for (int i = 0; i < fields.Length; i++)
                {
                    Fieldable f = fields[i];
                    Add(all, f);
                    if (f.IsIndexed())
                    {
                        Add(indexed, f);
                    }
                    else
                    {
                        Add(unindexed, f);
                    }
                    if (f.IsTermVectorStored())
                    {
                        Add(termvector, f);
                    }
                    if (f.IsIndexed() && !f.IsTermVectorStored())
                    {
                        Add(notermvector, f);
                    }
                    if (f.IsStored())
                    {
                        Add(stored, f);
                    }
                    else
                    {
                        Add(unstored, f);
                    }
                    if (f.GetOmitNorms())
                    {
                        Add(noNorms, f);
                    }
                    if (f.GetOmitTf())
                    {
                        Add(noTf, f);
                    }
                    if (f.IsLazy())
                    {
                        Add(lazy, f);
                    }
                }
            }
            {
                nameValues = new System.Collections.Hashtable();
                nameValues[TEXT_FIELD_1_KEY]            = FIELD_1_TEXT;
                nameValues[TEXT_FIELD_2_KEY]            = FIELD_2_TEXT;
                nameValues[COMPRESSED_TEXT_FIELD_2_KEY] = FIELD_2_COMPRESSED_TEXT;
                nameValues[TEXT_FIELD_3_KEY]            = FIELD_3_TEXT;
                nameValues[KEYWORD_FIELD_KEY]           = KEYWORD_TEXT;
                nameValues[NO_NORMS_KEY]                = NO_NORMS_TEXT;
                nameValues[NO_TF_KEY]                   = NO_TF_TEXT;
                nameValues[UNINDEXED_FIELD_KEY]         = UNINDEXED_FIELD_TEXT;
                nameValues[UNSTORED_FIELD_1_KEY]        = UNSTORED_1_FIELD_TEXT;
                nameValues[UNSTORED_FIELD_2_KEY]        = UNSTORED_2_FIELD_TEXT;
                nameValues[LAZY_FIELD_KEY]              = LAZY_FIELD_TEXT;
                nameValues[LAZY_FIELD_BINARY_KEY]       = LAZY_FIELD_BINARY_BYTES;
                nameValues[LARGE_LAZY_FIELD_KEY]        = LARGE_LAZY_FIELD_TEXT;
                nameValues[TEXT_FIELD_UTF1_KEY]         = FIELD_UTF1_TEXT;
                nameValues[TEXT_FIELD_UTF2_KEY]         = FIELD_UTF2_TEXT;
            }
        }
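
Once the static constructor has run, the prepared fields and their expected values are available through DocHelper's collections; a small sketch of consuming them (the console output is illustrative):

        foreach (Fieldable f in DocHelper.fields)
        {
            // nameValues maps each field name to the text (or bytes) it was built from
            object expected = DocHelper.nameValues[f.Name()];
            System.Console.WriteLine(f.Name() + " => " + expected);
        }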
Example #4
        public override DocumentsWriter.DocWriter ProcessDocument()
        {
            consumer.StartDocument();
            fieldsWriter.StartDocument();

            Document doc = docState.doc;

            System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

            fieldCount = 0;

            int thisFieldGen = fieldGen++;

            System.Collections.IList docFields = doc.GetFields();
            int numDocFields = docFields.Count;

            // Absorb any new fields first seen in this document.
            // Also absorb any changes to fields we had already
            // seen before (eg suddenly turning on norms or
            // vectors, etc.):

            for (int i = 0; i < numDocFields; i++)
            {
                Fieldable     field     = (Fieldable)docFields[i];
                System.String fieldName = field.Name();

                // Make sure we have a PerField allocated
                int hashPos = fieldName.GetHashCode() & hashMask;
                DocFieldProcessorPerField fp = fieldHash[hashPos];
                while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
                {
                    fp = fp.next;
                }

                if (fp == null)
                {
                    // TODO FI: we need to genericize the "flags" that a
                    // field holds, and, how these flags are merged; it
                    // needs to be more "pluggable" such that if I want
                    // to have a new "thing" my Fields can do, I can
                    // easily add it
                    FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());

                    fp                 = new DocFieldProcessorPerField(this, fi);
                    fp.next            = fieldHash[hashPos];
                    fieldHash[hashPos] = fp;
                    totalFieldCount++;

                    if (totalFieldCount >= fieldHash.Length / 2)
                    {
                        Rehash();
                    }
                }
                else
                {
                    fp.fieldInfo.Update(field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
                }

                if (thisFieldGen != fp.lastGen)
                {
                    // First time we're seeing this field for this doc
                    fp.fieldCount = 0;

                    if (fieldCount == fields.Length)
                    {
                        int newSize = fields.Length * 2;
                        DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                        Array.Copy(fields, 0, newArray, 0, fieldCount);
                        fields = newArray;
                    }

                    fields[fieldCount++] = fp;
                    fp.lastGen           = thisFieldGen;
                }

                if (fp.fieldCount == fp.fields.Length)
                {
                    Fieldable[] newArray = new Fieldable[fp.fields.Length * 2];
                    Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
                    fp.fields = newArray;
                }

                fp.fields[fp.fieldCount++] = field;
                if (field.IsStored())
                {
                    fieldsWriter.AddField(field, fp.fieldInfo);
                }
            }

            // If we are writing vectors then we must visit
            // fields in sorted order so they are written in
            // sorted order.  TODO: we actually only need to
            // sort the subset of fields that have vectors
            // enabled; we could save [small amount of] CPU
            // here.
            QuickSort(fields, 0, fieldCount - 1);

            for (int i = 0; i < fieldCount; i++)
            {
                fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);
            }

            if (docState.maxTermPrefix != null && docState.infoStream != null)
            {
                docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
                docState.maxTermPrefix = null;
            }

            DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
            DocumentsWriter.DocWriter two = consumer.FinishDocument();
            if (one == null)
            {
                return(two);
            }
            else if (two == null)
            {
                return(one);
            }
            else
            {
                PerDoc both = GetPerDoc();
                both.docID = docState.docID;
                System.Diagnostics.Debug.Assert(one.docID == docState.docID);
                System.Diagnostics.Debug.Assert(two.docID == docState.docID);
                both.one = one;
                both.two = two;
                return(both);
            }
        }
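
Both per-document arrays above (fields and fp.fields) grow with the same doubling idiom; a generic sketch of that pattern (the helper name is hypothetical, not part of the class):

        // Hypothetical helper mirroring the in-place doubling used above.
        static T[] GrowIfFull<T>(T[] array, int count)
        {
            if (count < array.Length)
                return array;                        // still has room; reuse as-is
            T[] newArray = new T[array.Length * 2];  // double the capacity
            System.Array.Copy(array, 0, newArray, 0, count);
            return newArray;
        }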