Beispiel #1
0
        internal void  AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            int storedCount = 0;

            System.Collections.IEnumerator fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                if (field.IsStored())
                {
                    storedCount++;
                }
            }
            fieldsStream.WriteVInt(storedCount);

            fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                if (field.IsStored())
                {
                    WriteField(fieldInfos.FieldInfo(field.Name()), field);
                }
            }
        }
Beispiel #2
0
        /// <summary> Return the offsetGap from the analyzer assigned to field </summary>
        public override int GetOffsetGap(Lucene.Net.Documents.Fieldable field)
        {
            Analyzer analyzer = (Analyzer)analyzerMap[field.Name()];

            if (analyzer == null)
            {
                analyzer = defaultAnalyzer;
            }
            return(analyzer.GetOffsetGap(field));
        }
Beispiel #3
0
 /// <summary>Adds field info for a Document. </summary>
 public void  Add(Document doc)
 {
     System.Collections.IList       fields        = doc.GetFields();
     System.Collections.IEnumerator fieldIterator = fields.GetEnumerator();
     while (fieldIterator.MoveNext())
     {
         Fieldable field = (Fieldable)fieldIterator.Current;
         Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms());
     }
 }
Beispiel #4
0
 public static void  CheckNorms(IndexReader reader)
 {
     // test omit norms
     for (int i = 0; i < DocHelper.fields.Length; i++)
     {
         Fieldable f = DocHelper.fields[i];
         if (f.IsIndexed())
         {
             Assert.AreEqual(reader.HasNorms(f.Name()), !f.GetOmitNorms());
             Assert.AreEqual(reader.HasNorms(f.Name()), !DocHelper.noNorms.Contains(f.Name()));
             if (!reader.HasNorms(f.Name()))
             {
                 // test for fake norms of 1.0 or null depending on the flag
                 byte[] norms = reader.Norms(f.Name());
                 byte   norm1 = DefaultSimilarity.EncodeNorm(1.0f);
                 if (reader.GetDisableFakeNorms())
                 {
                     Assert.IsNull(norms);
                 }
                 else
                 {
                     Assert.AreEqual(norms.Length, reader.MaxDoc());
                     for (int j = 0; j < reader.MaxDoc(); j++)
                     {
                         Assert.AreEqual(norms[j], norm1);
                     }
                 }
                 norms = new byte[reader.MaxDoc()];
                 reader.Norms(f.Name(), norms, 0);
                 for (int j = 0; j < reader.MaxDoc(); j++)
                 {
                     Assert.AreEqual(norms[j], norm1);
                 }
             }
         }
     }
 }
Beispiel #5
0
        public virtual void  TestDocument()
        {
            Assert.IsTrue(reader.NumDocs() == 1);
            Assert.IsTrue(reader.MaxDoc() >= 1);
            Document result = reader.Document(0);

            Assert.IsTrue(result != null);
            //There are 2 unstored fields on the document that are not preserved across writing
            Assert.IsTrue(DocHelper.NumFields(result) == DocHelper.NumFields(testDoc) - DocHelper.unstored.Count);

            System.Collections.IList fields = result.GetFields();
            for (System.Collections.IEnumerator iter = fields.GetEnumerator(); iter.MoveNext();)
            {
                Fieldable field = (Fieldable)iter.Current;
                Assert.IsTrue(field != null);
                Assert.IsTrue(DocHelper.nameValues.Contains(field.Name()));
            }
        }
Beispiel #6
0
        internal void  AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            int storedCount = 0;

            System.Collections.IEnumerator fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                if (field.IsStored())
                {
                    storedCount++;
                }
            }
            fieldsStream.WriteVInt(storedCount);

            fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable field = (Fieldable)fieldIterator.Current;
                // if the field as an instanceof FieldsReader.FieldForMerge, we're in merge mode
                // and field.binaryValue() already returns the compressed value for a field
                // with isCompressed()==true, so we disable compression in that case
                bool disableCompression = (field is FieldsReader.FieldForMerge);
                if (field.IsStored())
                {
                    fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));

                    byte bits = 0;
                    if (field.IsTokenized())
                    {
                        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
                    }
                    if (field.IsBinary())
                    {
                        bits |= FieldsWriter.FIELD_IS_BINARY;
                    }
                    if (field.IsCompressed())
                    {
                        bits |= FieldsWriter.FIELD_IS_COMPRESSED;
                    }

                    fieldsStream.WriteByte(bits);

                    if (field.IsCompressed())
                    {
                        // compression is enabled for the current field
                        byte[] data = null;

                        if (disableCompression)
                        {
                            // optimized case for merging, the data
                            // is already compressed
                            data = field.BinaryValue();
                        }
                        else
                        {
                            // check if it is a binary field
                            if (field.IsBinary())
                            {
                                data = Compress(field.BinaryValue());
                            }
                            else
                            {
                                data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
                            }
                        }
                        int len = data.Length;
                        fieldsStream.WriteVInt(len);
                        fieldsStream.WriteBytes(data, len);
                    }
                    else
                    {
                        // compression is disabled for the current field
                        if (field.IsBinary())
                        {
                            byte[] data = field.BinaryValue();
                            int    len  = data.Length;
                            fieldsStream.WriteVInt(len);
                            fieldsStream.WriteBytes(data, len);
                        }
                        else
                        {
                            fieldsStream.WriteString(field.StringValue());
                        }
                    }
                }
            }
        }
        /// <summary>Returns the offsetGap from the analyzer assigned to fiel</summary>
        public override int GetOffsetGap(Lucene.Net.Documents.Fieldable field)
        {
            var analyzer = GetAnalyzer(field.Name());

            return(analyzer.GetOffsetGap(field));
        }
Beispiel #8
0
 private static void  Add(System.Collections.IDictionary map, Fieldable field)
 {
     map[field.Name()] = field;
 }
Beispiel #9
0
		private static void  Add(System.Collections.IDictionary map, Fieldable field)
		{
			map[field.Name()] = field;
		}
Beispiel #10
0
        public override DocumentsWriter.DocWriter ProcessDocument()
        {
            consumer.StartDocument();
            fieldsWriter.StartDocument();

            Document doc = docState.doc;

            System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

            fieldCount = 0;

            int thisFieldGen = fieldGen++;

            System.Collections.IList docFields = doc.GetFields();
            int numDocFields = docFields.Count;

            // Absorb any new fields first seen in this document.
            // Also absorb any changes to fields we had already
            // seen before (eg suddenly turning on norms or
            // vectors, etc.):

            for (int i = 0; i < numDocFields; i++)
            {
                Fieldable     field     = (Fieldable)docFields[i];
                System.String fieldName = field.Name();

                // Make sure we have a PerField allocated
                int hashPos = fieldName.GetHashCode() & hashMask;
                DocFieldProcessorPerField fp = fieldHash[hashPos];
                while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
                {
                    fp = fp.next;
                }

                if (fp == null)
                {
                    // TODO FI: we need to genericize the "flags" that a
                    // field holds, and, how these flags are merged; it
                    // needs to be more "pluggable" such that if I want
                    // to have a new "thing" my Fields can do, I can
                    // easily add it
                    FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());

                    fp                 = new DocFieldProcessorPerField(this, fi);
                    fp.next            = fieldHash[hashPos];
                    fieldHash[hashPos] = fp;
                    totalFieldCount++;

                    if (totalFieldCount >= fieldHash.Length / 2)
                    {
                        Rehash();
                    }
                }
                else
                {
                    fp.fieldInfo.Update(field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms(), false, field.GetOmitTf());
                }

                if (thisFieldGen != fp.lastGen)
                {
                    // First time we're seeing this field for this doc
                    fp.fieldCount = 0;

                    if (fieldCount == fields.Length)
                    {
                        int newSize = fields.Length * 2;
                        DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                        Array.Copy(fields, 0, newArray, 0, fieldCount);
                        fields = newArray;
                    }

                    fields[fieldCount++] = fp;
                    fp.lastGen           = thisFieldGen;
                }

                if (fp.fieldCount == fp.fields.Length)
                {
                    Fieldable[] newArray = new Fieldable[fp.fields.Length * 2];
                    Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
                    fp.fields = newArray;
                }

                fp.fields[fp.fieldCount++] = field;
                if (field.IsStored())
                {
                    fieldsWriter.AddField(field, fp.fieldInfo);
                }
            }

            // If we are writing vectors then we must visit
            // fields in sorted order so they are written in
            // sorted order.  TODO: we actually only need to
            // sort the subset of fields that have vectors
            // enabled; we could save [small amount of] CPU
            // here.
            QuickSort(fields, 0, fieldCount - 1);

            for (int i = 0; i < fieldCount; i++)
            {
                fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);
            }

            if (docState.maxTermPrefix != null && docState.infoStream != null)
            {
                docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
                docState.maxTermPrefix = null;
            }

            DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
            DocumentsWriter.DocWriter two = consumer.FinishDocument();
            if (one == null)
            {
                return(two);
            }
            else if (two == null)
            {
                return(one);
            }
            else
            {
                PerDoc both = GetPerDoc();
                both.docID = docState.docID;
                System.Diagnostics.Debug.Assert(one.docID == docState.docID);
                System.Diagnostics.Debug.Assert(two.docID == docState.docID);
                both.one = one;
                both.two = two;
                return(both);
            }
        }
        // Tokenizes the fields of a document into Postings.
        private void  InvertDocument(Document doc)
        {
            System.Collections.IEnumerator fieldIterator = doc.GetFields().GetEnumerator();
            while (fieldIterator.MoveNext())
            {
                Fieldable     field       = (Fieldable)fieldIterator.Current;
                System.String fieldName   = field.Name();
                int           fieldNumber = fieldInfos.FieldNumber(fieldName);

                int length   = fieldLengths[fieldNumber];               // length of field
                int position = fieldPositions[fieldNumber];             // position in field
                if (length > 0)
                {
                    position += analyzer.GetPositionIncrementGap(fieldName);
                }
                int offset = fieldOffsets[fieldNumber];                 // offset field

                if (field.IsIndexed())
                {
                    if (!field.IsTokenized())
                    {
                        // un-tokenized field
                        System.String stringValue = field.StringValue();
                        if (field.IsStoreOffsetWithTermVector())
                        {
                            AddPosition(fieldName, stringValue, position++, new TermVectorOffsetInfo(offset, offset + stringValue.Length));
                        }
                        else
                        {
                            AddPosition(fieldName, stringValue, position++, null);
                        }
                        offset += stringValue.Length;
                        length++;
                    }
                    else
                    {
                        System.IO.TextReader reader;                         // find or make Reader
                        if (field.ReaderValue() != null)
                        {
                            reader = field.ReaderValue();
                        }
                        else if (field.StringValue() != null)
                        {
                            reader = new System.IO.StringReader(field.StringValue());
                        }
                        else
                        {
                            throw new System.ArgumentException("field must have either String or Reader value");
                        }

                        // Tokenize field and add to postingTable
                        TokenStream stream = analyzer.TokenStream(fieldName, reader);
                        try
                        {
                            Token lastToken = null;
                            for (Token t = stream.Next(); t != null; t = stream.Next())
                            {
                                position += (t.GetPositionIncrement() - 1);

                                if (field.IsStoreOffsetWithTermVector())
                                {
                                    AddPosition(fieldName, t.TermText(), position++, new TermVectorOffsetInfo(offset + t.StartOffset(), offset + t.EndOffset()));
                                }
                                else
                                {
                                    AddPosition(fieldName, t.TermText(), position++, null);
                                }

                                lastToken = t;
                                if (++length >= maxFieldLength)
                                {
                                    if (infoStream != null)
                                    {
                                        infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached, ignoring following tokens");
                                    }
                                    break;
                                }
                            }

                            if (lastToken != null)
                            {
                                offset += lastToken.EndOffset() + 1;
                            }
                        }
                        finally
                        {
                            stream.Close();
                        }
                    }

                    fieldLengths[fieldNumber]   = length;                   // save field length
                    fieldPositions[fieldNumber] = position;                 // save field position
                    fieldBoosts[fieldNumber]   *= field.GetBoost();
                    fieldOffsets[fieldNumber]   = offset;
                }
            }
        }
Beispiel #12
0
		private static void  Add(System.Collections.IDictionary map, Fieldable field)
		{
			if (field == null) System.Console.WriteLine("FIELD IS NULL!!!");
			if (field == null) System.Console.WriteLine("FIELD IS NULL!!!");
			if (field == null) System.Console.WriteLine("FIELD IS NULL!!!");
			if (map == null) System.Console.WriteLine("MAP IS NULL!!!");
			if (map == null) System.Console.WriteLine("MAP IS NULL!!!");
			if (map == null) System.Console.WriteLine("MAP IS NULL!!!");
			if (field.Name() == null) System.Console.WriteLine("FIELD NAME IS NULL!!!");
			if (field.Name() == null) System.Console.WriteLine("FIELD NAME IS NULL!!!");
			if (field.Name() == null) System.Console.WriteLine("FIELD NAME IS NULL!!!");
			map[field.Name()] = field;
		}