Exemple #1
0
		/// <summary>
		/// Adds field info for every field of a Document by forwarding the field's
		/// name and its indexing / term-vector / omit-norms flags to the
		/// six-argument <c>Add</c> overload.
		/// </summary>
		/// <param name="doc">Document whose <c>Fields()</c> are enumerated.</param>
		public void  Add(Document doc)
		{
			foreach (Field docField in doc.Fields())
			{
				Add(
					docField.Name(),
					docField.IsIndexed(),
					docField.IsTermVectorStored(),
					docField.IsStorePositionWithTermVector(),
					docField.IsStoreOffsetWithTermVector(),
					docField.GetOmitNorms());
			}
		}
Exemple #2
0
 /// <summary>
 /// Registers field info for each field found in the given Document.
 /// </summary>
 /// <param name="doc">Document to walk; every item of <c>doc.Fields()</c> is passed on.</param>
 public void  Add(Document doc)
 {
     foreach (Field current in doc.Fields())
     {
         // Arguments are evaluated left to right, exactly as in the one-line form.
         Add(current.Name(),
             current.IsIndexed(),
             current.IsTermVectorStored(),
             current.IsStorePositionWithTermVector(),
             current.IsStoreOffsetWithTermVector(),
             current.GetOmitNorms());
     }
 }
Exemple #3
0
        /// <summary>Counts the fields of a document by enumerating <c>doc.Fields()</c>.</summary>
        /// <param name="doc">Document whose fields are counted.</param>
        /// <returns>The number of items yielded by <c>doc.Fields()</c>.</returns>
        public static int NumFields(Document doc)
        {
            int result = 0;

            // Only the number of iterations matters; the loop variable is not read.
            foreach (Field field in doc.Fields())
            {
                result++;
            }
            return result;
        }
Exemple #4
0
        /// <summary>
        /// Checks that the reader reports exactly one document, that document 0 can
        /// be loaded, that it has two fewer fields than <c>testDoc</c>, and that every
        /// loaded field is non-null with a name contained in <c>DocHelper.nameValues</c>.
        /// </summary>
        public virtual void  TestDocument()
        {
            try
            {
                // AreEqual / IsNotNull report the offending values on failure,
                // unlike IsTrue(a == b).
                Assert.AreEqual(1, reader.NumDocs());
                Assert.IsTrue(reader.MaxDoc() >= 1);
                Document result = reader.Document(0);
                Assert.IsNotNull(result);
                //There are 2 unstored fields on the document that are not preserved across writing
                Assert.AreEqual(DocHelper.NumFields(testDoc) - 2, DocHelper.NumFields(result));

                foreach (Field field in result.Fields())
                {
                    Assert.IsNotNull(field);
                    Assert.IsTrue(DocHelper.nameValues.Contains(field.Name()));
                }
            }
            catch (System.IO.IOException e)
            {
                System.Console.Error.WriteLine(e.StackTrace);
                // Fail with the exception text so the cause appears in the test report.
                Assert.Fail("Unexpected IOException: " + e);
            }
        }
        /// <summary>
        /// Tokenizes the fields of a document into Postings.
        /// </summary>
        /// <remarks>
        /// For every field of <paramref name="doc"/> the running length, position and
        /// offset stored for that field number are loaded from <c>fieldLengths</c>,
        /// <c>fieldPositions</c> and <c>fieldOffsets</c>. Indexed fields then hand their
        /// terms to <c>AddPosition</c>, after which the updated counters are written back
        /// and <c>fieldBoosts</c> is multiplied by the field's boost. Fields that are not
        /// indexed leave all per-field arrays untouched.
        /// </remarks>
        /// <param name="doc">Document whose fields are enumerated via <c>doc.Fields()</c>.</param>
        /// <exception cref="System.ArgumentException">
        /// Thrown for an indexed, tokenized field whose <c>ReaderValue()</c> and
        /// <c>StringValue()</c> are both null.
        /// </exception>
        private void InvertDocument(Document doc)
        {
            foreach(Field field in doc.Fields())
            {
                System.String fieldName = field.Name();
                int fieldNumber = fieldInfos.FieldNumber(fieldName);

                int length = fieldLengths[fieldNumber]; // length of field
                int position = fieldPositions[fieldNumber]; // position in field
                // A non-zero stored length means this field number already has terms,
                // so the analyzer's position-increment gap is applied before continuing.
                if (length > 0)
                    position += analyzer.GetPositionIncrementGap(fieldName);
                int offset = fieldOffsets[fieldNumber]; // offset field

                if (field.IsIndexed())
                {
                    if (!field.IsTokenized())
                    {
                        // un-tokenized field: the whole string value is added as one term
                        System.String stringValue = field.StringValue();
                        // Offset info is only built when the field stores offsets with its term vector.
                        if (field.IsStoreOffsetWithTermVector())
                            AddPosition(fieldName, stringValue, position++, new TermVectorOffsetInfo(offset, offset + stringValue.Length));
                        else
                            AddPosition(fieldName, stringValue, position++, null);
                        offset += stringValue.Length;
                        length++;
                    }
                    else
                    {
                        // Prefer the field's own reader; otherwise wrap its string value.
                        System.IO.TextReader reader; // find or make Reader
                        if (field.ReaderValue() != null)
                            reader = field.ReaderValue();
                        else if (field.StringValue() != null)
                            reader = new System.IO.StringReader(field.StringValue());
                        else
                            throw new System.ArgumentException("field must have either String or Reader value");

                        // Tokenize field and add to postingTable
                        TokenStream stream = analyzer.TokenStream(fieldName, reader);
                        try
                        {
                            Token lastToken = null;
                            for (Token t = stream.Next(); t != null; t = stream.Next())
                            {
                                // Together with the position++ below, the net advance per
                                // token equals the token's position increment.
                                position += (t.GetPositionIncrement() - 1);

                                if (field.IsStoreOffsetWithTermVector())
                                    AddPosition(fieldName, t.TermText(), position++, new TermVectorOffsetInfo(offset + t.StartOffset(), offset + t.EndOffset()));
                                else
                                    AddPosition(fieldName, t.TermText(), position++, null);

                                lastToken = t;
                                // The limit is checked after the token has been added, so the
                                // token that takes length past maxFieldLength is still recorded.
                                if (++length > maxFieldLength)
                                {
                                    if (infoStream != null)
                                        infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached, ignoring following tokens");
                                    break;
                                }
                            }

                            // Advance the running offset past the last token that was added.
                            // NOTE(review): the extra +1 presumably separates successive values
                            // of the same field - confirm against the offset consumers.
                            if (lastToken != null)
                                offset += lastToken.EndOffset() + 1;
                        }
                        finally
                        {
                            // The stream is closed even when tokenizing or AddPosition throws.
                            stream.Close();
                        }
                    }

                    fieldLengths[fieldNumber] = length; // save field length
                    fieldPositions[fieldNumber] = position; // save field position
                    fieldBoosts[fieldNumber] *= field.GetBoost();
                    fieldOffsets[fieldNumber] = offset;
                }
            }
        }
        /// <summary>
        /// Writes the stored fields of a document to <c>fieldsStream</c> and records in
        /// <c>indexStream</c> the file pointer at which they begin.
        /// </summary>
        /// <remarks>
        /// Layout written to <c>fieldsStream</c>: a VInt count of stored fields, then for
        /// each stored field its field number (VInt), one flag byte
        /// (tokenized / binary / compressed) and the value. Compressed and binary
        /// values are written as a VInt length followed by the bytes; all other values
        /// go through <c>WriteString</c>.
        /// </remarks>
        /// <param name="doc">Document whose stored fields are written.</param>
        internal void  AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            // First pass: the count must be written before any field data.
            int storedCount = 0;
            foreach (Field field in doc.Fields())
            {
                if (field.IsStored())
                {
                    storedCount++;
                }
            }
            fieldsStream.WriteVInt(storedCount);

            // Second pass: emit one record per stored field.
            foreach (Field field in doc.Fields())
            {
                if (!field.IsStored())
                {
                    continue;
                }

                fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));

                byte bits = 0;
                if (field.IsTokenized())
                {
                    bits |= FieldsWriter.FIELD_IS_TOKENIZED;
                }
                if (field.IsBinary())
                {
                    bits |= FieldsWriter.FIELD_IS_BINARY;
                }
                if (field.IsCompressed())
                {
                    bits |= FieldsWriter.FIELD_IS_COMPRESSED;
                }

                fieldsStream.WriteByte(bits);

                if (field.IsCompressed())
                {
                    // compression is enabled for the current field:
                    // binary values are compressed as-is, strings as their UTF-8 bytes
                    byte[] data;
                    if (field.IsBinary())
                    {
                        data = Compress(field.BinaryValue());
                    }
                    else
                    {
                        data = Compress(System.Text.Encoding.UTF8.GetBytes(field.StringValue()));
                    }
                    fieldsStream.WriteVInt(data.Length);
                    fieldsStream.WriteBytes(data, data.Length);
                }
                else if (field.IsBinary())
                {
                    // compression is disabled, raw binary value
                    byte[] data = field.BinaryValue();
                    fieldsStream.WriteVInt(data.Length);
                    fieldsStream.WriteBytes(data, data.Length);
                }
                else
                {
                    // compression is disabled, plain string value
                    fieldsStream.WriteString(field.StringValue());
                }
            }
        }
Exemple #7
0
		/// <summary>Returns how many fields <c>doc.Fields()</c> yields.</summary>
		/// <param name="doc">Document whose fields are counted.</param>
		/// <returns>The number of iterations over <c>doc.Fields()</c>.</returns>
		public static int NumFields(Document doc)
		{
			int result = 0;
			// The loop variable is not read; each iteration just bumps the counter.
			foreach (Field field in doc.Fields())
			{
				result++;
			}
			return result;
		}
        /// <summary>
        /// Appends the stored fields of <paramref name="doc"/> to <c>fieldsStream</c>,
        /// after writing the current <c>fieldsStream</c> file pointer to <c>indexStream</c>.
        /// </summary>
        /// <remarks>
        /// Per document: VInt stored-field count, then per stored field a VInt field
        /// number, a flag byte, and either (VInt length + bytes) for compressed or
        /// binary values or a <c>WriteString</c> call for plain strings.
        /// </remarks>
        /// <param name="doc">Document to write.</param>
        internal void AddDocument(Document doc)
        {
            indexStream.WriteLong(fieldsStream.GetFilePointer());

            // The count precedes the field records, so the fields are walked twice.
            int storedCount = 0;
            foreach (Field field in doc.Fields())
            {
                if (field.IsStored())
                {
                    storedCount++;
                }
            }
            fieldsStream.WriteVInt(storedCount);

            foreach (Field field in doc.Fields())
            {
                if (field.IsStored())
                {
                    fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));

                    // Flag byte describing how the value that follows is encoded.
                    byte bits = 0;
                    if (field.IsTokenized())
                    {
                        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
                    }
                    if (field.IsBinary())
                    {
                        bits |= FieldsWriter.FIELD_IS_BINARY;
                    }
                    if (field.IsCompressed())
                    {
                        bits |= FieldsWriter.FIELD_IS_COMPRESSED;
                    }

                    fieldsStream.WriteByte(bits);

                    if (field.IsCompressed())
                    {
                        // compression is enabled for the current field
                        byte[] data;
                        // check if it is a binary field
                        if (field.IsBinary())
                        {
                            data = Compress(field.BinaryValue());
                        }
                        else
                        {
                            // strings are compressed as their UTF-8 bytes
                            data = Compress(System.Text.Encoding.UTF8.GetBytes(field.StringValue()));
                        }
                        int len = data.Length;
                        fieldsStream.WriteVInt(len);
                        fieldsStream.WriteBytes(data, len);
                    }
                    else
                    {
                        // compression is disabled for the current field
                        if (field.IsBinary())
                        {
                            byte[] data = field.BinaryValue();
                            int len = data.Length;
                            fieldsStream.WriteVInt(len);
                            fieldsStream.WriteBytes(data, len);
                        }
                        else
                        {
                            fieldsStream.WriteString(field.StringValue());
                        }
                    }
                }
            }
        }
Exemple #9
0
        /// <summary>
        /// Tokenizes the fields of a document into Postings.
        /// </summary>
        /// <remarks>
        /// Each field's running length, position and offset are read from
        /// <c>fieldLengths</c>, <c>fieldPositions</c> and <c>fieldOffsets</c> by field
        /// number. Only indexed fields pass terms to <c>AddPosition</c>; for those the
        /// updated counters are stored back and <c>fieldBoosts</c> is multiplied by the
        /// field's boost.
        /// </remarks>
        /// <param name="doc">Document whose fields are enumerated via <c>doc.Fields()</c>.</param>
        /// <exception cref="System.ArgumentException">
        /// Thrown for an indexed, tokenized field that has neither a reader value nor a
        /// string value.
        /// </exception>
        private void  InvertDocument(Document doc)
        {
            foreach (Field field in doc.Fields())
            {
                System.String fieldName   = field.Name();
                int           fieldNumber = fieldInfos.FieldNumber(fieldName);

                int length   = fieldLengths[fieldNumber];               // length of field
                int position = fieldPositions[fieldNumber];             // position in field
                // Terms already recorded for this field number: insert the analyzer's
                // position-increment gap before adding more.
                if (length > 0)
                {
                    position += analyzer.GetPositionIncrementGap(fieldName);
                }
                int offset = fieldOffsets[fieldNumber];                 // offset field

                if (field.IsIndexed())
                {
                    if (!field.IsTokenized())
                    {
                        // un-tokenized field: the entire string value becomes a single term
                        System.String stringValue = field.StringValue();
                        if (field.IsStoreOffsetWithTermVector())
                        {
                            AddPosition(fieldName, stringValue, position++, new TermVectorOffsetInfo(offset, offset + stringValue.Length));
                        }
                        else
                        {
                            // no offsets requested for the term vector
                            AddPosition(fieldName, stringValue, position++, null);
                        }
                        offset += stringValue.Length;
                        length++;
                    }
                    else
                    {
                        // Use the field's reader when present, else read from its string value.
                        System.IO.TextReader reader;                         // find or make Reader
                        if (field.ReaderValue() != null)
                        {
                            reader = field.ReaderValue();
                        }
                        else if (field.StringValue() != null)
                        {
                            reader = new System.IO.StringReader(field.StringValue());
                        }
                        else
                        {
                            throw new System.ArgumentException("field must have either String or Reader value");
                        }

                        // Tokenize field and add to postingTable
                        TokenStream stream = analyzer.TokenStream(fieldName, reader);
                        try
                        {
                            Token lastToken = null;
                            for (Token t = stream.Next(); t != null; t = stream.Next())
                            {
                                // Combined with the position++ in the call below, position
                                // advances by exactly the token's position increment.
                                position += (t.GetPositionIncrement() - 1);

                                if (field.IsStoreOffsetWithTermVector())
                                {
                                    AddPosition(fieldName, t.TermText(), position++, new TermVectorOffsetInfo(offset + t.StartOffset(), offset + t.EndOffset()));
                                }
                                else
                                {
                                    AddPosition(fieldName, t.TermText(), position++, null);
                                }

                                lastToken = t;
                                // Checked after the add: the token that pushes length beyond
                                // maxFieldLength has already been recorded when the loop stops.
                                if (++length > maxFieldLength)
                                {
                                    if (infoStream != null)
                                    {
                                        infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached, ignoring following tokens");
                                    }
                                    break;
                                }
                            }

                            // Move the running offset past the last token that was added.
                            // NOTE(review): the +1 presumably leaves a one-character gap between
                            // successive values of the same field - confirm with offset consumers.
                            if (lastToken != null)
                            {
                                offset += lastToken.EndOffset() + 1;
                            }
                        }
                        finally
                        {
                            // Always release the token stream, including on exceptions.
                            stream.Close();
                        }
                    }

                    fieldLengths[fieldNumber]   = length;                   // save field length
                    fieldPositions[fieldNumber] = position;                 // save field position
                    fieldBoosts[fieldNumber]   *= field.GetBoost();
                    fieldOffsets[fieldNumber]   = offset;
                }
            }
        }