/// <summary>
/// Adds field info for a Document: every field yielded by
/// <c>doc.Fields()</c> is forwarded to the six-argument <c>Add</c> overload.
/// </summary>
/// <param name="doc">Document whose fields are registered.</param>
public void Add(Document doc)
{
    foreach (Field current in doc.Fields())
    {
        // Name first, then the indexed flag, the three term-vector flags
        // and finally the omit-norms flag.
        this.Add(
            current.Name(),
            current.IsIndexed(),
            current.IsTermVectorStored(),
            current.IsStorePositionWithTermVector(),
            current.IsStoreOffsetWithTermVector(),
            current.GetOmitNorms());
    }
}
/// <summary>
/// Adds field info for a Document.
/// </summary>
/// <remarks>
/// Each field of the document is passed on, one at a time, to the
/// overload of <c>Add</c> that takes the field name and its flags.
/// </remarks>
/// <param name="doc">Document whose fields are enumerated.</param>
public void Add(Document doc)
{
    foreach (Field f in doc.Fields())
    {
        Add(f.Name(), f.IsIndexed(),
            // term-vector related settings of the field
            f.IsTermVectorStored(), f.IsStorePositionWithTermVector(), f.IsStoreOffsetWithTermVector(),
            f.GetOmitNorms());
    }
}
/// <summary>
/// Counts the fields of a document by enumerating <c>doc.Fields()</c>.
/// </summary>
/// <param name="doc">Document whose fields are counted.</param>
/// <returns>The number of fields yielded by <c>doc.Fields()</c>.</returns>
public static int NumFields(Document doc)
{
    int result = 0;
    // Only the number of iterations matters; the loop variable itself is not used.
    foreach (Field field in doc.Fields())
    {
        result++;
    }
    return result;
}
/// <summary>
/// Checks that the reader reports exactly one document, that document 0 can
/// be read back, and that every field read back has a name known to
/// <c>DocHelper.nameValues</c>.
/// </summary>
public virtual void TestDocument()
{
    try
    {
        Assert.IsTrue(reader.NumDocs() == 1, "expected exactly one document, got " + reader.NumDocs());
        Assert.IsTrue(reader.MaxDoc() >= 1, "expected MaxDoc() >= 1, got " + reader.MaxDoc());

        Document result = reader.Document(0);
        Assert.IsNotNull(result);

        // There are 2 unstored fields on the document that are not preserved across writing
        Assert.AreEqual(DocHelper.NumFields(testDoc) - 2, DocHelper.NumFields(result));

        foreach (Field field in result.Fields())
        {
            Assert.IsNotNull(field);
            Assert.IsTrue(DocHelper.nameValues.Contains(field.Name()), "unexpected field name: " + field.Name());
        }
    }
    catch (System.IO.IOException e)
    {
        // Log the whole exception (type, message and stack trace), not just the
        // stack trace, and fail with a message that says what went wrong.
        System.Console.Error.WriteLine(e);
        Assert.Fail("Unexpected IOException: " + e.Message);
    }
}
// Tokenizes the fields of a document into Postings.
//
// For every field the running length, position and offset stored under the
// field's number are loaded, advanced while the field's terms are added via
// AddPosition, and written back (together with the multiplied boost) when the
// field is indexed.
private void InvertDocument(Document doc)
{
    foreach (Field field in doc.Fields())
    {
        System.String fieldName = field.Name();
        int fieldNumber = fieldInfos.FieldNumber(fieldName);

        int length = fieldLengths[fieldNumber];     // length of field so far
        int position = fieldPositions[fieldNumber]; // position in field
        if (length > 0)
        {
            // Something was already recorded for this field number:
            // ask the analyzer for the gap to insert before the new terms.
            position += analyzer.GetPositionIncrementGap(fieldName);
        }
        int offset = fieldOffsets[fieldNumber];     // offset in field

        if (!field.IsIndexed())
        {
            // Nothing is added and nothing is saved for a non-indexed field.
            continue;
        }

        if (!field.IsTokenized())
        {
            // Un-tokenized field: the whole string value is a single term.
            System.String stringValue = field.StringValue();
            if (field.IsStoreOffsetWithTermVector())
            {
                AddPosition(fieldName, stringValue, position, new TermVectorOffsetInfo(offset, offset + stringValue.Length));
            }
            else
            {
                AddPosition(fieldName, stringValue, position, null);
            }
            position++;
            offset += stringValue.Length;
            length++;
        }
        else
        {
            // Find or make the Reader that supplies the text to tokenize.
            System.IO.TextReader reader;
            if (field.ReaderValue() != null)
            {
                reader = field.ReaderValue();
            }
            else if (field.StringValue() != null)
            {
                reader = new System.IO.StringReader(field.StringValue());
            }
            else
            {
                throw new System.ArgumentException("field must have either String or Reader value");
            }

            // Tokenize the field and add every token to the posting table.
            TokenStream stream = analyzer.TokenStream(fieldName, reader);
            try
            {
                Token lastToken = null;
                Token t = stream.Next();
                while (t != null)
                {
                    // An increment of 1 keeps consecutive positions; anything
                    // else shifts the position before the token is added.
                    position += (t.GetPositionIncrement() - 1);
                    if (field.IsStoreOffsetWithTermVector())
                    {
                        AddPosition(fieldName, t.TermText(), position, new TermVectorOffsetInfo(offset + t.StartOffset(), offset + t.EndOffset()));
                    }
                    else
                    {
                        AddPosition(fieldName, t.TermText(), position, null);
                    }
                    position++;
                    lastToken = t;

                    length++;
                    if (length > maxFieldLength)
                    {
                        if (infoStream != null)
                        {
                            infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached, ignoring following tokens");
                        }
                        break;
                    }

                    t = stream.Next();
                }

                if (lastToken != null)
                {
                    // Continue after the end of the last token that was added.
                    offset += lastToken.EndOffset() + 1;
                }
            }
            finally
            {
                stream.Close();
            }
        }

        fieldLengths[fieldNumber] = length;     // save field length
        fieldPositions[fieldNumber] = position; // save field position
        fieldBoosts[fieldNumber] *= field.GetBoost();
        fieldOffsets[fieldNumber] = offset;
    }
}
/// <summary>
/// Writes the stored fields of a document to the fields stream, after
/// recording the fields stream's current file pointer in the index stream.
/// </summary>
/// <remarks>
/// Layout written to the fields stream: the number of stored fields, then for
/// each stored field its field number, a flag byte, and the value. Binary or
/// compressed values are written as a length followed by the bytes; all
/// other values are written with <c>WriteString</c>.
/// </remarks>
/// <param name="doc">Document whose stored fields are written.</param>
internal void AddDocument(Document doc)
{
    indexStream.WriteLong(fieldsStream.GetFilePointer());

    // First pass: the count of stored fields is written before any field data.
    int storedCount = 0;
    foreach (Field field in doc.Fields())
    {
        if (field.IsStored())
        {
            storedCount++;
        }
    }
    fieldsStream.WriteVInt(storedCount);

    // Second pass: write each stored field.
    foreach (Field field in doc.Fields())
    {
        if (!field.IsStored())
        {
            continue;
        }

        fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));

        // Evaluate each flag once; the values drive both the flag byte and
        // the choice of encoding below.
        bool isTokenized = field.IsTokenized();
        bool isBinary = field.IsBinary();
        bool isCompressed = field.IsCompressed();

        byte bits = 0;
        if (isTokenized)
        {
            bits |= FieldsWriter.FIELD_IS_TOKENIZED;
        }
        if (isBinary)
        {
            bits |= FieldsWriter.FIELD_IS_BINARY;
        }
        if (isCompressed)
        {
            bits |= FieldsWriter.FIELD_IS_COMPRESSED;
        }
        fieldsStream.WriteByte(bits);

        if (isCompressed)
        {
            // Compression is enabled for the current field: compress either
            // the raw bytes or the UTF-8 encoding of the string value.
            byte[] data;
            if (isBinary)
            {
                data = Compress(field.BinaryValue());
            }
            else
            {
                data = Compress(System.Text.Encoding.UTF8.GetBytes(field.StringValue()));
            }
            int len = data.Length;
            fieldsStream.WriteVInt(len);
            fieldsStream.WriteBytes(data, len);
        }
        else if (isBinary)
        {
            // Uncompressed binary value: length followed by the bytes.
            byte[] data = field.BinaryValue();
            int len = data.Length;
            fieldsStream.WriteVInt(len);
            fieldsStream.WriteBytes(data, len);
        }
        else
        {
            // Uncompressed string value.
            fieldsStream.WriteString(field.StringValue());
        }
    }
}
/// <summary>
/// Returns how many fields <c>doc.Fields()</c> yields for the given document.
/// </summary>
/// <param name="doc">Document to inspect.</param>
/// <returns>The field count obtained by enumerating the document's fields.</returns>
public static int NumFields(Document doc)
{
    int count = 0;
    foreach (Field field in doc.Fields())
    {
        // The field itself is not needed here, only the fact that it exists.
        count++;
    }
    return count;
}
/// <summary>
/// Appends a document's stored fields to the fields stream and records in the
/// index stream the file pointer at which the document's data starts.
/// </summary>
/// <param name="doc">
/// Document to write. Only fields for which <c>IsStored()</c> is true are
/// written.
/// </param>
internal void AddDocument(Document doc)
{
    indexStream.WriteLong(fieldsStream.GetFilePointer());

    // The number of stored fields goes first, so they are counted up front.
    int storedCount = 0;
    foreach (Field field in doc.Fields())
    {
        if (field.IsStored())
        {
            storedCount++;
        }
    }
    fieldsStream.WriteVInt(storedCount);

    foreach (Field field in doc.Fields())
    {
        if (field.IsStored())
        {
            fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));

            // Flag byte describing how the value that follows is encoded.
            bool tokenized = field.IsTokenized();
            bool binary = field.IsBinary();
            bool compressed = field.IsCompressed();
            byte bits = 0;
            if (tokenized)
            {
                bits |= FieldsWriter.FIELD_IS_TOKENIZED;
            }
            if (binary)
            {
                bits |= FieldsWriter.FIELD_IS_BINARY;
            }
            if (compressed)
            {
                bits |= FieldsWriter.FIELD_IS_COMPRESSED;
            }
            fieldsStream.WriteByte(bits);

            if (compressed)
            {
                // Compressed value: binary fields are compressed as-is, string
                // fields are encoded as UTF-8 first. Written as length + bytes.
                byte[] data = binary
                    ? Compress(field.BinaryValue())
                    : Compress(System.Text.Encoding.UTF8.GetBytes(field.StringValue()));
                int len = data.Length;
                fieldsStream.WriteVInt(len);
                fieldsStream.WriteBytes(data, len);
            }
            else
            {
                if (binary)
                {
                    // Uncompressed binary value, written as length + bytes.
                    byte[] data = field.BinaryValue();
                    int len = data.Length;
                    fieldsStream.WriteVInt(len);
                    fieldsStream.WriteBytes(data, len);
                }
                else
                {
                    // Uncompressed string value.
                    fieldsStream.WriteString(field.StringValue());
                }
            }
        }
    }
}
// Tokenizes the fields of a document into Postings.
//
// Each field's terms are handed to AddPosition. The per-field-number length,
// position, boost and offset arrays are updated for indexed fields only.
private void InvertDocument(Document doc)
{
    foreach (Field field in doc.Fields())
    {
        System.String name = field.Name();
        int number = fieldInfos.FieldNumber(name);

        int count = fieldLengths[number];   // length of field
        int pos = fieldPositions[number];   // position in field
        if (count > 0)
        {
            pos += analyzer.GetPositionIncrementGap(name);
        }
        int startOffset = fieldOffsets[number]; // offset field

        if (field.IsIndexed())
        {
            if (field.IsTokenized())
            {
                // Find or make the Reader the analyzer will consume.
                System.IO.TextReader input;
                if (field.ReaderValue() != null)
                {
                    input = field.ReaderValue();
                }
                else
                {
                    if (field.StringValue() == null)
                    {
                        throw new System.ArgumentException("field must have either String or Reader value");
                    }
                    input = new System.IO.StringReader(field.StringValue());
                }

                // Tokenize field and add to postingTable
                TokenStream tokens = analyzer.TokenStream(name, input);
                try
                {
                    Token previous = null;
                    Token current;
                    while ((current = tokens.Next()) != null)
                    {
                        pos += (current.GetPositionIncrement() - 1);

                        if (field.IsStoreOffsetWithTermVector())
                        {
                            AddPosition(name, current.TermText(), pos++, new TermVectorOffsetInfo(startOffset + current.StartOffset(), startOffset + current.EndOffset()));
                        }
                        else
                        {
                            AddPosition(name, current.TermText(), pos++, null);
                        }

                        previous = current;

                        // Stop once more than maxFieldLength terms have been counted.
                        if (++count > maxFieldLength)
                        {
                            if (infoStream != null)
                            {
                                infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached, ignoring following tokens");
                            }
                            break;
                        }
                    }

                    if (previous != null)
                    {
                        // Move the offset past the end of the last token added.
                        startOffset += previous.EndOffset() + 1;
                    }
                }
                finally
                {
                    tokens.Close();
                }
            }
            else
            {
                // un-tokenized field: the string value is added as one term
                System.String value = field.StringValue();
                if (field.IsStoreOffsetWithTermVector())
                {
                    AddPosition(name, value, pos++, new TermVectorOffsetInfo(startOffset, startOffset + value.Length));
                }
                else
                {
                    AddPosition(name, value, pos++, null);
                }
                startOffset += value.Length;
                count++;
            }

            fieldLengths[number] = count;       // save field length
            fieldPositions[number] = pos;       // save field position
            fieldBoosts[number] *= field.GetBoost();
            fieldOffsets[number] = startOffset; // save field offset
        }
    }
}