private void AddDoc(IndexWriter iw, int i)
{
    Document d = new Document();
    Fieldable f;
    int scoreAndID = i + 1;

    f = new Field(ID_FIELD, Id2String(scoreAndID), Field.Store.YES, Field.Index.NOT_ANALYZED); // for debug purposes
    f.SetOmitNorms(true);
    d.Add(f);

    f = new Field(TEXT_FIELD, "text of doc" + scoreAndID + TextLine(i), Field.Store.NO, Field.Index.ANALYZED); // for regular search
    f.SetOmitNorms(true);
    d.Add(f);

    f = new Field(INT_FIELD, "" + scoreAndID, Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring
    f.SetOmitNorms(true);
    d.Add(f);

    f = new Field(FLOAT_FIELD, scoreAndID + ".000", Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring
    f.SetOmitNorms(true);
    d.Add(f);

    iw.AddDocument(d);
    Log("added: " + d);
}
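A minimal follow-up sketch of how the INT_FIELD written above is typically consumed for function scoring; FieldScoreQuery and IndexSearcher.Search(Query, int) are assumed to match this port's 2.9-era Lucene.Net.Search.Function API:

// Hedged sketch (assumes Lucene.Net.Search.Function.FieldScoreQuery and
// IndexSearcher.Search(Query, int)): rank documents by the integer stored
// in INT_FIELD instead of tf-idf, which is why the fields above omit norms.
Query q = new FieldScoreQuery(INT_FIELD, FieldScoreQuery.Type.INT);
IndexSearcher s = new IndexSearcher(dir); // 'dir' = the Directory written by AddDoc
TopDocs td = s.Search(q, 10);
// With Type.INT, each hit's score should track its scoreAndID value.
s.Close();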
private static Document MakeDocument(System.String docText)
{
    Document doc = new Document();
    Field f = new Field("f", docText, Field.Store.NO, Field.Index.ANALYZED);
    f.SetOmitNorms(true);
    doc.Add(f);
    return doc;
}
/*internal*/ public Document Doc(int n)
{
    // The index stream (.fdx) holds one 8-byte pointer per document into
    // the fields stream (.fdt), so doc n's fields start at entry n * 8.
    indexStream.Seek(n * 8L);
    long position = indexStream.ReadLong();
    fieldsStream.Seek(position);

    Document doc = new Document();
    int numFields = fieldsStream.ReadVInt();
    for (int i = 0; i < numFields; i++)
    {
        int fieldNumber = fieldsStream.ReadVInt();
        FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);

        byte bits = fieldsStream.ReadByte();
        bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
        bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

        if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
        {
            byte[] b = new byte[fieldsStream.ReadVInt()];
            fieldsStream.ReadBytes(b, 0, b.Length);
            if (compressed)
                doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
            else
                doc.Add(new Field(fi.name, b, Field.Store.YES));
        }
        else
        {
            // Rebuild the Field's index/store/term-vector flags from the
            // FieldInfo so the returned Field mirrors how it was written.
            Field.Index index;
            Field.Store store = Field.Store.YES;
            if (fi.isIndexed && tokenize)
                index = Field.Index.TOKENIZED;
            else if (fi.isIndexed && !tokenize)
                index = Field.Index.UN_TOKENIZED;
            else
                index = Field.Index.NO;

            Field.TermVector termVector;
            if (fi.storeTermVector)
            {
                if (fi.storeOffsetWithTermVector)
                {
                    if (fi.storePositionWithTermVector)
                        termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
                    else
                        termVector = Field.TermVector.WITH_OFFSETS;
                }
                else if (fi.storePositionWithTermVector)
                    termVector = Field.TermVector.WITH_POSITIONS;
                else
                    termVector = Field.TermVector.YES;
            }
            else
            {
                termVector = Field.TermVector.NO;
            }

            if (compressed)
            {
                store = Field.Store.COMPRESS;
                byte[] b = new byte[fieldsStream.ReadVInt()];
                fieldsStream.ReadBytes(b, 0, b.Length);
                Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
                f.SetOmitNorms(fi.omitNorms);
                doc.Add(f);
            }
            else
            {
                Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
                f.SetOmitNorms(fi.omitNorms);
                doc.Add(f);
            }
        }
    }
    return doc;
}
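Most of the framing read above is length-prefixed with Lucene's variable-length integers. A self-contained sketch of the decoding that ReadVInt performs, written against a plain Stream rather than the real IndexInput:

// Hedged sketch of Lucene's VInt decoding: 7 data bits per byte, high bit
// set means another byte follows. Matches the on-disk format the methods
// above rely on; the Stream parameter is illustrative.
static int ReadVInt(System.IO.Stream s)
{
    int b = s.ReadByte();
    int value = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7)
    {
        b = s.ReadByte();
        value |= (b & 0x7F) << shift;
    }
    return value;
}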
public virtual void TestEnablingNorms()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.SetMaxBufferedDocs(10);

    // Enable norms for only 1 doc, pre flush
    for (int j = 0; j < 10; j++)
    {
        Document doc = new Document();
        Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
        if (j != 8)
        {
            f.SetOmitNorms(true);
        }
        doc.Add(f);
        writer.AddDocument(doc);
    }
    writer.Close();

    Term searchTerm = new Term("field", "aaa");
    IndexSearcher searcher = new IndexSearcher(dir);
    Hits hits = searcher.Search(new TermQuery(searchTerm));
    Assert.AreEqual(10, hits.Length());
    searcher.Close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.SetMaxBufferedDocs(10);

    // Enable norms for only 1 doc, post flush
    for (int j = 0; j < 27; j++)
    {
        Document doc = new Document();
        Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
        if (j != 26)
        {
            f.SetOmitNorms(true);
        }
        doc.Add(f);
        writer.AddDocument(doc);
    }
    writer.Close();

    searcher = new IndexSearcher(dir);
    hits = searcher.Search(new TermQuery(searchTerm));
    Assert.AreEqual(27, hits.Length());
    searcher.Close();

    IndexReader reader = IndexReader.Open(dir);
    reader.Close();
    dir.Close();
}
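The test only asserts hit counts. A hedged extra check, runnable just before dir.Close(), that norms really were written for the field (IndexReader.HasNorms is assumed available in this version of the port):

// Hedged sketch: norms are tracked per field, not per document, so the one
// document that did not omit norms should force norms for the whole field.
IndexReader normsReader = IndexReader.Open(dir);
Assert.IsTrue(normsReader.HasNorms("field")); // HasNorms assumed on this version's IndexReader
normsReader.Close();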
static DocHelper()
{
    textField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
    textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
    compressedTextField2 = new Field(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

    textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
    textField3.SetOmitNorms(true);

    keyField = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED);
    noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);

    noTFField = new Field(NO_TF_KEY, NO_TF_TEXT, Field.Store.YES, Field.Index.ANALYZED);
    noTFField.SetOmitTermFreqAndPositions(true);

    unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, Field.Store.YES, Field.Index.NO);
    unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
    unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
    lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
    textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
    textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

    // lazyFieldBinary and largeLazyField are still null at this point; their
    // two slots at the end of the array are patched below once both fields
    // have been built.
    fields = new Field[]
    {
        textField1, textField2, textField3, compressedTextField2, keyField,
        noNormsField, noTFField, unIndField, unStoredField1, unStoredField2,
        textUtfField1, textUtfField2, lazyField, lazyFieldBinary, largeLazyField
    };

    {
        // Initialize the large lazy field.
        System.Text.StringBuilder buffer = new System.Text.StringBuilder();
        for (int i = 0; i < 10000; i++)
        {
            buffer.Append("Lazily loading lengths of language in lieu of laughing ");
        }

        // Port artifact: Encoding.UTF8.GetBytes never throws IOException, so
        // this try/catch (from the Java original's getBytes("UTF-8")) is vestigial.
        try
        {
            LAZY_FIELD_BINARY_BYTES = System.Text.Encoding.UTF8.GetBytes("These are some binary field bytes");
        }
        catch (System.IO.IOException)
        {
        }

        lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES);
        fields[fields.Length - 2] = lazyFieldBinary;

        LARGE_LAZY_FIELD_TEXT = buffer.ToString();
        largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
        fields[fields.Length - 1] = largeLazyField;

        // Bucket every field into the category maps used by the tests.
        for (int i = 0; i < fields.Length; i++)
        {
            Fieldable f = fields[i];
            Add(all, f);
            if (f.IsIndexed()) Add(indexed, f); else Add(unindexed, f);
            if (f.IsTermVectorStored()) Add(termvector, f);
            if (f.IsIndexed() && !f.IsTermVectorStored()) Add(notermvector, f);
            if (f.IsStored()) Add(stored, f); else Add(unstored, f);
            if (f.GetOmitNorms()) Add(noNorms, f);
            if (f.GetOmitTf()) Add(noTf, f);
            if (f.IsLazy()) Add(lazy, f);
        }
    }

    {
        nameValues = new System.Collections.Hashtable();
        nameValues[TEXT_FIELD_1_KEY] = FIELD_1_TEXT;
        nameValues[TEXT_FIELD_2_KEY] = FIELD_2_TEXT;
        nameValues[COMPRESSED_TEXT_FIELD_2_KEY] = FIELD_2_COMPRESSED_TEXT;
        nameValues[TEXT_FIELD_3_KEY] = FIELD_3_TEXT;
        nameValues[KEYWORD_FIELD_KEY] = KEYWORD_TEXT;
        nameValues[NO_NORMS_KEY] = NO_NORMS_TEXT;
        nameValues[NO_TF_KEY] = NO_TF_TEXT;
        nameValues[UNINDEXED_FIELD_KEY] = UNINDEXED_FIELD_TEXT;
        nameValues[UNSTORED_FIELD_1_KEY] = UNSTORED_1_FIELD_TEXT;
        nameValues[UNSTORED_FIELD_2_KEY] = UNSTORED_2_FIELD_TEXT;
        nameValues[LAZY_FIELD_KEY] = LAZY_FIELD_TEXT;
        nameValues[LAZY_FIELD_BINARY_KEY] = LAZY_FIELD_BINARY_BYTES;
        nameValues[LARGE_LAZY_FIELD_KEY] = LARGE_LAZY_FIELD_TEXT;
        nameValues[TEXT_FIELD_UTF1_KEY] = FIELD_UTF1_TEXT;
        nameValues[TEXT_FIELD_UTF2_KEY] = FIELD_UTF2_TEXT;
    }
}
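The static constructor only builds the shared fixtures. A hedged sketch of typical test-side consumption, where SetupDoc is assumed to be this port's counterpart of the Java helper that adds every entry of DocHelper.fields to a document:

// Hedged sketch: index one document carrying all DocHelper fixture fields.
// DocHelper.SetupDoc is an assumed helper name (setupDoc in the Java original).
Document doc = new Document();
DocHelper.SetupDoc(doc);
RAMDirectory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
writer.AddDocument(doc);
writer.Close();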
// Variant of AddDoc above, written against the pre-2.4 Field.Index names
// (TOKENIZED/UN_TOKENIZED instead of ANALYZED/NOT_ANALYZED).
private void AddDoc(IndexWriter iw, int i)
{
    Document d = new Document();
    Fieldable f;
    int scoreAndID = i + 1;

    f = new Field(ID_FIELD, Id2String(scoreAndID), Field.Store.YES, Field.Index.UN_TOKENIZED); // for debug purposes
    f.SetOmitNorms(true);
    d.Add(f);

    f = new Field(TEXT_FIELD, "text of doc" + scoreAndID + TextLine(i), Field.Store.NO, Field.Index.TOKENIZED); // for regular search
    f.SetOmitNorms(true);
    d.Add(f);

    f = new Field(INT_FIELD, "" + scoreAndID, Field.Store.NO, Field.Index.UN_TOKENIZED); // for function scoring
    f.SetOmitNorms(true);
    d.Add(f);

    f = new Field(FLOAT_FIELD, scoreAndID + ".000", Field.Store.NO, Field.Index.UN_TOKENIZED); // for function scoring
    f.SetOmitNorms(true);
    d.Add(f);

    iw.AddDocument(d);
    Log("added: " + d);
}
public /*internal*/ Document Doc(int n, string[] fields)
{
    if (fields == null || fields.Length == 0)
        return Doc(n);

    // FIXME: use Hashset
    ArrayList field_list = new ArrayList(fields);
    int num_required_fields = field_list.Count;

    indexStream.Seek(n * 8L);
    long position = indexStream.ReadLong();
    fieldsStream.Seek(position);

    Document doc = new Document();
    int numFields = fieldsStream.ReadVInt();
    // Stop early once every requested field has been loaded.
    for (int i = 0; i < numFields && num_required_fields > 0; i++)
    {
        int fieldNumber = fieldsStream.ReadVInt();
        FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);

        if (field_list.Contains(fi.name))
        {
            num_required_fields--;
            byte bits = fieldsStream.ReadByte();
            bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
            bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;

            if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0)
            {
                byte[] b = new byte[fieldsStream.ReadVInt()];
                fieldsStream.ReadBytes(b, 0, b.Length);
                if (compressed)
                    doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
                else
                    doc.Add(new Field(fi.name, b, Field.Store.YES));
            }
            else
            {
                Field.Index index;
                Field.Store store = Field.Store.YES;
                if (fi.isIndexed && tokenize)
                    index = Field.Index.TOKENIZED;
                else if (fi.isIndexed && !tokenize)
                    index = Field.Index.UN_TOKENIZED;
                else
                    index = Field.Index.NO;

                Field.TermVector termVector;
                if (fi.storeTermVector)
                {
                    if (fi.storeOffsetWithTermVector)
                    {
                        if (fi.storePositionWithTermVector)
                            termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
                        else
                            termVector = Field.TermVector.WITH_OFFSETS;
                    }
                    else if (fi.storePositionWithTermVector)
                        termVector = Field.TermVector.WITH_POSITIONS;
                    else
                        termVector = Field.TermVector.YES;
                }
                else
                {
                    termVector = Field.TermVector.NO;
                }

                if (compressed)
                {
                    store = Field.Store.COMPRESS;
                    byte[] b = new byte[fieldsStream.ReadVInt()];
                    fieldsStream.ReadBytes(b, 0, b.Length);
                    Field f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
                    f.SetOmitNorms(fi.omitNorms);
                    doc.Add(f);
                }
                else
                {
                    Field f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
                    f.SetOmitNorms(fi.omitNorms);
                    doc.Add(f);
                }
            }
        }
        else
        {
            // Field not requested: skip its bytes without materializing it.
            byte bits = fieldsStream.ReadByte();
            bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
            if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0 || compressed)
            {
                // Binary and compressed values are a VInt byte count followed
                // by that many raw bytes.
                int length = fieldsStream.ReadVInt();
                for (int j = 0; j < length; j++)
                    fieldsStream.ReadByte();
            }
            else
            {
                // Plain strings are a VInt char count followed by modified
                // UTF-8, where each char occupies 1-3 bytes (leading byte
                // 0xxxxxxx, 110xxxxx, or 1110xxxx).
                int length = fieldsStream.ReadVInt();
                for (int j = 0; j < length; j++)
                {
                    byte b = fieldsStream.ReadByte();
                    if ((b & 0x80) == 0)
                        continue;                // 1-byte char
                    else if ((b & 0xE0) != 0xE0)
                        fieldsStream.ReadByte(); // 2-byte char
                    else
                    {
                        fieldsStream.ReadByte(); // 3-byte char
                        fieldsStream.ReadByte();
                    }
                }
            }
        }
    }
    return doc;
}
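A short usage sketch of the selective loader above; the reader variable and field name are illustrative:

// Hedged sketch: materialize only the "title" stored field of doc 42; every
// other stored field's bytes are skipped, not decoded. 'fieldsReader' is an
// instance of the class defining Doc above.
Document partial = fieldsReader.Doc(42, new string[] { "title" });
string title = partial.Get("title");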