/// <summary>Adds Field info for a Document: one entry per field, recording
/// its name, whether it is indexed, and whether its term vector is stored. </summary>
public void Add(Document doc)
{
    foreach (Field f in doc.Fields())
    {
        Add(f.Name(), f.IsIndexed(), f.IsTermVectorStored());
    }
}
/// <summary>
/// Appends one document's stored fields to the fields file and records the
/// document's start offset in the index file. Each stored field is written
/// as: field number (VInt), a flags byte (bit 0 = tokenized), and the
/// string value.
/// </summary>
internal void AddDocument(Document doc)
{
    // Remember where this document's stored fields begin.
    indexStream.WriteLong(fieldsStream.GetFilePointer());

    // First pass: count stored fields so the reader knows how many follow.
    int stored = 0;
    foreach (Field f in doc.Fields())
    {
        if (f.IsStored())
        {
            stored++;
        }
    }
    fieldsStream.WriteVInt(stored);

    // Second pass: write each stored field's number, flags and value.
    foreach (Field f in doc.Fields())
    {
        if (!f.IsStored())
        {
            continue;
        }
        fieldsStream.WriteVInt(fieldInfos.FieldNumber(f.Name()));

        byte flags = 0;
        if (f.IsTokenized())
        {
            flags |= 1;
        }
        fieldsStream.WriteByte(flags);
        fieldsStream.WriteString(f.StringValue());
    }
}
// Tokenizes the fields of a document into Postings.
// For each indexed field, either adds the whole string value as a single
// term (un-tokenized field), or runs the analyzer's TokenStream over the
// field text, honoring each token's position increment and truncating once
// the field reaches maxFieldLength terms. Per-field term counts, positions
// and boosts accumulate across multiple fields with the same name via the
// fieldLengths / fieldPositions / fieldBoosts arrays.
private void InvertDocument(Document doc)
{
    foreach (Field field in doc.Fields())
    {
        System.String fieldName = field.Name();
        int fieldNumber = fieldInfos.FieldNumber(fieldName);
        int length = fieldLengths[fieldNumber]; // length of Field (terms so far)
        int position = fieldPositions[fieldNumber]; // position in Field (next token position)
        if (field.IsIndexed())
        {
            if (!field.IsTokenized())
            {
                // un-tokenized Field: the entire value is one term at one position
                AddPosition(fieldName, field.StringValue(), position++);
                length++;
            }
            else
            {
                System.IO.TextReader reader; // find or make Reader
                if (field.ReaderValue() != null)
                {
                    reader = field.ReaderValue();
                }
                else if (field.StringValue() != null)
                {
                    reader = new System.IO.StringReader(field.StringValue());
                }
                else
                {
                    throw new System.ArgumentException("Field must have either String or Reader value");
                }
                // Tokenize Field and add to postingTable
                TokenStream stream = analyzer.TokenStream(fieldName, reader);
                try
                {
                    for (Token t = stream.Next(); t != null; t = stream.Next())
                    {
                        // An increment of k advances the position by k; the usual
                        // value 1 makes consecutive terms adjacent (net effect of
                        // the two lines below: position += increment).
                        position += (t.GetPositionIncrement() - 1);
                        AddPosition(fieldName, t.TermText(), position++);
                        // Discard the remainder once the term limit is exceeded.
                        if (++length > maxFieldLength)
                        {
                            break;
                        }
                    }
                }
                finally
                {
                    // always release the analyzer's stream, even after a break or throw
                    stream.Close();
                }
            }
            fieldLengths[fieldNumber] = length; // save Field length
            fieldPositions[fieldNumber] = position; // save Field position
            fieldBoosts[fieldNumber] *= field.GetBoost();
        }
    }
}
// Appends one document's stored fields to the fields file and records the
// document's start offset in the index file. Each stored field is written as:
// field number (VInt), a flags byte (bit 0 = tokenized), and the string value.
internal void AddDocument(Document doc)
{
    // Record where this document's stored fields begin.
    indexStream.WriteLong(fieldsStream.GetFilePointer());
    // First pass: count stored fields so the reader knows how many follow.
    int storedCount = 0;
    foreach (Field field in doc.Fields())
    {
        if (field.IsStored())
            storedCount++;
    }
    fieldsStream.WriteVInt(storedCount);
    // Second pass: write each stored field's number, flags and value.
    foreach (Field field in doc.Fields())
    {
        if (field.IsStored())
        {
            fieldsStream.WriteVInt(fieldInfos.FieldNumber(field.Name()));
            byte bits = 0;
            if (field.IsTokenized())
                bits |= 1; // bit 0 marks a tokenized field
            fieldsStream.WriteByte(bits);
            // NOTE(review): presumably stored fields always carry a string value
            // here; a reader-only stored field would pass null — confirm upstream.
            fieldsStream.WriteString(field.StringValue());
        }
    }
}
/// <summary>Adds Field info for a Document: one entry per field, recording its
/// name, whether it is indexed, and whether its term vector is stored. </summary>
public void Add(Document doc)
{
    foreach (Field field in doc.Fields())
    {
        Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored());
    }
}
/// <summary>
/// Writes a single-document segment: the field name table (.fnm), the
/// stored field values, the inverted postings, and per-field norms.
/// </summary>
/*internal*/ public void AddDocument(System.String segment, Document doc)
{
    // Write the segment's field name table.
    fieldInfos = new FieldInfos();
    fieldInfos.Add(doc);
    fieldInfos.Write(directory, segment + ".fnm");

    // Write the stored field values.
    FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
    try
    {
        fieldsWriter.AddDocument(doc);
    }
    finally
    {
        fieldsWriter.Close();
    }

    // Invert the document into postingTable, resetting per-field state first.
    postingTable.Clear();
    fieldLengths = new int[fieldInfos.Size()];
    fieldPositions = new int[fieldInfos.Size()];
    fieldBoosts = new float[fieldInfos.Size()];
    float docBoost = doc.GetBoost();
    for (int i = 0; i < fieldBoosts.Length; i++)
    {
        fieldBoosts[i] = docBoost; // every field starts at the document boost
    }
    InvertDocument(doc);

    // Sort postingTable into an array and write the postings files.
    Posting[] postings = SortPostingTable();
    WritePostings(postings, segment);

    // Write norms of the indexed fields.
    WriteNorms(doc, segment);
}
// Writes a single-document segment: the field name table (.fnm), the stored
// field values, the inverted postings, and per-field norms.
/*internal*/ public void AddDocument(System.String segment, Document doc)
{
    // write Field names
    fieldInfos = new FieldInfos();
    fieldInfos.Add(doc);
    fieldInfos.Write(directory, segment + ".fnm");
    // write Field values
    FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
    try
    {
        fieldsWriter.AddDocument(doc);
    }
    finally
    {
        fieldsWriter.Close();
    }
    // invert doc into postingTable
    postingTable.Clear(); // clear postingTable
    fieldLengths = new int[fieldInfos.Size()]; // init fieldLengths
    fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
    fieldBoosts = new float[fieldInfos.Size()]; // init fieldBoosts
    // every field's boost starts at the document boost; InvertDocument
    // multiplies in the per-field boosts afterwards
    float boost = doc.GetBoost();
    for (int i = 0; i < fieldBoosts.Length; i++)
    {
        fieldBoosts[i] = boost;
    }
    InvertDocument(doc);
    // sort postingTable into an array
    Posting[] postings = SortPostingTable();
    /* debug dump of the sorted postings (Java original, kept for reference):
     * for (int i = 0; i < postings.length; i++) {
     * Posting posting = postings[i];
     * System.out.print(posting.term);
     * System.out.print(" freq=" + posting.freq);
     * System.out.print(" pos=");
     * System.out.print(posting.positions[0]);
     * for (int j = 1; j < posting.freq; j++)
     * System.out.print("," + posting.positions[j]);
     * System.out.println("");
     * }
     */
    // write postings
    WritePostings(postings, segment);
    // write norms of indexed fields
    WriteNorms(doc, segment);
}
/// <summary> Adds a document to this index, using the provided analyzer instead of the
/// value of {@link #GetAnalyzer()}. If the document contains more than
/// {@link #maxFieldLength} terms for a given Field, the remainder are
/// discarded.
/// </summary>
public virtual void AddDocument(Document doc, Analyzer analyzer)
{
    // Build the single-document segment in the RAM directory first.
    DocumentWriter writer = new DocumentWriter(ramDirectory, analyzer, similarity, maxFieldLength);
    System.String segment = NewSegmentName();
    writer.AddDocument(segment, doc);

    // Register the new segment and possibly merge. Locks `this` to match
    // the synchronization convention used elsewhere in this class.
    lock (this)
    {
        segmentInfos.Add(new SegmentInfo(segment, 1, ramDirectory));
        MaybeMergeSegments();
    }
}
/// <summary>
/// Loads the stored fields of document n. Seeks the index file to entry n
/// (8 bytes per entry), follows the recorded offset into the fields file,
/// and rebuilds a Document from the stored field entries.
/// </summary>
public /*internal*/ Document Doc(int n)
{
    // Each index entry is a fixed 8-byte pointer into the fields file.
    indexStream.Seek(n * 8L);
    fieldsStream.Seek(indexStream.ReadLong());

    Document result = new Document();
    int fieldCount = fieldsStream.ReadVInt();
    for (int i = 0; i < fieldCount; i++)
    {
        FieldInfo info = fieldInfos.FieldInfo(fieldsStream.ReadVInt());
        byte flags = fieldsStream.ReadByte();
        // The stored string follows the flags byte; bit 0 marks a tokenized field.
        result.Add(new Field(info.name, fieldsStream.ReadString(), true, info.isIndexed, (flags & 1) != 0, info.storeTermVector));
    }
    return result;
}
/// <summary>
/// Writes one norm file (segment.fN) per indexed field, each containing a
/// single encoded byte: the field's accumulated boost times its length norm.
/// </summary>
private void WriteNorms(Document doc, System.String segment)
{
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo info = fieldInfos.FieldInfo(i);
        if (!info.isIndexed)
        {
            continue; // only indexed fields carry norms
        }
        float norm = fieldBoosts[i] * similarity.LengthNorm(info.name, fieldLengths[i]);
        OutputStream norms = directory.CreateFile(segment + ".f" + i);
        try
        {
            norms.WriteByte(Monodoc.Lucene.Net.Search.Similarity.EncodeNorm(norm));
        }
        finally
        {
            norms.Close();
        }
    }
}
// Loads the stored fields of document n. Seeks the index file to entry n
// (8 bytes per entry), follows the recorded offset into the fields file, and
// rebuilds a Document from the stored field entries.
public /*internal*/ Document Doc(int n)
{
    // each index entry is a fixed 8-byte pointer into the fields file
    indexStream.Seek(n * 8L);
    long position = indexStream.ReadLong();
    fieldsStream.Seek(position);
    Document doc = new Document();
    int numFields = fieldsStream.ReadVInt();
    for (int i = 0; i < numFields; i++)
    {
        int fieldNumber = fieldsStream.ReadVInt();
        FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
        byte bits = fieldsStream.ReadByte(); // bit 0 marks a tokenized field
        doc.Add(new Field(fi.name, fieldsStream.ReadString(), true, fi.isIndexed, (bits & 1) != 0, fi.storeTermVector)); // vector
    }
    return(doc);
}
/// <summary> Adds a document to this index. If the document contains more than
/// {@link #maxFieldLength} terms for a given Field, the remainder are
/// discarded.
/// </summary>
public virtual void AddDocument(Document doc)
{
    // Delegate to the analyzer-explicit overload with this writer's default analyzer.
    AddDocument(doc, analyzer);
}
// Writes one norm file (segment.fN) per indexed field, each containing a
// single encoded byte: the field's accumulated boost times its length norm.
private void WriteNorms(Document doc, System.String segment)
{
    for (int n = 0; n < fieldInfos.Size(); n++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(n);
        if (fi.isIndexed) // only indexed fields carry norms
        {
            float norm = fieldBoosts[n] * similarity.LengthNorm(fi.name, fieldLengths[n]);
            OutputStream norms = directory.CreateFile(segment + ".f" + n);
            try
            {
                norms.WriteByte(Monodoc.Lucene.Net.Search.Similarity.EncodeNorm(norm));
            }
            finally
            {
                // always close the norm file, even if the write throws
                norms.Close();
            }
        }
    }
}
// Tokenizes the fields of a document into Postings. Indexed, un-tokenized
// fields contribute their whole value as a single term; tokenized fields are
// run through the analyzer, honoring token position increments, and are
// truncated once maxFieldLength terms have been produced.
private void InvertDocument(Document doc)
{
    foreach (Field field in doc.Fields())
    {
        if (!field.IsIndexed())
        {
            continue; // only indexed fields contribute postings
        }

        System.String name = field.Name();
        int number = fieldInfos.FieldNumber(name);
        int termCount = fieldLengths[number]; // terms seen so far for this field name
        int pos = fieldPositions[number]; // next token position for this field name

        if (!field.IsTokenized())
        {
            // Un-tokenized: the entire value is one term at one position.
            AddPosition(name, field.StringValue(), pos++);
            termCount++;
        }
        else
        {
            // Locate a reader for the field text, wrapping a plain string if needed.
            System.IO.TextReader reader;
            if (field.ReaderValue() != null)
            {
                reader = field.ReaderValue();
            }
            else if (field.StringValue() != null)
            {
                reader = new System.IO.StringReader(field.StringValue());
            }
            else
            {
                throw new System.ArgumentException("Field must have either String or Reader value");
            }

            // Run the analyzer over the text and record each token's position.
            TokenStream tokens = analyzer.TokenStream(name, reader);
            try
            {
                for (Token token = tokens.Next(); token != null; token = tokens.Next())
                {
                    // Net effect of the next two lines: pos += increment.
                    pos += (token.GetPositionIncrement() - 1);
                    AddPosition(name, token.TermText(), pos++);
                    if (++termCount > maxFieldLength)
                    {
                        break; // discard terms beyond the configured limit
                    }
                }
            }
            finally
            {
                tokens.Close();
            }
        }

        fieldLengths[number] = termCount; // save Field length
        fieldPositions[number] = pos; // save Field position
        fieldBoosts[number] *= field.GetBoost();
    }
}
/// <summary> Adds a document to this index, using the provided analyzer instead of the
/// value of {@link #GetAnalyzer()}. If the document contains more than
/// {@link #maxFieldLength} terms for a given Field, the remainder are
/// discarded.
/// </summary>
public virtual void AddDocument(Document doc, Analyzer analyzer)
{
    // Build the single-document segment in the RAM directory first.
    DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, similarity, maxFieldLength);
    System.String segmentName = NewSegmentName();
    dw.AddDocument(segmentName, doc);
    // Register the new segment and possibly merge, synchronized on this
    // writer (matching the class's lock convention).
    lock (this)
    {
        segmentInfos.Add(new SegmentInfo(segmentName, 1, ramDirectory));
        MaybeMergeSegments();
    }
}
/// <summary> Adds a document to this index. If the document contains more than
/// {@link #maxFieldLength} terms for a given Field, the remainder are
/// discarded.
/// </summary>
public virtual void AddDocument(Document doc)
{
    // Delegates to the analyzer-explicit overload with this writer's default analyzer.
    AddDocument(doc, analyzer);
}