/// <summary>
/// Adds a single document to the given index segment: writes the field
/// names file ("&lt;segment&gt;.fnm"), the stored field values, the inverted
/// postings, and the norms of indexed fields.
/// </summary>
/// <param name="segment">Name of the segment the document is written into.</param>
/// <param name="doc">The document to index.</param>
public /*internal*/ void AddDocument(System.String segment, Document doc)
{
    // write field names
    fieldInfos = new FieldInfos();
    fieldInfos.Add(doc);
    fieldInfos.Write(directory, segment + ".fnm");

    // write field values; Close() in finally so the output file is released
    // even if AddDocument throws
    FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
    try
    {
        fieldsWriter.AddDocument(doc);
    }
    finally
    {
        fieldsWriter.Close();
    }

    // invert doc into postingTable
    postingTable.Clear(); // clear postingTable
    fieldLengths = new int[fieldInfos.Size()]; // init fieldLengths
    fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
    fieldOffsets = new int[fieldInfos.Size()]; // init fieldOffsets
    fieldBoosts = new float[fieldInfos.Size()]; // init fieldBoosts

    // every field starts out with the document-level boost
    float boost = doc.GetBoost();
    for (int i = 0; i < fieldBoosts.Length; i++)
    {
        fieldBoosts[i] = boost;
    }

    InvertDocument(doc);

    // sort postingTable into an array
    Posting[] postings = SortPostingTable();

    // write postings
    WritePostings(postings, segment);

    // write norms of indexed fields
    WriteNorms(segment);
}
/// <summary>
/// Adds a single document to the given index segment: writes the field
/// names file ("&lt;segment&gt;.fnm"), the stored field values, the inverted
/// postings, and the norms of indexed fields.
/// </summary>
/// <param name="segment">Name of the segment the document is written into.</param>
/// <param name="doc">The document to index.</param>
public void AddDocument(System.String segment, Document doc)
{
    // write field names
    fieldInfos = new FieldInfos();
    fieldInfos.Add(doc);
    fieldInfos.Write(directory, segment + ".fnm");

    // write field values; Close() in finally so the output file is released
    // even if AddDocument throws
    FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
    try
    {
        fieldsWriter.AddDocument(doc);
    }
    finally
    {
        fieldsWriter.Close();
    }

    // invert doc into postingTable
    postingTable.Clear(); // clear postingTable
    fieldLengths = new int[fieldInfos.Size()]; // init fieldLengths
    fieldPositions = new int[fieldInfos.Size()]; // init fieldPositions
    fieldOffsets = new int[fieldInfos.Size()]; // init fieldOffsets
    fieldBoosts = new float[fieldInfos.Size()]; // init fieldBoosts

    // every field starts out with the document-level boost
    float boost = doc.GetBoost();
    for (int i = 0; i < fieldBoosts.Length; i++)
    {
        fieldBoosts[i] = boost;
    }

    InvertDocument(doc);

    // sort postingTable into an array
    Posting[] postings = SortPostingTable();

    // write postings
    WritePostings(postings, segment);

    // write norms of indexed fields
    WriteNorms(segment);
}
/// <summary>
/// Positive test of FieldInfos: adds all of DocHelper's fields, round-trips
/// the FieldInfos through a file in a RAMDirectory, and verifies that the
/// term-vector and omit-norms flags survive serialization.
/// </summary>
public virtual void Test()
{
    // Positive test of FieldInfos
    Assert.IsNotNull(testDoc);
    FieldInfos fieldInfos = new FieldInfos();
    fieldInfos.Add(testDoc);
    // Since the complement is stored as well in the fields map
    Assert.AreEqual(DocHelper.all.Count, fieldInfos.Size()); // this is all b/c we are using the no-arg constructor
    RAMDirectory dir = new RAMDirectory();
    System.String name = "testFile";
    IndexOutput output = dir.CreateOutput(name, null);
    Assert.IsNotNull(output);
    // Use a RAMOutputStream
    try
    {
        fieldInfos.Write(output);
        output.Close();
        Assert.IsTrue(output.Length > 0);
        FieldInfos readIn = new FieldInfos(dir, name, null);
        Assert.AreEqual(fieldInfos.Size(), readIn.Size());

        FieldInfo info = readIn.FieldInfo("textField1");
        Assert.IsNotNull(info);
        Assert.IsFalse(info.storeTermVector_ForNUnit);
        Assert.IsFalse(info.omitNorms_ForNUnit);

        info = readIn.FieldInfo("textField2");
        Assert.IsNotNull(info);
        Assert.IsTrue(info.storeTermVector_ForNUnit);
        Assert.IsFalse(info.omitNorms_ForNUnit);

        info = readIn.FieldInfo("textField3");
        Assert.IsNotNull(info);
        Assert.IsFalse(info.storeTermVector_ForNUnit);
        Assert.IsTrue(info.omitNorms_ForNUnit);

        info = readIn.FieldInfo("omitNorms");
        Assert.IsNotNull(info);
        Assert.IsFalse(info.storeTermVector_ForNUnit);
        Assert.IsTrue(info.omitNorms_ForNUnit);

        dir.Close();
    }
    catch (System.IO.IOException e)
    {
        // Fail with the exception detail instead of a bare Assert.IsTrue(false),
        // which would hide the actual cause of the failure.
        Assert.Fail("unexpected IOException: " + e);
    }
}
/// <summary>
/// Positive test of FieldInfos: adds all of DocHelper's fields, round-trips
/// the FieldInfos through a file in a RAMDirectory, and verifies that the
/// term-vector and omit-norms flags survive serialization.
/// </summary>
public virtual void Test()
{
    // Positive test of FieldInfos
    Assert.IsNotNull(testDoc);
    FieldInfos fieldInfos = new FieldInfos();
    fieldInfos.Add(testDoc);
    // Since the complement is stored as well in the fields map
    Assert.AreEqual(DocHelper.all.Count, fieldInfos.Size()); // this is all b/c we are using the no-arg constructor
    RAMDirectory dir = new RAMDirectory();
    System.String name = "testFile";
    IndexOutput output = dir.CreateOutput(name);
    Assert.IsNotNull(output);
    // Use a RAMOutputStream
    try
    {
        fieldInfos.Write(output);
        output.Close();
        Assert.IsTrue(output.Length() > 0);
        FieldInfos readIn = new FieldInfos(dir, name);
        Assert.AreEqual(fieldInfos.Size(), readIn.Size());

        FieldInfo info = readIn.FieldInfo("textField1");
        Assert.IsNotNull(info);
        Assert.IsFalse(info.storeTermVector_ForNUnit);
        Assert.IsFalse(info.omitNorms_ForNUnit);

        info = readIn.FieldInfo("textField2");
        Assert.IsNotNull(info);
        Assert.IsTrue(info.storeTermVector_ForNUnit);
        Assert.IsFalse(info.omitNorms_ForNUnit);

        info = readIn.FieldInfo("textField3");
        Assert.IsNotNull(info);
        Assert.IsFalse(info.storeTermVector_ForNUnit);
        Assert.IsTrue(info.omitNorms_ForNUnit);

        info = readIn.FieldInfo("omitNorms");
        Assert.IsNotNull(info);
        Assert.IsFalse(info.storeTermVector_ForNUnit);
        Assert.IsTrue(info.omitNorms_ForNUnit);

        dir.Close();
    }
    catch (System.IO.IOException e)
    {
        // Use the caught exception (originally declared but unused) so the
        // failure reports the actual cause instead of a bare Assert.IsTrue(false).
        Assert.Fail("unexpected IOException: " + e);
    }
}
/// <summary>
/// Merges field names and stored field values from every reader into this
/// writer's segment: builds the combined FieldInfos, writes it to
/// "&lt;segment&gt;.fnm", then copies each non-deleted document's stored fields.
/// </summary>
/// <returns> The number of documents in all of the readers
/// </returns>
/// <throws> IOException </throws>
private int MergeFields()
{
    fieldInfos = new FieldInfos(); // merge field names

    // Register each reader's fields, most specific term-vector option first,
    // so the merged FieldInfos carries the union of all flags.
    for (int readerIndex = 0; readerIndex < readers.Count; readerIndex++)
    {
        IndexReader reader = (IndexReader)readers[readerIndex];
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
        fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
    }
    fieldInfos.Write(directory, segment + ".fnm");

    int mergedDocCount = 0;
    FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

    // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
    // in merge mode, we use this FieldSelector
    FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
    try
    {
        for (int readerIndex = 0; readerIndex < readers.Count; readerIndex++)
        {
            IndexReader reader = (IndexReader)readers[readerIndex];
            int maxDoc = reader.MaxDoc();
            for (int docId = 0; docId < maxDoc; docId++)
            {
                if (reader.IsDeleted(docId))
                {
                    continue; // skip deleted docs
                }
                fieldsWriter.AddDocument(reader.Document(docId, fieldSelectorMerge));
                mergedDocCount++;
            }
        }
    }
    finally
    {
        fieldsWriter.Close();
    }
    return mergedDocCount;
}
/// <summary>
/// Flushes every per-thread field processor's consumer and trims its field
/// state, flushes stored fields, then persists the segment's FieldInfos file
/// and records it among the flushed files.
/// </summary>
public override void Flush(IDictionary<DocConsumerPerThread, DocConsumerPerThread> threads, SegmentWriteState state)
{
    Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>> childThreadsAndFields = new Support.Dictionary<DocFieldConsumerPerThread, IList<DocFieldConsumerPerField>>();
    foreach (DocFieldProcessorPerThread processorThread in threads.Keys)
    {
        childThreadsAndFields[processorThread.consumer] = processorThread.Fields();
        processorThread.TrimFields(state);
    }

    fieldsWriter.Flush(state);
    consumer.Flush(childThreadsAndFields, state);

    // Important to save after asking consumer to flush so
    // consumer can alter the FieldInfo* if necessary.  EG,
    // FreqProxTermsWriter does this with
    // FieldInfo.storePayload.
    System.String fieldInfosFileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
    fieldInfos.Write(state.directory, fieldInfosFileName);
    state.flushedFiles.Add(fieldInfosFileName);
}
/// <summary>
/// Merges field names and stored field values from every reader into this
/// writer's segment: builds the combined FieldInfos, writes it to
/// "&lt;segment&gt;.fnm", then copies each non-deleted document's stored fields.
/// </summary>
/// <returns> The number of documents in all of the readers
/// </returns>
/// <throws> IOException </throws>
private int MergeFields()
{
    fieldInfos = new FieldInfos(); // merge field names

    // Register each reader's fields, most specific term-vector option first,
    // so the merged FieldInfos carries the union of all flags.
    for (int readerIndex = 0; readerIndex < readers.Count; readerIndex++)
    {
        IndexReader reader = (IndexReader)readers[readerIndex];
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
        fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
    }
    fieldInfos.Write(directory, segment + ".fnm");

    int mergedDocCount = 0;
    FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
    try
    {
        for (int readerIndex = 0; readerIndex < readers.Count; readerIndex++)
        {
            IndexReader reader = (IndexReader)readers[readerIndex];
            int maxDoc = reader.MaxDoc();
            for (int docId = 0; docId < maxDoc; docId++)
            {
                if (reader.IsDeleted(docId))
                {
                    continue; // skip deleted docs
                }
                fieldsWriter.AddDocument(reader.Document(docId));
                mergedDocCount++;
            }
        }
    }
    finally
    {
        fieldsWriter.Close();
    }
    return mergedDocCount;
}
/// <summary>
/// Flushes each per-thread field processor's consumer state, then writes the
/// segment's field infos file ("&lt;segmentName&gt;.fnm").
/// </summary>
internal override void Flush(ICollection<object> threads, DocumentsWriter.FlushState state)
{
    IDictionary<object, ICollection<object>> childThreadsAndFields = new Dictionary<object, ICollection<object>>();
    foreach (object thread in threads)
    {
        DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread)thread;
        childThreadsAndFields[perThread.consumer] = perThread.Fields();
        perThread.trimFields(state);
    }

    consumer.flush(childThreadsAndFields, state);

    // Important to save after asking consumer to flush so
    // consumer can alter the FieldInfo* if necessary.  EG,
    // FreqProxTermsWriter does this with
    // FieldInfo.storePayload.
    fieldInfos.Write(state.directory, state.segmentName + ".fnm");
}
/// <summary>
/// Flushes every per-thread field processor's consumer and trims its field
/// state, flushes stored fields, then persists the segment's FieldInfos file
/// and records it among the flushed files.
/// </summary>
public override void Flush(System.Collections.ICollection threads, SegmentWriteState state)
{
    System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable();
    // threads is a collection of DictionaryEntry; the per-thread processor is the key
    foreach (System.Collections.DictionaryEntry entry in threads)
    {
        DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread)entry.Key;
        childThreadsAndFields[perThread.consumer] = perThread.Fields();
        perThread.TrimFields(state);
    }

    fieldsWriter.Flush(state);
    consumer.Flush(childThreadsAndFields, state);

    // Important to save after asking consumer to flush so
    // consumer can alter the FieldInfo* if necessary.  EG,
    // FreqProxTermsWriter does this with
    // FieldInfo.storePayload.
    System.String fieldInfosFileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
    fieldInfos.Write(state.directory, fieldInfosFileName);
    SupportClass.CollectionsHelper.AddIfNotContains(state.flushedFiles, fieldInfosFileName);
}
/// <summary> Merges field names and stored field values from all readers
/// into this writer's segment, writing the combined FieldInfos to
/// "&lt;segment&gt;.fnm" and copying every non-deleted document.
/// </summary>
/// <returns> The number of documents in all of the readers
/// </returns>
/// <throws> IOException </throws>
private int MergeFields()
{
    fieldInfos = new FieldInfos(); // merge field names
    int docCount = 0;
    // Register each reader's fields, most specific term-vector option first.
    for (int i = 0; i < readers.Count; i++)
    {
        IndexReader reader = (IndexReader) readers[i];
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
        fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
    }
    fieldInfos.Write(directory, segment + ".fnm");

    FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
    // Close() in finally so the stored-fields files are released even on error.
    try
    {
        for (int i = 0; i < readers.Count; i++)
        {
            IndexReader reader = (IndexReader) readers[i];
            int maxDoc = reader.MaxDoc();
            for (int j = 0; j < maxDoc; j++)
                if (!reader.IsDeleted(j))
                {
                    // skip deleted docs
                    fieldsWriter.AddDocument(reader.Document(j));
                    docCount++;
                }
        }
    }
    finally
    {
        fieldsWriter.Close();
    }
    return docCount;
}
/// <summary> Merges field names and stored field values from all readers
/// into this writer's segment, writing the combined FieldInfos to
/// "&lt;segment&gt;.fnm" and copying every non-deleted document.
/// </summary>
/// <returns> The number of documents in all of the readers
/// </returns>
/// <throws> IOException </throws>
private int MergeFields()
{
    fieldInfos = new FieldInfos(); // merge field names
    int docCount = 0;
    // Register each reader's fields, most specific term-vector option first.
    for (int i = 0; i < readers.Count; i++)
    {
        IndexReader reader = (IndexReader) readers[i];
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
        fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
    }
    fieldInfos.Write(directory, segment + ".fnm");

    FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);

    // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
    // in merge mode, we use this FieldSelector
    FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
    // Close() in finally so the stored-fields files are released even on error.
    try
    {
        for (int i = 0; i < readers.Count; i++)
        {
            IndexReader reader = (IndexReader) readers[i];
            int maxDoc = reader.MaxDoc();
            for (int j = 0; j < maxDoc; j++)
                if (!reader.IsDeleted(j))
                {
                    // skip deleted docs
                    fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
                    docCount++;
                }
        }
    }
    finally
    {
        fieldsWriter.Close();
    }
    return docCount;
}