/// <summary>
/// Writes a single document as the segment named <paramref name="segment"/>.
/// In order: the field names are written to <c>segment + ".fnm"</c>, the field
/// values are written through a <c>FieldsWriter</c>, the document is inverted
/// into <c>postingTable</c>, the sorted postings are written, and finally the
/// norms are written.
/// </summary>
/// <param name="segment">Name of the segment; used as the prefix of every file written.</param>
/// <param name="doc">The document to add.</param>
/*internal*/ public void AddDocument(System.String segment, Document doc)
{
    // Collect the document's field names and persist them.
    fieldInfos = new FieldInfos();
    fieldInfos.Add(doc);
    fieldInfos.Write(directory, segment + ".fnm");

    // Persist the field values; the writer is closed even when AddDocument throws.
    FieldsWriter storedFieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
    try
    {
        storedFieldsWriter.AddDocument(doc);
    }
    finally
    {
        storedFieldsWriter.Close();
    }

    // Reset the per-document inversion state: one slot per known field.
    postingTable.Clear();
    int fieldCount = fieldInfos.Size();
    fieldLengths = new int[fieldCount];
    fieldPositions = new int[fieldCount];
    fieldBoosts = new float[fieldCount];

    // Every field starts out with the document-level boost.
    float docBoost = doc.GetBoost();
    for (int slot = 0; slot < fieldBoosts.Length; slot++)
    {
        fieldBoosts[slot] = docBoost;
    }

    // Invert the document into postingTable.
    InvertDocument(doc);

    // Flatten the posting table into a sorted array and write it out.
    Posting[] sortedPostings = SortPostingTable();
    WritePostings(sortedPostings, segment);

    // Write the norms of the indexed fields.
    WriteNorms(doc, segment);
}
/// <summary>
/// Writes a single document as the segment named <paramref name="segment"/>.
/// In order: the field names are written to <c>segment + ".fnm"</c>, the field
/// values are written through a <c>FieldsWriter</c>, the document is inverted
/// into <c>postingTable</c>, the sorted postings are written, and finally the
/// norms are written.
/// </summary>
/// <param name="segment">Name of the segment; used as the prefix of every file written.</param>
/// <param name="doc">The document to add.</param>
/*internal*/ public void AddDocument(System.String segment, Document doc)
{
    // Collect the document's field names and persist them.
    fieldInfos = new FieldInfos();
    fieldInfos.Add(doc);
    fieldInfos.Write(directory, segment + ".fnm");

    // Persist the field values; the writer is closed even when AddDocument throws.
    FieldsWriter storedFieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
    try
    {
        storedFieldsWriter.AddDocument(doc);
    }
    finally
    {
        storedFieldsWriter.Close();
    }

    // Reset the per-document inversion state: one slot per known field.
    postingTable.Clear();
    int fieldCount = fieldInfos.Size();
    fieldLengths = new int[fieldCount];
    fieldPositions = new int[fieldCount];
    fieldBoosts = new float[fieldCount];

    // Every field starts out with the document-level boost.
    float docBoost = doc.GetBoost();
    for (int slot = 0; slot < fieldBoosts.Length; slot++)
    {
        fieldBoosts[slot] = docBoost;
    }

    // Invert the document into postingTable.
    InvertDocument(doc);

    // Flatten the posting table into a sorted array and write it out.
    Posting[] sortedPostings = SortPostingTable();
    WritePostings(sortedPostings, segment);

    // Write the norms of the indexed fields.
    WriteNorms(doc, segment);
}
/// <summary>
/// Packs the files of the current segment into a single compound file named
/// <c>segment + ".cfs"</c> and then deletes the individual source files from
/// the directory.
/// </summary>
private void CreateCompoundFile()
{
    CompoundFileWriter compoundWriter = new CompoundFileWriter(directory, segment + ".cfs");

    System.Collections.ArrayList sourceFiles =
        new System.Collections.ArrayList(COMPOUND_EXTENSIONS.Length + fieldInfos.Size());

    // Basic files: one per compound extension.
    foreach (System.String extension in COMPOUND_EXTENSIONS)
    {
        sourceFiles.Add(segment + "." + extension);
    }

    // Field norm files: one ".f<number>" file per indexed field.
    for (int fieldNumber = 0; fieldNumber < fieldInfos.Size(); fieldNumber++)
    {
        FieldInfo info = fieldInfos.FieldInfo(fieldNumber);
        if (!info.isIndexed)
        {
            continue;
        }
        sourceFiles.Add(segment + ".f" + fieldNumber);
    }

    // Vector files, only when some field has term vectors.
    if (fieldInfos.HasVectors())
    {
        foreach (System.String extension in VECTOR_EXTENSIONS)
        {
            sourceFiles.Add(segment + "." + extension);
        }
    }

    // Register every collected file with the compound writer.
    foreach (System.String fileName in sourceFiles)
    {
        compoundWriter.AddFile(fileName);
    }

    // Closing the writer performs the merge.
    compoundWriter.Close();

    // The sources are deleted only after the merge has completed.
    foreach (System.String fileName in sourceFiles)
    {
        directory.DeleteFile(fileName);
    }
}
/// <summary>Creates a term vectors writer for the specified segment in the
/// specified directory. A new TermVectorsWriter should be created for each
/// segment. Three files are created, named <c>segment + TVX_EXTENSION</c>,
/// <c>segment + TVD_EXTENSION</c> and <c>segment + TVF_EXTENSION</c>, and
/// <c>FORMAT_VERSION</c> is written as an int at the start of each.
/// </summary>
/// <param name="directory">Directory in which the three files are created.</param>
/// <param name="segment">Segment name used as the prefix of the file names.</param>
/// <param name="fieldInfos">Field descriptions retained by the writer; its
/// <c>Size()</c> is used as the initial capacity of the internal field list.</param>
public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
{
    this.fieldInfos = fieldInfos;

    // Open the term vector files in order, stamping each with the format version.
    tvx = directory.CreateFile(segment + TVX_EXTENSION);
    tvx.WriteInt(FORMAT_VERSION);

    tvd = directory.CreateFile(segment + TVD_EXTENSION);
    tvd.WriteInt(FORMAT_VERSION);

    tvf = directory.CreateFile(segment + TVF_EXTENSION);
    tvf.WriteInt(FORMAT_VERSION);

    // Both working lists are wrapped in synchronized ArrayList wrappers.
    System.Collections.ArrayList fieldList = new System.Collections.ArrayList(fieldInfos.Size());
    fields = System.Collections.ArrayList.Synchronized(fieldList);

    System.Collections.ArrayList termList = new System.Collections.ArrayList(10);
    terms = System.Collections.ArrayList.Synchronized(termList);
}
/// <summary>
/// Builds the list of file names associated with this segment. For each of a
/// fixed set of extensions, <c>segment + "." + extension</c> is included only
/// if that file exists in the directory. A <c>segment + ".f" + fieldNumber</c>
/// entry is then added for every indexed field, without an existence check.
/// </summary>
/// <returns>A synchronized <c>ArrayList</c> of file name strings.</returns>
internal System.Collections.ArrayList Files()
{
    System.Collections.ArrayList result =
        System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));

    System.String[] extensions =
    {
        "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp"
    };

    // Fixed-extension files are listed only when they are actually present.
    foreach (System.String extension in extensions)
    {
        System.String candidate = segment + "." + extension;
        if (Directory().FileExists(candidate))
        {
            result.Add(candidate);
        }
    }

    // Norm files: one entry per indexed field.
    for (int fieldNumber = 0; fieldNumber < fieldInfos.Size(); fieldNumber++)
    {
        FieldInfo info = fieldInfos.FieldInfo(fieldNumber);
        if (!info.isIndexed)
        {
            continue;
        }
        result.Add(segment + ".f" + fieldNumber);
    }

    return result;
}
/// <summary>
/// Writes one norm file per indexed field. For field number <c>n</c> the norm
/// is <c>fieldBoosts[n]</c> multiplied by the similarity's length norm for the
/// field's name and <c>fieldLengths[n]</c>; it is encoded with
/// <c>Similarity.EncodeNorm</c> and written as a single byte to
/// <c>segment + ".f" + n</c>.
/// </summary>
/// <param name="doc">Not read by this method.</param>
/// <param name="segment">Segment name used as the prefix of each norm file.</param>
private void WriteNorms(Document doc, System.String segment)
{
    for (int fieldNumber = 0; fieldNumber < fieldInfos.Size(); fieldNumber++)
    {
        FieldInfo info = fieldInfos.FieldInfo(fieldNumber);
        if (!info.isIndexed)
        {
            // Only indexed fields get a norm file.
            continue;
        }

        float boost = fieldBoosts[fieldNumber];
        float norm = boost * similarity.LengthNorm(info.name, fieldLengths[fieldNumber]);

        OutputStream normStream = directory.CreateFile(segment + ".f" + fieldNumber);
        try
        {
            normStream.WriteByte(Monodoc.Lucene.Net.Search.Similarity.EncodeNorm(norm));
        }
        finally
        {
            // Close the stream even when encoding or writing fails.
            normStream.Close();
        }
    }
}
/// <summary>Creates a term vectors writer for the specified segment in the
/// specified directory. A new TermVectorsWriter should be created for each
/// segment. Three files are created, named <c>segment + TVX_EXTENSION</c>,
/// <c>segment + TVD_EXTENSION</c> and <c>segment + TVF_EXTENSION</c>, and
/// <c>FORMAT_VERSION</c> is written as an int at the start of each.
/// </summary>
/// <param name="directory">Directory in which the three files are created.</param>
/// <param name="segment">Segment name used as the prefix of the file names.</param>
/// <param name="fieldInfos">Field descriptions retained by the writer; its
/// <c>Size()</c> is used as the initial capacity of the internal field list.</param>
public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
{
    this.fieldInfos = fieldInfos;

    // Open the term vector files in order, stamping each with the format version.
    tvx = directory.CreateFile(segment + TVX_EXTENSION);
    tvx.WriteInt(FORMAT_VERSION);

    tvd = directory.CreateFile(segment + TVD_EXTENSION);
    tvd.WriteInt(FORMAT_VERSION);

    tvf = directory.CreateFile(segment + TVF_EXTENSION);
    tvf.WriteInt(FORMAT_VERSION);

    // Both working lists are wrapped in synchronized ArrayList wrappers.
    System.Collections.ArrayList fieldList = new System.Collections.ArrayList(fieldInfos.Size());
    fields = System.Collections.ArrayList.Synchronized(fieldList);

    System.Collections.ArrayList termList = new System.Collections.ArrayList(10);
    terms = System.Collections.ArrayList.Synchronized(termList);
}