public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { lock (this) { if (tvx != null) { if (state.numDocsInStore > 0) { // In case there are some final documents that we // didn't see (because they hit a non-aborting exception): Fill(state.numDocsInStore - docWriter.GetDocStoreOffset()); } tvx.Flush(); tvd.Flush(); tvf.Flush(); } System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; System.Collections.IEnumerator it2 = ((System.Collections.ICollection)entry.Value).GetEnumerator(); while (it2.MoveNext()) { TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField)((System.Collections.DictionaryEntry)it2.Current).Key; perField.termsHashPerField.Reset(); perField.ShrinkHash(); } TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread)entry.Key; perThread.termsHashPerThread.Reset(true); } } }
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { System.Collections.IDictionary oneThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IDictionary twoThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key; System.Collections.ICollection fields = (System.Collections.ICollection) entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); System.Collections.Hashtable oneFields = new System.Collections.Hashtable(); System.Collections.Hashtable twoFields = new System.Collections.Hashtable(); while (fieldsIt.MoveNext()) { DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldsIt.Current; SupportClass.CollectionsHelper.AddIfNotContains(oneFields, perField.one); SupportClass.CollectionsHelper.AddIfNotContains(twoFields, perField.two); } oneThreadsAndFields[perThread.one] = oneFields; twoThreadsAndFields[perThread.two] = twoFields; } one.Flush(oneThreadsAndFields, state); two.Flush(twoThreadsAndFields, state); }
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IDictionary endChildThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; DocInverterPerThread perThread = (DocInverterPerThread)entry.Key; System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); System.Collections.Hashtable childFields = new System.Collections.Hashtable(); System.Collections.Hashtable endChildFields = new System.Collections.Hashtable(); while (fieldsIt.MoveNext()) { DocInverterPerField perField = (DocInverterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key; childFields[perField.consumer] = perField.consumer; endChildFields[perField.endConsumer] = perField.endConsumer; } childThreadsAndFields[perThread.consumer] = childFields; endChildThreadsAndFields[perThread.endConsumer] = endChildFields; } consumer.Flush(childThreadsAndFields, state); endConsumer.Flush(endChildThreadsAndFields, state); }
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { lock (this) { if (tvx != null) { if (state.numDocsInStore > 0) // In case there are some final documents that we // didn't see (because they hit a non-aborting exception): Fill(state.numDocsInStore - docWriter.GetDocStoreOffset()); tvx.Flush(); tvd.Flush(); tvf.Flush(); } System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; System.Collections.IEnumerator it2 = ((System.Collections.ICollection) entry.Value).GetEnumerator(); while (it2.MoveNext()) { TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) ((System.Collections.DictionaryEntry) it2.Current).Key; perField.termsHashPerField.Reset(); perField.ShrinkHash(); } TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key; perThread.termsHashPerThread.Reset(true); } } }
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IDictionary endChildThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; DocInverterPerThread perThread = (DocInverterPerThread) entry.Key; System.Collections.ICollection fields = (System.Collections.ICollection) entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); System.Collections.Hashtable childFields = new System.Collections.Hashtable(); System.Collections.Hashtable endChildFields = new System.Collections.Hashtable(); while (fieldsIt.MoveNext()) { DocInverterPerField perField = (DocInverterPerField) ((System.Collections.DictionaryEntry) fieldsIt.Current).Key; childFields[perField.consumer] = perField.consumer; endChildFields[perField.endConsumer] = perField.endConsumer; } childThreadsAndFields[perThread.consumer] = childFields; endChildThreadsAndFields[perThread.endConsumer] = endChildFields; } consumer.Flush(childThreadsAndFields, state); endConsumer.Flush(endChildThreadsAndFields, state); }
public virtual CacheEntry[] GetCacheEntries() { System.Collections.IList result = new System.Collections.ArrayList(17); System.Collections.IEnumerator outerKeys = caches.Keys.GetEnumerator(); while (outerKeys.MoveNext()) { System.Type cacheType = (System.Type)outerKeys.Current; Cache cache = (Cache)caches[cacheType]; System.Collections.IEnumerator innerKeys = cache.readerCache.Keys.GetEnumerator(); while (innerKeys.MoveNext()) { // we've now materialized a hard ref System.Object readerKey = innerKeys.Current; // innerKeys was backed by WeakHashMap, sanity check // that it wasn't GCed before we made hard ref if (null != readerKey && cache.readerCache.Contains(readerKey)) { System.Collections.IDictionary innerCache = ((System.Collections.IDictionary)cache.readerCache[readerKey]); System.Collections.IEnumerator entrySetIterator = new System.Collections.Hashtable(innerCache).GetEnumerator(); while (entrySetIterator.MoveNext()) { System.Collections.DictionaryEntry mapEntry = (System.Collections.DictionaryEntry)entrySetIterator.Current; Entry entry = (Entry)mapEntry.Key; result.Add(new CacheEntryImpl(readerKey, entry.field, cacheType, entry.type, entry.custom, entry.locale, mapEntry.Value)); } } } } return((CacheEntry[])new System.Collections.ArrayList(result).ToArray(typeof(CacheEntry))); }
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { System.Collections.IDictionary oneThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IDictionary twoThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread)entry.Key; System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); System.Collections.Hashtable oneFields = new System.Collections.Hashtable(); System.Collections.Hashtable twoFields = new System.Collections.Hashtable(); while (fieldsIt.MoveNext()) { DocFieldConsumersPerField perField = (DocFieldConsumersPerField)fieldsIt.Current; SupportClass.CollectionsHelper.AddIfNotContains(oneFields, perField.one); SupportClass.CollectionsHelper.AddIfNotContains(twoFields, perField.two); } oneThreadsAndFields[perThread.one] = oneFields; twoThreadsAndFields[perThread.two] = twoFields; } one.Flush(oneThreadsAndFields, state); two.Flush(twoThreadsAndFields, state); }
public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper) { EnsureOpen(); System.Collections.IEnumerator i = new System.Collections.Hashtable(fieldToReader).GetEnumerator(); while (i.MoveNext()) { System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry)i.Current; System.String field = (System.String)e.Key; IndexReader reader = (IndexReader)e.Value; reader.GetTermFreqVector(docNumber, field, mapper); } }
// make sure all the values in the maps match private void AssertSameValues(System.Collections.Hashtable m1, System.Collections.Hashtable m2) { int n = m1.Count; int m = m2.Count; Assert.AreEqual(n, m); System.Collections.IEnumerator iter = m1.Keys.GetEnumerator(); // enumerate the keys; enumerating the Hashtable itself yields DictionaryEntry objects, so m1[key] would always be null while (iter.MoveNext()) { System.Object key = iter.Current; Assert.AreEqual(m1[key], m2[key]); } }
// get all vectors public override TermFreqVector[] GetTermFreqVectors(int n) { System.Collections.ArrayList results = new System.Collections.ArrayList(); System.Collections.IEnumerator i = new System.Collections.Hashtable(fieldToReader).GetEnumerator(); while (i.MoveNext()) { System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry)i.Current; System.String field = (System.String)e.Key; // fieldToReader maps field name -> IndexReader IndexReader reader = (IndexReader)e.Value; TermFreqVector vector = reader.GetTermFreqVector(n, field); if (vector != null) { results.Add(vector); } } return((TermFreqVector[])(results.ToArray(typeof(TermFreqVector)))); }
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { lock (this) { // NOTE: it's possible that all documents seen in this segment // hit non-aborting exceptions, in which case we will // not have yet init'd the TermVectorsWriter. This is // actually OK (unlike in the stored fields case) // because, although FieldInfos.hasVectors() will return // true, the TermVectorsReader gracefully handles // non-existence of the term vectors files. if (tvx != null) { if (state.numDocsInStore > 0) { // In case there are some final documents that we // didn't see (because they hit a non-aborting exception): Fill(state.numDocsInStore - docWriter.GetDocStoreOffset()); } tvx.Flush(); tvd.Flush(); tvf.Flush(); } System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; System.Collections.IEnumerator it2 = ((System.Collections.ICollection)entry.Value).GetEnumerator(); while (it2.MoveNext()) { TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField)((System.Collections.DictionaryEntry)it2.Current).Key; perField.termsHashPerField.Reset(); perField.ShrinkHash(); } TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread)entry.Key; perThread.termsHashPerThread.Reset(true); } } }
public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { lock (this) { // NOTE: it's possible that all documents seen in this segment // hit non-aborting exceptions, in which case we will // not have yet init'd the TermVectorsWriter. This is // actually OK (unlike in the stored fields case) // because, although FieldInfos.hasVectors() will return // true, the TermVectorsReader gracefully handles // non-existence of the term vectors files. if (tvx != null) { if (state.numDocsInStore > 0) // In case there are some final documents that we // didn't see (because they hit a non-aborting exception): Fill(state.numDocsInStore - docWriter.GetDocStoreOffset()); tvx.Flush(); tvd.Flush(); tvf.Flush(); } System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; System.Collections.IEnumerator it2 = ((System.Collections.ICollection) entry.Value).GetEnumerator(); while (it2.MoveNext()) { TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) ((System.Collections.DictionaryEntry) it2.Current).Key; perField.termsHashPerField.Reset(); perField.ShrinkHash(); } TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key; perThread.termsHashPerThread.Reset(true); } } }
// get all vectors public override TermFreqVector[] GetTermFreqVectors(int n) { System.Collections.ArrayList results = new System.Collections.ArrayList(); System.Collections.IEnumerator i = new System.Collections.Hashtable(fieldToReader).GetEnumerator(); while (i.MoveNext()) { System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry) i.Current; System.String field = (System.String) e.Key; // fieldToReader maps field name -> IndexReader, so the key is the field name IndexReader reader = (IndexReader) e.Value; TermFreqVector vector = reader.GetTermFreqVector(n, field); if (vector != null) results.Add(vector); } return (TermFreqVector[]) (results.ToArray(typeof(TermFreqVector))); }
public virtual void TestMapper() { TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos); Assert.IsTrue(reader != null); SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); reader.Get(0, mapper); System.Collections.Generic.SortedDictionary<Object,Object> set_Renamed = mapper.GetTermVectorEntrySet(); Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be"); //three fields, 4 terms, all terms are the same Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4); //Check offsets and positions for (System.Collections.IEnumerator iterator = set_Renamed.Keys.GetEnumerator(); iterator.MoveNext(); ) { TermVectorEntry tve = (TermVectorEntry) iterator.Current; Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be"); } mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); reader.Get(1, mapper); set_Renamed = mapper.GetTermVectorEntrySet(); Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be"); //three fields, 4 terms, all terms are the same Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4); //Should have offsets and positions b/c we are munging all the fields together for (System.Collections.IEnumerator iterator = set_Renamed.Keys.GetEnumerator(); iterator.MoveNext(); ) { TermVectorEntry tve = (TermVectorEntry) iterator.Current; Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be"); } FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); reader.Get(0, fsMapper); System.Collections.IDictionary map = fsMapper.GetFieldToTerms(); Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length); for (System.Collections.IEnumerator iterator = new System.Collections.Hashtable(map).GetEnumerator(); iterator.MoveNext(); ) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iterator.Current; System.Collections.Generic.SortedDictionary<Object,Object> sortedSet = (System.Collections.Generic.SortedDictionary<Object,Object>)entry.Value; Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4); for (System.Collections.IEnumerator inner = sortedSet.Keys.GetEnumerator(); inner.MoveNext(); ) { TermVectorEntry tve = (TermVectorEntry) inner.Current; Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); //Check offsets and positions. 
Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); System.String field = tve.GetField(); if (field.Equals(testFields[0])) { //should have offsets Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be"); } else if (field.Equals(testFields[1])) { //should not have offsets Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be"); } } } //Try mapper that ignores offs and positions fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator()); reader.Get(0, fsMapper); map = fsMapper.GetFieldToTerms(); Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length); for (System.Collections.IEnumerator iterator = new System.Collections.Hashtable(map).GetEnumerator(); iterator.MoveNext(); ) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iterator.Current; System.Collections.Generic.SortedDictionary<Object,Object> sortedSet = (System.Collections.Generic.SortedDictionary<Object,Object>)entry.Value; Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4); for (System.Collections.IEnumerator inner = sortedSet.Keys.GetEnumerator(); inner.MoveNext(); ) { TermVectorEntry tve = (TermVectorEntry) inner.Current; Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); //Check offsets and positions. Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); System.String field = tve.GetField(); if (field.Equals(testFields[0])) { //offsets and positions are ignored by this mapper, so both should be null Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be"); } else if (field.Equals(testFields[1])) { //should not have offsets Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be"); } } } // test setDocumentNumber() IndexReader ir = IndexReader.Open(dir); DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper(); Assert.AreEqual(- 1, docNumAwareMapper.GetDocumentNumber()); ir.GetTermFreqVector(0, docNumAwareMapper); Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber()); docNumAwareMapper.SetDocumentNumber(- 1); ir.GetTermFreqVector(1, docNumAwareMapper); Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber()); docNumAwareMapper.SetDocumentNumber(- 1); ir.GetTermFreqVector(0, "f1", docNumAwareMapper); Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber()); docNumAwareMapper.SetDocumentNumber(- 1); ir.GetTermFreqVector(1, "f2", docNumAwareMapper); Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber()); docNumAwareMapper.SetDocumentNumber(- 1); ir.GetTermFreqVector(0, "f1", docNumAwareMapper); Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber()); ir.Close(); }
/// <summary> Commit all segment readers in the pool.</summary> /// <throws> IOException </throws> internal virtual void Commit() { lock (this) { System.Collections.IEnumerator iter = new System.Collections.Hashtable(readerMap).GetEnumerator(); while (iter.MoveNext()) { System.Collections.DictionaryEntry ent = (System.Collections.DictionaryEntry) iter.Current; SegmentReader sr = (SegmentReader) ent.Value; if (sr.hasChanges) { System.Diagnostics.Debug.Assert(InfoIsLive(sr.GetSegmentInfo())); sr.StartCommit(); bool success = false; try { sr.DoCommit(null); success = true; } finally { if (!success) { sr.RollbackCommit(); } } } } } }
// map must be Map<String, String> public virtual void WriteStringStringMap(System.Collections.IDictionary map) { if (map == null) { WriteInt(0); } else { WriteInt(map.Count); System.Collections.IEnumerator it = new System.Collections.Hashtable(map).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; WriteString((System.String) entry.Key); WriteString((System.String) entry.Value); } } }
/// <summary>Produce _X.nrm if any document had a field with norms /// not disabled /// </summary> public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { System.Collections.IDictionary byField = new System.Collections.Hashtable(); // Typically, each thread will have encountered the same // field. So first we collate by field, ie, all // per-thread field instances that correspond to the // same FieldInfo System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); System.Collections.ArrayList fieldsToRemove = new System.Collections.ArrayList(); while (fieldsIt.MoveNext()) { NormsWriterPerField perField = (NormsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key; if (perField.upto > 0) { // It has some norms System.Collections.IList l = (System.Collections.IList)byField[perField.fieldInfo]; if (l == null) { l = new System.Collections.ArrayList(); byField[perField.fieldInfo] = l; } l.Add(perField); } // Remove this field since we haven't seen it // since the previous flush else { fieldsToRemove.Add(perField); } } System.Collections.Hashtable fieldsHT = (System.Collections.Hashtable)fields; for (int i = 0; i < fieldsToRemove.Count; i++) { fieldsHT.Remove(fieldsToRemove[i]); } } System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION; state.flushedFiles[normsFileName] = normsFileName; IndexOutput normsOut = state.directory.CreateOutput(normsFileName); try { normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length); int numField = fieldInfos.Size(); int normCount = 0; for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++) { FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber); System.Collections.IList toMerge = (System.Collections.IList)byField[fieldInfo]; int upto = 0; if (toMerge != null) { int numFields = toMerge.Count; normCount++; NormsWriterPerField[] fields = new NormsWriterPerField[numFields]; int[] uptos = new int[numFields]; for (int j = 0; j < numFields; j++) { fields[j] = (NormsWriterPerField)toMerge[j]; } int numLeft = numFields; while (numLeft > 0) { System.Diagnostics.Debug.Assert(uptos [0] < fields [0].docIDs.Length, " uptos[0]=" + uptos [0] + " len=" + (fields [0].docIDs.Length)); int minLoc = 0; int minDocID = fields[0].docIDs[uptos[0]]; for (int j = 1; j < numLeft; j++) { int docID = fields[j].docIDs[uptos[j]]; if (docID < minDocID) { minDocID = docID; minLoc = j; } } System.Diagnostics.Debug.Assert(minDocID < state.numDocs); // Fill hole for (; upto < minDocID; upto++) { normsOut.WriteByte(defaultNorm); } normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]); (uptos[minLoc])++; upto++; if (uptos[minLoc] == fields[minLoc].upto) { fields[minLoc].Reset(); if (minLoc != numLeft - 1) { fields[minLoc] = fields[numLeft - 1]; uptos[minLoc] = uptos[numLeft - 1]; } numLeft--; } } // Fill final hole with defaultNorm for (; upto < state.numDocs; upto++) { normsOut.WriteByte(defaultNorm); } } else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) { normCount++; // Fill entire field with default norm: for (; upto < state.numDocs; upto++) { normsOut.WriteByte(defaultNorm); } } System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == 
normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" + (4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer()); } } finally { normsOut.Close(); } }
// get all vectors public override TermFreqVector[] GetTermFreqVectors(int n) { EnsureOpen(); System.Collections.ArrayList results = new System.Collections.ArrayList(); System.Collections.IEnumerator i = new System.Collections.Hashtable(fieldToReader).GetEnumerator(); while (i.MoveNext()) { System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry) i.Current; System.String field = (System.String) e.Key; IndexReader reader = (IndexReader) e.Value; TermFreqVector vector = reader.GetTermFreqVector(n, field); if (vector != null) results.Add(vector); } return (TermFreqVector[]) results.ToArray(typeof(TermFreqVector)); }
internal override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { lock (this) { System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable(); System.Collections.IDictionary nextThreadsAndFields; if (nextTermsHash != null) { nextThreadsAndFields = new System.Collections.Hashtable(); } else { nextThreadsAndFields = null; } System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; TermsHashPerThread perThread = (TermsHashPerThread)entry.Key; System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); System.Collections.Hashtable childFields = new System.Collections.Hashtable(); System.Collections.Hashtable nextChildFields; if (nextTermsHash != null) { nextChildFields = new System.Collections.Hashtable(); } else { nextChildFields = null; } while (fieldsIt.MoveNext()) { TermsHashPerField perField = (TermsHashPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key; childFields[perField.consumer] = perField.consumer; if (nextTermsHash != null) { nextChildFields[perField.nextPerField] = perField.nextPerField; } } childThreadsAndFields[perThread.consumer] = childFields; if (nextTermsHash != null) { nextThreadsAndFields[perThread.nextPerThread] = nextChildFields; } } consumer.Flush(childThreadsAndFields, state); ShrinkFreePostings(threadsAndFields, state); if (nextTermsHash != null) { nextTermsHash.Flush(nextThreadsAndFields, state); } } }
// make sure all the values in the maps match private void AssertSameValues(System.Collections.Hashtable m1, System.Collections.Hashtable m2) { int n = m1.Count; int m = m2.Count; Assert.AreEqual(n, m); System.Collections.IEnumerator iter = m1.Keys.GetEnumerator(); // enumerate m1's keys; enumerating a new empty Hashtable would make this loop a no-op while (iter.MoveNext()) { System.Object key = iter.Current; System.Object o1 = m1[key]; System.Object o2 = m2[key]; if (o1 is System.Single) { Assert.AreEqual((float) ((System.Single) o1), (float) ((System.Single) o2), 1e-6); } else { Assert.AreEqual(m1[key], m2[key]); } } }
// Remaps all buffered deletes based on a completed // merge internal virtual void Remap(MergeDocIDRemapper mapper, SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount) { lock (this) { System.Collections.IDictionary newDeleteTerms; // Remap delete-by-term if (terms.Count > 0) { if (doTermSort) { newDeleteTerms = new System.Collections.Generic.SortedDictionary <object, object>(); } else { newDeleteTerms = new System.Collections.Hashtable(); } System.Collections.IEnumerator iter = new System.Collections.Hashtable(terms).GetEnumerator(); while (iter.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)iter.Current; Num num = (Num)entry.Value; newDeleteTerms[entry.Key] = new Num(mapper.Remap(num.GetNum())); } } else { newDeleteTerms = null; } // Remap delete-by-docID System.Collections.ArrayList newDeleteDocIDs; if (docIDs.Count > 0) { newDeleteDocIDs = new System.Collections.ArrayList(docIDs.Count); System.Collections.IEnumerator iter = docIDs.GetEnumerator(); while (iter.MoveNext()) { System.Int32 num = (System.Int32)iter.Current; newDeleteDocIDs.Add((System.Int32)mapper.Remap(num)); } } else { newDeleteDocIDs = null; } // Remap delete-by-query System.Collections.Hashtable newDeleteQueries; if (queries.Count > 0) { newDeleteQueries = new System.Collections.Hashtable(queries.Count); System.Collections.IEnumerator iter = new System.Collections.Hashtable(queries).GetEnumerator(); while (iter.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)iter.Current; System.Int32 num = (System.Int32)entry.Value; newDeleteQueries[entry.Key] = (System.Int32)mapper.Remap(num); } } else { newDeleteQueries = null; } if (newDeleteTerms != null) { terms = newDeleteTerms; } if (newDeleteDocIDs != null) { docIDs = newDeleteDocIDs; } if (newDeleteQueries != null) { queries = newDeleteQueries; } } }
/// <summary>This constructor is only used for {@link #Reopen()} </summary> internal DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, System.Collections.IDictionary oldNormsCache, bool readOnly, bool doClone, int termInfosIndexDivisor) { this.directory = directory; this.readOnly = readOnly; this.segmentInfos = infos; this.termInfosIndexDivisor = termInfosIndexDivisor; if (!readOnly) { // We assume that this segments_N was previously // properly sync'd: SupportClass.CollectionsHelper.AddAllIfNotContains(synced, infos.Files(directory, true)); } // we put the old SegmentReaders in a map, that allows us // to lookup a reader using its segment name System.Collections.IDictionary segmentReaders = new System.Collections.Hashtable(); if (oldReaders != null) { // create a Map SegmentName->SegmentReader for (int i = 0; i < oldReaders.Length; i++) { segmentReaders[oldReaders[i].GetSegmentName()] = (System.Int32) i; } } SegmentReader[] newReaders = new SegmentReader[infos.Count]; // remember which readers are shared between the old and the re-opened // DirectoryReader - we have to incRef those readers bool[] readerShared = new bool[infos.Count]; for (int i = infos.Count - 1; i >= 0; i--) { // find SegmentReader for this segment if (!segmentReaders.Contains(infos.Info(i).name)) { // this is a new segment, no old SegmentReader can be reused newReaders[i] = null; } else { System.Int32 oldReaderIndex = (System.Int32) segmentReaders[infos.Info(i).name]; // there is an old reader for this segment - we'll try to reopen it newReaders[i] = oldReaders[oldReaderIndex]; } bool success = false; try { SegmentReader newReader; if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].GetSegmentInfo().GetUseCompoundFile()) { // We should never see a totally new segment during cloning System.Diagnostics.Debug.Assert(!doClone); // this is a new reader; in case we hit an exception we can close it safely newReader = SegmentReader.Get(readOnly, infos.Info(i), termInfosIndexDivisor); } else { newReader = newReaders[i].ReopenSegment(infos.Info(i), doClone, readOnly); } if (newReader == newReaders[i]) { // this reader will be shared between the old and the new one, // so we must incRef it readerShared[i] = true; newReader.IncRef(); } else { readerShared[i] = false; newReaders[i] = newReader; } success = true; } finally { if (!success) { for (i++; i < infos.Count; i++) { if (newReaders[i] != null) { try { if (!readerShared[i]) { // this is a new subReader that is not used by the old one, // we can close it newReaders[i].Close(); } else { // this subReader is also used by the old reader, so instead // closing we must decRef it newReaders[i].DecRef(); } } catch (System.IO.IOException ignore) { // keep going - we want to clean up as much as possible } } } } } } // initialize the readers to calculate maxDoc before we try to reuse the old normsCache Initialize(newReaders); // try to copy unchanged norms from the old normsCache to the new one if (oldNormsCache != null) { System.Collections.IEnumerator it = new System.Collections.Hashtable(oldNormsCache).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; System.String field = (System.String) entry.Key; if (!HasNorms(field)) { continue; } byte[] oldBytes = (byte[]) entry.Value; byte[] bytes = new byte[MaxDoc()]; for (int i = 0; i < subReaders.Length; i++) { bool exist = segmentReaders.Contains(subReaders[i].GetSegmentName()); 
int oldReaderIndex = -1; if (exist) { oldReaderIndex = (System.Int32)segmentReaders[subReaders[i].GetSegmentName()]; } // this SegmentReader was not re-opened, we can copy all of its norms if (exist && (oldReaders[oldReaderIndex] == subReaders[i] || oldReaders[oldReaderIndex].norms[field] == subReaders[i].norms[field])) { // we don't have to synchronize here: either this constructor is called from a SegmentReader, // in which case no old norms cache is present, or it is called from MultiReader.reopen(), // which is synchronized Array.Copy(oldBytes, oldStarts[oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]); } else { subReaders[i].Norms(field, bytes, starts[i]); } } normsCache[field] = bytes; // update cache } } }
/// <summary>Produce _X.nrm if any document had a field with norms /// not disabled /// </summary> public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { System.Collections.IDictionary byField = new System.Collections.Hashtable(); // Typically, each thread will have encountered the same // field. So first we collate by field, ie, all // per-thread field instances that correspond to the // same FieldInfo System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; System.Collections.ICollection fields = (System.Collections.ICollection) entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); System.Collections.ArrayList fieldsToRemove = new System.Collections.ArrayList(); while (fieldsIt.MoveNext()) { NormsWriterPerField perField = (NormsWriterPerField) ((System.Collections.DictionaryEntry) fieldsIt.Current).Key; if (perField.upto > 0) { // It has some norms System.Collections.IList l = (System.Collections.IList) byField[perField.fieldInfo]; if (l == null) { l = new System.Collections.ArrayList(); byField[perField.fieldInfo] = l; } l.Add(perField); } // Remove this field since we haven't seen it // since the previous flush else { fieldsToRemove.Add(perField); } } System.Collections.Hashtable fieldsHT = (System.Collections.Hashtable)fields; for (int i = 0; i < fieldsToRemove.Count; i++) { fieldsHT.Remove(fieldsToRemove[i]); } } System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION; state.flushedFiles[normsFileName] = normsFileName; IndexOutput normsOut = state.directory.CreateOutput(normsFileName); try { normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length); int numField = fieldInfos.Size(); int normCount = 0; for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++) { FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber); System.Collections.IList toMerge = (System.Collections.IList) byField[fieldInfo]; int upto = 0; if (toMerge != null) { int numFields = toMerge.Count; normCount++; NormsWriterPerField[] fields = new NormsWriterPerField[numFields]; int[] uptos = new int[numFields]; for (int j = 0; j < numFields; j++) fields[j] = (NormsWriterPerField) toMerge[j]; int numLeft = numFields; while (numLeft > 0) { System.Diagnostics.Debug.Assert(uptos [0] < fields [0].docIDs.Length, " uptos[0]=" + uptos [0] + " len=" +(fields [0].docIDs.Length)); int minLoc = 0; int minDocID = fields[0].docIDs[uptos[0]]; for (int j = 1; j < numLeft; j++) { int docID = fields[j].docIDs[uptos[j]]; if (docID < minDocID) { minDocID = docID; minLoc = j; } } System.Diagnostics.Debug.Assert(minDocID < state.numDocs); // Fill hole for (; upto < minDocID; upto++) normsOut.WriteByte(defaultNorm); normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]); (uptos[minLoc])++; upto++; if (uptos[minLoc] == fields[minLoc].upto) { fields[minLoc].Reset(); if (minLoc != numLeft - 1) { fields[minLoc] = fields[numLeft - 1]; uptos[minLoc] = uptos[numLeft - 1]; } numLeft--; } } // Fill final hole with defaultNorm for (; upto < state.numDocs; upto++) normsOut.WriteByte(defaultNorm); } else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) { normCount++; // Fill entire field with default norm: for (; upto < state.numDocs; upto++) normsOut.WriteByte(defaultNorm); } System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == 
normsOut.GetFilePointer(), ".nrm file size mismatch: expected=" +(4 + normCount * state.numDocs) + " actual=" + normsOut.GetFilePointer()); } } finally { normsOut.Close(); } }
public virtual void Test() { PositionBasedTermVectorMapper mapper = new PositionBasedTermVectorMapper(); mapper.SetExpectations("test", tokens.Length, true, true); //Test single position for (int i = 0; i < tokens.Length; i++) { System.String token = tokens[i]; mapper.Map(token, 1, null, thePositions[i]); } System.Collections.IDictionary map = mapper.GetFieldToTerms(); Assert.IsTrue(map != null, "map is null and it shouldn't be"); Assert.IsTrue(map.Count == 1, "map Size: " + map.Count + " is not: " + 1); System.Collections.IDictionary positions = (System.Collections.IDictionary) map["test"]; Assert.IsTrue(positions != null, "thePositions is null and it shouldn't be"); Assert.IsTrue(positions.Count == numPositions, "thePositions Size: " + positions.Count + " is not: " + numPositions); System.Collections.BitArray bits = new System.Collections.BitArray((numPositions % 64 == 0?numPositions / 64:numPositions / 64 + 1) * 64); for (System.Collections.IEnumerator iterator = new System.Collections.Hashtable(positions).GetEnumerator(); iterator.MoveNext(); ) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iterator.Current; PositionBasedTermVectorMapper.TVPositionInfo info = (PositionBasedTermVectorMapper.TVPositionInfo) entry.Value; Assert.IsTrue(info != null, "info is null and it shouldn't be"); int pos = ((System.Int32) entry.Key); bits.Set(pos, true); Assert.IsTrue(info.GetPosition() == pos, info.GetPosition() + " does not equal: " + pos); Assert.IsTrue(info.GetOffsets() != null, "info.getOffsets() is null and it shouldn't be"); if (pos == 0) { Assert.IsTrue(info.GetTerms().Count == 2, "info.getTerms() Size: " + info.GetTerms().Count + " is not: " + 2); //need a test for multiple terms at one pos Assert.IsTrue(info.GetOffsets().Count == 2, "info.getOffsets() Size: " + info.GetOffsets().Count + " is not: " + 2); } else { Assert.IsTrue(info.GetTerms().Count == 1, "info.getTerms() Size: " + info.GetTerms().Count + " is not: " + 1); //need a test for multiple terms at one pos Assert.IsTrue(info.GetOffsets().Count == 1, "info.getOffsets() Size: " + info.GetOffsets().Count + " is not: " + 1); } } int cardinality = 0; for (int i = 0; i < bits.Count; i++) { if (bits.Get(i)) cardinality++; } Assert.IsTrue(cardinality == numPositions, "Bits are not all on"); }
// Remaps all buffered deletes based on a completed // merge internal virtual void Remap(MergeDocIDRemapper mapper, SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount) { lock (this) { System.Collections.IDictionary newDeleteTerms; // Remap delete-by-term if (terms.Count > 0) { if (doTermSort) { newDeleteTerms = new System.Collections.Generic.SortedDictionary<object, object>(); } else { newDeleteTerms = new System.Collections.Hashtable(); } System.Collections.IEnumerator iter = new System.Collections.Hashtable(terms).GetEnumerator(); while (iter.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current; Num num = (Num) entry.Value; newDeleteTerms[entry.Key] = new Num(mapper.Remap(num.GetNum())); } } else newDeleteTerms = null; // Remap delete-by-docID System.Collections.ArrayList newDeleteDocIDs; if (docIDs.Count > 0) { newDeleteDocIDs = new System.Collections.ArrayList(docIDs.Count); System.Collections.IEnumerator iter = docIDs.GetEnumerator(); while (iter.MoveNext()) { System.Int32 num = (System.Int32) iter.Current; newDeleteDocIDs.Add((System.Int32) mapper.Remap(num)); } } else newDeleteDocIDs = null; // Remap delete-by-query System.Collections.Hashtable newDeleteQueries; if (queries.Count > 0) { newDeleteQueries = new System.Collections.Hashtable(queries.Count); System.Collections.IEnumerator iter = new System.Collections.Hashtable(queries).GetEnumerator(); while (iter.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current; System.Int32 num = (System.Int32) entry.Value; newDeleteQueries[entry.Key] = (System.Int32) mapper.Remap(num); } } else newDeleteQueries = null; if (newDeleteTerms != null) terms = newDeleteTerms; if (newDeleteDocIDs != null) docIDs = newDeleteDocIDs; if (newDeleteQueries != null) queries = newDeleteQueries; } }
public virtual void TestMapper() { TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos); Assert.IsTrue(reader != null); SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); reader.Get(0, mapper); System.Collections.Generic.SortedDictionary <Object, Object> set_Renamed = mapper.GetTermVectorEntrySet(); Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be"); //three fields, 4 terms, all terms are the same Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4); //Check offsets and positions for (System.Collections.IEnumerator iterator = set_Renamed.Keys.GetEnumerator(); iterator.MoveNext();) { TermVectorEntry tve = (TermVectorEntry)iterator.Current; Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be"); } mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); reader.Get(1, mapper); set_Renamed = mapper.GetTermVectorEntrySet(); Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be"); //three fields, 4 terms, all terms are the same Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4); //Should have offsets and positions b/c we are munging all the fields together for (System.Collections.IEnumerator iterator = set_Renamed.Keys.GetEnumerator(); iterator.MoveNext();) { TermVectorEntry tve = (TermVectorEntry)iterator.Current; Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be"); } FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); reader.Get(0, fsMapper); System.Collections.IDictionary map = fsMapper.GetFieldToTerms(); Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length); for (System.Collections.IEnumerator iterator = new System.Collections.Hashtable(map).GetEnumerator(); iterator.MoveNext();) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)iterator.Current; System.Collections.Generic.SortedDictionary <Object, Object> sortedSet = (System.Collections.Generic.SortedDictionary <Object, Object>)entry.Value; Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4); for (System.Collections.IEnumerator inner = sortedSet.Keys.GetEnumerator(); inner.MoveNext();) { TermVectorEntry tve = (TermVectorEntry)inner.Current; Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); //Check offsets and positions. 
Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); System.String field = tve.GetField(); if (field.Equals(testFields[0])) { //should have offsets Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be"); } else if (field.Equals(testFields[1])) { //should not have offsets Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be"); } } } //Try mapper that ignores offs and positions fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator()); reader.Get(0, fsMapper); map = fsMapper.GetFieldToTerms(); Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length); for (System.Collections.IEnumerator iterator = new System.Collections.Hashtable(map).GetEnumerator(); iterator.MoveNext();) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)iterator.Current; System.Collections.Generic.SortedDictionary <Object, Object> sortedSet = (System.Collections.Generic.SortedDictionary <Object, Object>)entry.Value; Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4); for (System.Collections.IEnumerator inner = sortedSet.Keys.GetEnumerator(); inner.MoveNext();) { TermVectorEntry tve = (TermVectorEntry)inner.Current; Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); //Check offsets and positions. Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); System.String field = tve.GetField(); if (field.Equals(testFields[0])) { //offsets and positions are ignored by this mapper, so both should be null Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be"); } else if (field.Equals(testFields[1])) { //should not have offsets Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be"); Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be"); } } } // test setDocumentNumber() IndexReader ir = IndexReader.Open(dir); DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper(); Assert.AreEqual(-1, docNumAwareMapper.GetDocumentNumber()); ir.GetTermFreqVector(0, docNumAwareMapper); Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber()); docNumAwareMapper.SetDocumentNumber(-1); ir.GetTermFreqVector(1, docNumAwareMapper); Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber()); docNumAwareMapper.SetDocumentNumber(-1); ir.GetTermFreqVector(0, "f1", docNumAwareMapper); Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber()); docNumAwareMapper.SetDocumentNumber(-1); ir.GetTermFreqVector(1, "f2", docNumAwareMapper); Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber()); docNumAwareMapper.SetDocumentNumber(-1); ir.GetTermFreqVector(0, "f1", docNumAwareMapper); Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber()); ir.Close(); }
/// <summary>Forcefully clear changes for the specified segments, /// and remove from the pool. This is called on successful merge. /// </summary> internal virtual void Clear(SegmentInfos infos) { lock (this) { if (infos == null) { System.Collections.IEnumerator iter = new System.Collections.Hashtable(readerMap).GetEnumerator(); while (iter.MoveNext()) { System.Collections.DictionaryEntry ent = (System.Collections.DictionaryEntry) iter.Current; ((SegmentReader) ent.Value).hasChanges = false; } } else { int numSegments = infos.Count; for (int i = 0; i < numSegments; i++) { SegmentInfo info = infos.Info(i); if (readerMap.Contains(info)) { ((SegmentReader) readerMap[info]).hasChanges = false; } } } } }
// TODO: would be nice to factor out more of this, eg the // FreqProxFieldMergeState, and code to visit all Fields // under the same FieldInfo together, up into TermsHash*. // Other writers would presumably share alot of this... public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { // Gather all FieldData's that have postings, across all // ThreadStates System.Collections.ArrayList allFields = new System.Collections.ArrayList(); System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; System.Collections.ICollection fields = (System.Collections.ICollection) entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); while (fieldsIt.MoveNext()) { FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) ((System.Collections.DictionaryEntry) fieldsIt.Current).Key; if (perField.termsHashPerField.numPostings > 0) allFields.Add(perField); } } // Sort by field name allFields.Sort(); int numAllFields = allFields.Count; // TODO: allow Lucene user to customize this consumer: FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos); /* Current writer chain: FormatPostingsFieldsConsumer -> IMPL: FormatPostingsFieldsWriter -> FormatPostingsTermsConsumer -> IMPL: FormatPostingsTermsWriter -> FormatPostingsDocConsumer -> IMPL: FormatPostingsDocWriter -> FormatPostingsPositionsConsumer -> IMPL: FormatPostingsPositionsWriter */ int start = 0; while (start < numAllFields) { FieldInfo fieldInfo = ((FreqProxTermsWriterPerField) allFields[start]).fieldInfo; System.String fieldName = fieldInfo.name; int end = start + 1; while (end < numAllFields && ((FreqProxTermsWriterPerField) allFields[end]).fieldInfo.name.Equals(fieldName)) end++; FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start]; for (int i = start; i < end; i++) { fields[i - start] = (FreqProxTermsWriterPerField) allFields[i]; // Aggregate the storePayload as seen by the same // field across multiple threads fieldInfo.storePayloads |= fields[i - start].hasPayloads; } // If this field has postings then add them to the // segment AppendPostings(fields, consumer); for (int i = 0; i < fields.Length; i++) { TermsHashPerField perField = fields[i].termsHashPerField; int numPostings = perField.numPostings; perField.Reset(); perField.ShrinkHash(numPostings); fields[i].Reset(); } start = end; } it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current; FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.Key; perThread.termsHashPerThread.Reset(true); } consumer.Finish(); }
/// <summary>Remove all our references to readers, and commit /// any pending changes. /// </summary> internal virtual void Close() { lock (this) { System.Collections.IEnumerator iter = new System.Collections.Hashtable(readerMap).GetEnumerator(); while (iter.MoveNext()) { System.Collections.DictionaryEntry ent = (System.Collections.DictionaryEntry) iter.Current; SegmentReader sr = (SegmentReader) ent.Value; if (sr.hasChanges) { System.Diagnostics.Debug.Assert(InfoIsLive(sr.GetSegmentInfo())); sr.DoCommit(null); // Must checkpoint w/ deleter, because this // segment reader will have created new _X_N.del // file. enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false); } readerMap.Remove(ent.Key); // NOTE: it is allowed that this decRef does not // actually close the SR; this can happen when a // near real-time reader is kept open after the // IndexWriter instance is closed sr.DecRef(); } } }
// TODO: would be nice to factor out more of this, eg the // FreqProxFieldMergeState, and code to visit all Fields // under the same FieldInfo together, up into TermsHash*. // Other writers would presumably share alot of this... public override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state) { // Gather all FieldData's that have postings, across all // ThreadStates System.Collections.ArrayList allFields = new System.Collections.ArrayList(); System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; System.Collections.ICollection fields = (System.Collections.ICollection)entry.Value; System.Collections.IEnumerator fieldsIt = fields.GetEnumerator(); while (fieldsIt.MoveNext()) { FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)((System.Collections.DictionaryEntry)fieldsIt.Current).Key; if (perField.termsHashPerField.numPostings > 0) { allFields.Add(perField); } } } // Sort by field name allFields.Sort(); int numAllFields = allFields.Count; // TODO: allow Lucene user to customize this consumer: FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos); /* * Current writer chain: * FormatPostingsFieldsConsumer * -> IMPL: FormatPostingsFieldsWriter * -> FormatPostingsTermsConsumer * -> IMPL: FormatPostingsTermsWriter * -> FormatPostingsDocConsumer * -> IMPL: FormatPostingsDocWriter * -> FormatPostingsPositionsConsumer * -> IMPL: FormatPostingsPositionsWriter */ int start = 0; while (start < numAllFields) { FieldInfo fieldInfo = ((FreqProxTermsWriterPerField)allFields[start]).fieldInfo; System.String fieldName = fieldInfo.name; int end = start + 1; while (end < numAllFields && ((FreqProxTermsWriterPerField)allFields[end]).fieldInfo.name.Equals(fieldName)) { end++; } FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start]; for (int i = start; i < end; i++) { fields[i - start] = (FreqProxTermsWriterPerField)allFields[i]; // Aggregate the storePayload as seen by the same // field across multiple threads fieldInfo.storePayloads |= fields[i - start].hasPayloads; } // If this field has postings then add them to the // segment AppendPostings(fields, consumer); for (int i = 0; i < fields.Length; i++) { TermsHashPerField perField = fields[i].termsHashPerField; int numPostings = perField.numPostings; perField.Reset(); perField.ShrinkHash(numPostings); fields[i].Reset(); } start = end; } it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator(); while (it.MoveNext()) { System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry)it.Current; FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread)entry.Key; perThread.termsHashPerThread.Reset(true); } consumer.Finish(); }
/// <summary> Commit all segment readers in the pool.</summary> /// <throws> IOException </throws> internal virtual void Commit() { lock (this) { System.Collections.IEnumerator iter = new System.Collections.Hashtable(readerMap).GetEnumerator(); while (iter.MoveNext()) { System.Collections.DictionaryEntry ent = (System.Collections.DictionaryEntry) iter.Current; SegmentReader sr = (SegmentReader) ent.Value; if (sr.hasChanges) { System.Diagnostics.Debug.Assert(InfoIsLive(sr.GetSegmentInfo())); sr.DoCommit(null); // Must checkpoint w/ deleter, because this // segment reader will have created new _X_N.del // file. enclosingInstance.deleter.Checkpoint(enclosingInstance.segmentInfos, false); } } } }
public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper) { EnsureOpen(); System.Collections.IEnumerator i = new System.Collections.Hashtable(fieldToReader).GetEnumerator(); while (i.MoveNext()) { System.Collections.DictionaryEntry e = (System.Collections.DictionaryEntry) i.Current; System.String field = (System.String) e.Key; IndexReader reader = (IndexReader) e.Value; reader.GetTermFreqVector(docNumber, field, mapper); } }
public virtual CacheEntry[] GetCacheEntries() { System.Collections.IList result = new System.Collections.ArrayList(17); System.Collections.IEnumerator outerKeys = caches.Keys.GetEnumerator(); while (outerKeys.MoveNext()) { System.Type cacheType = (System.Type) outerKeys.Current; Cache cache = (Cache) caches[cacheType]; System.Collections.IEnumerator innerKeys = cache.readerCache.Keys.GetEnumerator(); while (innerKeys.MoveNext()) { // we've now materialized a hard ref System.Object readerKey = innerKeys.Current; // innerKeys was backed by WeakHashMap, sanity check // that it wasn't GCed before we made hard ref if (null != readerKey && cache.readerCache.Contains(readerKey)) { System.Collections.IDictionary innerCache = ((System.Collections.IDictionary) cache.readerCache[readerKey]); System.Collections.IEnumerator entrySetIterator = new System.Collections.Hashtable(innerCache).GetEnumerator(); while (entrySetIterator.MoveNext()) { System.Collections.DictionaryEntry mapEntry = (System.Collections.DictionaryEntry) entrySetIterator.Current; Entry entry = (Entry) mapEntry.Key; result.Add(new CacheEntryImpl(readerKey, entry.field, cacheType, entry.type, entry.custom, entry.locale, mapEntry.Value)); } } } } return (CacheEntry[]) new System.Collections.ArrayList(result).ToArray(typeof(CacheEntry)); }
// Apply buffered delete terms to this reader.
private void ApplyDeletes(System.Collections.Hashtable deleteTerms, IndexReader reader)
{
    System.Collections.IEnumerator iter = new System.Collections.Hashtable(deleteTerms).GetEnumerator();
    while (iter.MoveNext())
    {
        System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
        reader.DeleteDocuments((Term) entry.Key);
    }
}
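// Hedged usage sketch (not from the original sources): only the keys of the map
// matter to this overload, and they must be Term instances; the values are ignored.
// The field name and values below are made up for illustration, and the call is
// left commented out because the method is private to its declaring class.
System.Collections.Hashtable pendingDeletes = new System.Collections.Hashtable();
pendingDeletes[new Term("id", "42")] = null;
pendingDeletes[new Term("id", "97")] = null;
// ApplyDeletes(pendingDeletes, reader);   // "reader" would be an open, modifiable IndexReader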
internal override void Flush(System.Collections.IDictionary threadsAndFields, SegmentWriteState state)
{
    lock (this)
    {
        System.Collections.IDictionary childThreadsAndFields = new System.Collections.Hashtable();
        System.Collections.IDictionary nextThreadsAndFields;
        if (nextTermsHash != null)
        {
            nextThreadsAndFields = new System.Collections.Hashtable();
        }
        else
            nextThreadsAndFields = null;

        System.Collections.IEnumerator it = new System.Collections.Hashtable(threadsAndFields).GetEnumerator();
        while (it.MoveNext())
        {
            System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) it.Current;

            TermsHashPerThread perThread = (TermsHashPerThread) entry.Key;

            System.Collections.ICollection fields = (System.Collections.ICollection) entry.Value;

            System.Collections.IEnumerator fieldsIt = fields.GetEnumerator();
            System.Collections.Hashtable childFields = new System.Collections.Hashtable();
            System.Collections.Hashtable nextChildFields;
            if (nextTermsHash != null)
            {
                nextChildFields = new System.Collections.Hashtable();
            }
            else
                nextChildFields = null;

            while (fieldsIt.MoveNext())
            {
                TermsHashPerField perField = (TermsHashPerField) ((System.Collections.DictionaryEntry) fieldsIt.Current).Key;
                childFields[perField.consumer] = perField.consumer;
                if (nextTermsHash != null)
                    nextChildFields[perField.nextPerField] = perField.nextPerField;
            }

            childThreadsAndFields[perThread.consumer] = childFields;
            if (nextTermsHash != null)
                nextThreadsAndFields[perThread.nextPerThread] = nextChildFields;
        }

        consumer.Flush(childThreadsAndFields, state);

        ShrinkFreePostings(threadsAndFields, state);

        if (nextTermsHash != null)
            nextTermsHash.Flush(nextThreadsAndFields, state);
    }
}
// Apply buffered delete terms to the segment just flushed from ram.
// Apply them appropriately so that a delete term is only applied to
// the documents buffered before it, not those buffered after it.
private void ApplyDeletesSelectively(System.Collections.Hashtable deleteTerms, IndexReader reader)
{
    System.Collections.IEnumerator iter = new System.Collections.Hashtable(deleteTerms).GetEnumerator();
    while (iter.MoveNext())
    {
        System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
        Term term = (Term) entry.Key;

        TermDocs docs = reader.TermDocs(term);
        if (docs != null)
        {
            int num = ((Num) entry.Value).GetNum();
            try
            {
                while (docs.Next())
                {
                    int doc = docs.Doc();
                    if (doc >= num)
                    {
                        break;
                    }
                    reader.DeleteDocument(doc);
                }
            }
            finally
            {
                docs.Close();
            }
        }
    }
}
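// Hedged illustration (not part of the original sources): the Num stored with each
// buffered term records how many documents had been added to the ram buffer when
// the delete arrived, and only matching docs with an id below that count are
// deleted. The numbers below are made up purely to show the comparison.
int numWhenDeleteBuffered = 3;   // the delete was buffered after docs 0, 1 and 2
int laterMatchingDoc = 4;        // a matching doc that was buffered after the delete
// Prints False: doc 4 is not covered by the buffered delete and survives.
System.Console.WriteLine(laterMatchingDoc < numWhenDeleteBuffered);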
// Apply buffered delete terms, queries and docIDs to the
// provided reader
private bool ApplyDeletes(IndexReader reader, int docIDStart)
{
    lock (this)
    {
        int docEnd = docIDStart + reader.MaxDoc();
        bool any = false;

        System.Diagnostics.Debug.Assert(CheckDeleteTerm(null));

        // Delete by term
        //System.Collections.IEnumerator iter = new System.Collections.Hashtable(deletesFlushed.terms).GetEnumerator();
        System.Collections.IEnumerator iter = deletesFlushed.terms.GetEnumerator();
        TermDocs docs = reader.TermDocs();
        try
        {
            while (iter.MoveNext())
            {
                System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
                Term term = (Term) entry.Key;
                // LUCENE-2086: we should be iterating a TreeMap,
                // here, so terms better be in order:
                System.Diagnostics.Debug.Assert(CheckDeleteTerm(term));
                docs.Seek(term);
                int limit = ((BufferedDeletes.Num) entry.Value).GetNum();
                while (docs.Next())
                {
                    int docID = docs.Doc();
                    if (docIDStart + docID >= limit)
                        break;
                    reader.DeleteDocument(docID);
                    any = true;
                }
            }
        }
        finally
        {
            docs.Close();
        }

        // Delete by docID
        iter = deletesFlushed.docIDs.GetEnumerator();
        while (iter.MoveNext())
        {
            int docID = ((System.Int32) iter.Current);
            if (docID >= docIDStart && docID < docEnd)
            {
                reader.DeleteDocument(docID - docIDStart);
                any = true;
            }
        }

        // Delete by query
        IndexSearcher searcher = new IndexSearcher(reader);
        iter = new System.Collections.Hashtable(deletesFlushed.queries).GetEnumerator();
        while (iter.MoveNext())
        {
            System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
            Query query = (Query) entry.Key;
            int limit = ((System.Int32) entry.Value);
            Weight weight = query.Weight(searcher);
            Scorer scorer = weight.Scorer(reader, true, false);
            if (scorer != null)
            {
                while (true)
                {
                    int doc = scorer.NextDoc();
                    if (((long) docIDStart) + doc >= limit)
                        break;
                    reader.DeleteDocument(doc);
                    any = true;
                }
            }
        }
        searcher.Close();

        return any;
    }
}
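// Hedged illustration (not part of the original sources) of the docID remapping in
// the "Delete by docID" loop above: buffered docIDs live in the global id space of
// everything flushed so far, while reader.DeleteDocument expects an id local to
// this reader. All values below are made up for illustration.
int exampleDocIDStart = 100;                    // first global docID covered by this reader
int exampleDocEnd = exampleDocIDStart + 50;     // reader.MaxDoc() == 50 in this example
int bufferedDocID = 123;                        // a buffered, global delete-by-docID entry
if (bufferedDocID >= exampleDocIDStart && bufferedDocID < exampleDocEnd)
{
    // 123 - 100 == 23 is the reader-local id that would be passed to DeleteDocument
    System.Console.WriteLine(bufferedDocID - exampleDocIDStart);
}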
/// <summary> Performs a clone of all {@link AttributeImpl} instances returned in a new
/// AttributeSource instance. This method can be used to e.g. create another TokenStream
/// with exactly the same attributes (using {@link #AttributeSource(AttributeSource)})
/// </summary>
public virtual AttributeSource CloneAttributes()
{
    AttributeSource clone = new AttributeSource(this.factory);

    // first clone the impls
    if (HasAttributes())
    {
        if (currentState == null)
        {
            ComputeCurrentState();
        }
        for (State state = currentState; state != null; state = state.next)
        {
            clone.attributeImpls[state.attribute.GetType()] = state.attribute.Clone();
        }
    }

    // now the interfaces
    System.Collections.IEnumerator attIt = new System.Collections.Hashtable(this.attributes).GetEnumerator();
    while (attIt.MoveNext())
    {
        System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) attIt.Current;
        clone.attributes[entry.Key] = clone.attributeImpls[entry.Value.GetType()];
    }

    return clone;
}
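// Hedged usage sketch (not from the original sources): CloneAttributes copies both
// the attribute implementations and the interface-to-impl mapping, so the result can
// serve as a snapshot of the current token state, or seed another source via the
// AttributeSource(AttributeSource) constructor mentioned in the summary above.
AttributeSource source = new AttributeSource();
// ... attributes would be added and populated elsewhere ...
AttributeSource snapshot = source.CloneAttributes();      // deep-clones the current impls
AttributeSource sibling = new AttributeSource(snapshot);  // shares the cloned impls with "snapshot"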
/// <summary>Remove all our references to readers, and commit
/// any pending changes.
/// </summary>
internal virtual void Close()
{
    lock (this)
    {
        System.Collections.IEnumerator iter = new System.Collections.Hashtable(readerMap).GetEnumerator();
        while (iter.MoveNext())
        {
            System.Collections.DictionaryEntry ent = (System.Collections.DictionaryEntry) iter.Current;

            SegmentReader sr = (SegmentReader) ent.Value;
            if (sr.hasChanges)
            {
                System.Diagnostics.Debug.Assert(InfoIsLive(sr.GetSegmentInfo()));
                sr.StartCommit();
                bool success = false;
                try
                {
                    sr.DoCommit(null);
                    success = true;
                }
                finally
                {
                    if (!success)
                    {
                        sr.RollbackCommit();
                    }
                }
            }

            // Remove by the entry's key: iter.Current is the copied DictionaryEntry
            // itself and would never match a key stored in readerMap.
            readerMap.Remove(ent.Key);

            // NOTE: it is allowed that this decRef does not
            // actually close the SR; this can happen when a
            // near real-time reader is kept open after the
            // IndexWriter instance is closed
            sr.DecRef();
        }
    }
}