public override object ObjectVal(int doc) { return(valid.Get(doc) ? arr.Get(doc) : (int?)null); }
public override int NextDoc() { while (true) { //System.out.println(" nextDoc cycle docUpto=" + docUpto + " numDocs=" + numDocs + " fp=" + in.getPosition() + " this=" + this); if (docUpto == numDocs) { // System.out.println(" END"); return(docID = NO_MORE_DOCS); } docUpto++; if (indexOptions == IndexOptions.DOCS_ONLY) { accum += @in.ReadVInt32(); } else { int code = @in.ReadVInt32(); accum += (int)((uint)code >> 1); //System.out.println(" docID=" + accum + " code=" + code); if ((code & 1) != 0) { freq = 1; } else { freq = @in.ReadVInt32(); Debug.Assert(freq > 0); } if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { // Skip positions/payloads for (int posUpto = 0; posUpto < freq; posUpto++) { if (!storePayloads) { @in.ReadVInt32(); } else { int posCode = @in.ReadVInt32(); if ((posCode & 1) != 0) { payloadLen = @in.ReadVInt32(); } @in.SkipBytes(payloadLen); } } } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { // Skip positions/offsets/payloads for (int posUpto = 0; posUpto < freq; posUpto++) { int posCode = @in.ReadVInt32(); if (storePayloads && ((posCode & 1) != 0)) { payloadLen = @in.ReadVInt32(); } if ((@in.ReadVInt32() & 1) != 0) { // new offset length @in.ReadVInt32(); } if (storePayloads) { @in.SkipBytes(payloadLen); } } } } if (liveDocs == null || liveDocs.Get(accum)) { //System.out.println(" return docID=" + accum + " freq=" + freq); return(docID = accum); } } }
public override int NextDoc()
{
    while (true)
    {
        if (_postings.Eof)
        {
            return _docId = NO_MORE_DOCS;
        }

        var code = _postings.ReadVInt32();
        if (_indexOptions == IndexOptions.DOCS_ONLY)
        {
            _accum += code;
        }
        else
        {
            _accum += (int)((uint)code >> 1); // shift off low bit
            _freq = (code & 1) != 0 ? 1 : _postings.ReadVInt32();

            if (_indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0)
            {
                // Skip positions
                if (_storePayloads)
                {
                    for (var pos = 0; pos < _freq; pos++)
                    {
                        var posCode = _postings.ReadVInt32();
                        if ((posCode & 1) != 0)
                        {
                            _payloadLength = _postings.ReadVInt32();
                        }
                        if (_storeOffsets && (_postings.ReadVInt32() & 1) != 0)
                        {
                            // new offset length
                            _postings.ReadVInt32();
                        }
                        if (_payloadLength != 0)
                        {
                            _postings.SkipBytes(_payloadLength);
                        }
                    }
                }
                else
                {
                    for (var pos = 0; pos < _freq; pos++)
                    {
                        // TODO: skipVInt
                        _postings.ReadVInt32();
                        if (_storeOffsets && (_postings.ReadVInt32() & 1) != 0)
                        {
                            // new offset length
                            _postings.ReadVInt32();
                        }
                    }
                }
            }
        }

        if (_liveDocs == null || _liveDocs.Get(_accum))
        {
            return _docId = _accum;
        }
    }
}
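// Editor's note: an illustrative sketch, not part of the original source. It isolates the
// vint scheme that both NextDoc() implementations above decode: doc IDs are stored as deltas
// shifted left by one, with the low bit flagging the common freq == 1 case so no second vint
// is needed. DataOutput/DataInput are the real Lucene.Net.Store abstractions; the
// PostingCodecSketch class itself is hypothetical.
internal static class PostingCodecSketch
{
    public static void WriteDoc(Lucene.Net.Store.DataOutput @out, int docDelta, int freq)
    {
        if (freq == 1)
        {
            @out.WriteVInt32((docDelta << 1) | 1); // low bit set: freq == 1, nothing follows
        }
        else
        {
            @out.WriteVInt32(docDelta << 1);       // low bit clear: an explicit freq vint follows
            @out.WriteVInt32(freq);
        }
    }

    public static void ReadDoc(Lucene.Net.Store.DataInput @in, out int docDelta, out int freq)
    {
        int code = @in.ReadVInt32();
        docDelta = (int)((uint)code >> 1);         // unsigned shift drops the flag bit
        freq = (code & 1) != 0 ? 1 : @in.ReadVInt32();
    }
}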
public bool Get(int index) { return(liveDocs.Get(outerInstance.docMap.OldToNew(index))); }
public bool Get(int index) { Debug.Assert(index >= 0 && index < Length); return(@in.Get(index)); }
public override int NextDoc() { //System.out.println("Q.nextDoc() parentDoc=" + parentDoc + " childDoc=" + childDoc); // Loop until we hit a childDoc that's accepted while (true) { if (_childDoc + 1 == _parentDoc) { // OK, we are done iterating through all children // matching this one parent doc, so we now nextDoc() // the parent. Use a while loop because we may have // to skip over some number of parents w/ no // children: while (true) { _parentDoc = _parentScorer.NextDoc(); ValidateParentDoc(); if (_parentDoc == 0) { // Degenerate but allowed: first parent doc has no children // TODO: would be nice to pull initial parent // into ctor so we can skip this if... but it's // tricky because scorer must return -1 for // .doc() on init... _parentDoc = _parentScorer.NextDoc(); ValidateParentDoc(); } if (_parentDoc == NO_MORE_DOCS) { _childDoc = NO_MORE_DOCS; //System.out.println(" END"); return(_childDoc); } // Go to first child for this next parentDoc: _childDoc = 1 + _parentBits.PrevSetBit(_parentDoc - 1); if (_childDoc == _parentDoc) { // This parent has no children; continue // parent loop so we move to next parent continue; } if (_acceptDocs != null && !_acceptDocs.Get(_childDoc)) { goto nextChildDocContinue; } if (_childDoc < _parentDoc) { if (_doScores) { _parentScore = _parentScorer.GetScore(); _parentFreq = _parentScorer.Freq; } //System.out.println(" " + childDoc); return(_childDoc); } else { // Degenerate but allowed: parent has no children } } } if (Debugging.AssertsEnabled) { Debugging.Assert(_childDoc < _parentDoc, "childDoc={0} parentDoc={1}", _childDoc, _parentDoc); } _childDoc++; if (_acceptDocs != null && !_acceptDocs.Get(_childDoc)) { continue; } //System.out.println(" " + childDoc); return(_childDoc); nextChildDocContinue :; } }
public override object ObjectVal(int doc) { return(valid.Get(doc) ? J2N.Numerics.Int32.GetInstance(arr.Get(doc)) : null); // LUCENENET: In Java, the conversion to instance of java.util.Integer is implicit, but we need to do an explicit conversion }
public virtual void TestDocsEnum()
{
    IBits mappedLiveDocs = RandomLiveDocs(reader.MaxDoc);
    TermsEnum termsEnum = reader.GetTerms(DOCS_ENUM_FIELD).GetIterator(null);
    assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(DOCS_ENUM_TERM)));
    DocsEnum docs = termsEnum.Docs(mappedLiveDocs, null);

    int doc;
    int prev = -1;
    while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        assertTrue("document " + doc + " marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(doc));
        assertEquals("incorrect value; doc " + doc, sortedValues[doc], int.Parse(reader.Document(doc).Get(ID_FIELD)));
        while (++prev < doc)
        {
            assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
        }
    }
    while (++prev < reader.MaxDoc)
    {
        assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
    }

    DocsEnum reuse = docs;
    docs = termsEnum.Docs(mappedLiveDocs, reuse);
    if (docs is SortingAtomicReader.SortingDocsEnum)
    {
        assertTrue(((SortingAtomicReader.SortingDocsEnum)docs).Reused(reuse)); // make sure reuse worked
    }

    doc = -1;
    prev = -1;
    while ((doc = docs.Advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS)
    {
        assertTrue("document " + doc + " marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(doc));
        assertEquals("incorrect value; doc " + doc, sortedValues[doc], int.Parse(reader.Document(doc).Get(ID_FIELD)));
        while (++prev < doc)
        {
            assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
        }
    }
    while (++prev < reader.MaxDoc)
    {
        assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
    }
}
public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
{
    int maxDoc = context.Reader.MaxDoc;
    FieldCache.Int32s idSource = FieldCache.DEFAULT.GetInt32s(context.AtomicReader, "id", false);
    Assert.IsNotNull(idSource);
    FixedBitSet bits = new FixedBitSet(maxDoc);
    for (int docID = 0; docID < maxDoc; docID++)
    {
        if ((float)random.NextDouble() <= density && (acceptDocs == null || acceptDocs.Get(docID)))
        {
            bits.Set(docID);
            //System.out.println("  acc id=" + idSource.Get(docID) + " docID=" + docID + " id=" + idSource.Get(docID) + " v=" + docValues.Get(idSource.Get(docID)).Utf8ToString());
            matchValues.Add(docValues[idSource.Get(docID)]);
        }
    }
    return bits;
}
public override int NextDoc()
{
    if (_docId == NO_MORE_DOCS)
    {
        return _docId;
    }

    bool first = true;
    int termFreq = 0;
    while (true)
    {
        long lineStart = _in.GetFilePointer();
        SimpleTextUtil.ReadLine(_in, _scratch);

        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
        {
            if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
            {
                _in.Seek(lineStart);
                if (!_omitTf)
                {
                    _tf = termFreq;
                }
                return _docId;
            }
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16);
            _docId = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            termFreq = 0;
            first = false;
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
        {
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length, _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
            termFreq = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS))
        {
            // skip termFreq++;
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
        {
            // skip
        }
        else
        {
            Debug.Assert(
                StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) ||
                StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) || // LUCENENET TODO: This assert fails sometimes, which in turn causes _scratch.Utf8ToString() to throw an index out of range exception
                StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END) /*, "scratch=" + _scratch.Utf8ToString()*/);

            if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
            {
                _in.Seek(lineStart);
                if (!_omitTf)
                {
                    _tf = termFreq;
                }
                return _docId;
            }
            return _docId = NO_MORE_DOCS;
        }
    }
}
private IEnumerable <long?> GetMergeSortedSetOrdsEnumerable(AtomicReader[] readers, SortedSetDocValues[] dvs, OrdinalMap map) { int readerUpto = -1; int docIDUpto = 0; AtomicReader currentReader = null; IBits currentLiveDocs = null; var ords = new long[8]; int ordUpto = 0; int ordLength = 0; while (true) { if (readerUpto == readers.Length) { yield break; } if (ordUpto < ordLength) { var value = ords[ordUpto]; ordUpto++; yield return(value); continue; } if (currentReader == null || docIDUpto == currentReader.MaxDoc) { readerUpto++; if (readerUpto < readers.Length) { currentReader = readers[readerUpto]; currentLiveDocs = currentReader.LiveDocs; } docIDUpto = 0; continue; } if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto)) { if (Debugging.AssertsEnabled) { Debugging.Assert(docIDUpto < currentReader.MaxDoc); } SortedSetDocValues dv = dvs[readerUpto]; dv.SetDocument(docIDUpto); ordUpto = ordLength = 0; long ord; while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { if (ordLength == ords.Length) { ords = ArrayUtil.Grow(ords, ordLength + 1); } ords[ordLength] = map.GetGlobalOrd(readerUpto, ord); ordLength++; } docIDUpto++; continue; } docIDUpto++; } }
/// <summary> /// Assert that the content of the <see cref="DocIdSet"/> is the same as the content of the <see cref="OpenBitSet"/>. /// </summary> #pragma warning disable xUnit1013 public virtual void AssertEquals(int numBits, OpenBitSet ds1, WAH8DocIdSet ds2) #pragma warning restore xUnit1013 { // nextDoc DocIdSetIterator it2 = ds2.GetIterator(); if (it2 == null) { Assert.AreEqual(-1, ds1.NextSetBit(0)); } else { Assert.AreEqual(-1, it2.DocID); for (int doc = ds1.NextSetBit(0); doc != -1; doc = ds1.NextSetBit(doc + 1)) { Assert.AreEqual(doc, it2.NextDoc()); Assert.AreEqual(doc, it2.DocID); } Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, it2.NextDoc()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, it2.DocID); } // nextDoc / advance it2 = ds2.GetIterator(); if (it2 == null) { Assert.AreEqual(-1, ds1.NextSetBit(0)); } else { for (int doc = -1; doc != DocIdSetIterator.NO_MORE_DOCS;) { if (Random.NextBoolean()) { doc = ds1.NextSetBit(doc + 1); if (doc == -1) { doc = DocIdSetIterator.NO_MORE_DOCS; } Assert.AreEqual(doc, it2.NextDoc()); Assert.AreEqual(doc, it2.DocID); } else { int target = doc + 1 + Random.Next(Random.NextBoolean() ? 64 : Math.Max(numBits / 8, 1)); doc = ds1.NextSetBit(target); if (doc == -1) { doc = DocIdSetIterator.NO_MORE_DOCS; } Assert.AreEqual(doc, it2.Advance(target)); Assert.AreEqual(doc, it2.DocID); } } } // bits() IBits bits = ds2.Bits; if (bits != null) { // test consistency between bits and iterator it2 = ds2.GetIterator(); for (int previousDoc = -1, doc = it2.NextDoc(); ; previousDoc = doc, doc = it2.NextDoc()) { int max = doc == DocIdSetIterator.NO_MORE_DOCS ? bits.Length : doc; for (int i = previousDoc + 1; i < max; ++i) { Assert.AreEqual(false, bits.Get(i)); } if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } Assert.AreEqual(true, bits.Get(doc)); } } Assert.AreEqual(ds1.Cardinality(), ds2.Cardinality()); }
private int CopyVectorsWithDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
{
    int maxDoc = reader.MaxDoc;
    IBits liveDocs = reader.LiveDocs;
    int totalNumDocs = 0;
    if (matchingVectorsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        for (int docNum = 0; docNum < maxDoc;)
        {
            if (!liveDocs.Get(docNum))
            {
                // skip deleted docs
                ++docNum;
                continue;
            }
            // We can optimize this case (doing a bulk byte copy) since the field
            // numbers are identical
            int start = docNum, numDocs = 0;
            do
            {
                docNum++;
                numDocs++;
                if (docNum >= maxDoc)
                {
                    break;
                }
                if (!liveDocs.Get(docNum))
                {
                    docNum++;
                    break;
                }
            } while (numDocs < MAX_RAW_MERGE_DOCS);

            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
            AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
            totalNumDocs += numDocs;
            mergeState.CheckAbort.Work(300 * numDocs);
        }
    }
    else
    {
        for (int docNum = 0; docNum < maxDoc; docNum++)
        {
            if (!liveDocs.Get(docNum))
            {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Fields vectors = reader.GetTermVectors(docNum);
            AddAllDocVectors(vectors, mergeState);
            totalNumDocs++;
            mergeState.CheckAbort.Work(300);
        }
    }
    return totalNumDocs;
}
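// Editor's note: an illustrative sketch, not part of the original source. It isolates the
// run-detection idea used by the bulk-copy branch above: scan liveDocs for maximal runs of
// consecutive live docs (capped at maxRun, MAX_RAW_MERGE_DOCS in the method above) so each
// run can be copied as one raw byte range instead of doc by doc. The helper is hypothetical.
private static System.Collections.Generic.IEnumerable<(int Start, int Count)> LiveDocRuns(Lucene.Net.Util.IBits liveDocs, int maxDoc, int maxRun)
{
    for (int docNum = 0; docNum < maxDoc;)
    {
        if (!liveDocs.Get(docNum))
        {
            docNum++; // skip deleted docs
            continue;
        }
        int start = docNum, numDocs = 0;
        do
        {
            docNum++;
            numDocs++;
            if (docNum >= maxDoc)
            {
                break;
            }
            if (!liveDocs.Get(docNum))
            {
                docNum++;
                break;
            }
        } while (numDocs < maxRun);
        yield return (start, numDocs);
    }
}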
public override FacetDataCache Load(BoboSegmentReader reader)
{
    TreeDictionary<object, List<int>> dataMap = null;
    List<int> docList = null;

    int nullMinId = -1;
    int nullMaxId = -1;
    int nullFreq = 0;

    int doc = -1;
    IBits liveDocs = reader.LiveDocs;
    for (int i = 0; i < reader.MaxDoc; ++i)
    {
        if (liveDocs != null && !liveDocs.Get(i))
        {
            continue;
        }
        doc = i;
        object val = m_facetDataFetcher.Fetch(reader, doc);
        if (val == null)
        {
            if (nullMinId < 0)
            {
                nullMinId = doc;
            }
            nullMaxId = doc;
            ++nullFreq;
            continue;
        }
        if (dataMap == null)
        {
            // Initialize.
            if (val is long[])
            {
                if (m_termListFactory == null)
                {
                    m_termListFactory = new TermFixedLengthInt64ArrayListFactory(((long[])val).Length);
                }
                dataMap = new TreeDictionary<object, List<int>>(new VirtualSimpleFacetHandlerInt16ArrayComparer());
            }
            else if (val is IComparable)
            {
                dataMap = new TreeDictionary<object, List<int>>();
            }
            else
            {
                dataMap = new TreeDictionary<object, List<int>>(new VirtualSimpleFacetHandlerObjectComparer());
            }
        }

        if (dataMap.Contains(val))
        {
            docList = dataMap[val];
        }
        else
        {
            docList = null;
        }

        if (docList == null)
        {
            docList = new List<int>();
            dataMap[val] = docList;
        }
        docList.Add(doc);
    }
    m_facetDataFetcher.Cleanup(reader);

    int maxDoc = reader.MaxDoc;
    int size = dataMap == null ? 1 : (dataMap.Count + 1);

    BigSegmentedArray order = new BigInt32Array(maxDoc);
    ITermValueList list = m_termListFactory == null ? new TermStringList(size) : m_termListFactory.CreateTermList(size);

    int[] freqs = new int[size];
    int[] minIDs = new int[size];
    int[] maxIDs = new int[size];

    list.Add(null);
    freqs[0] = nullFreq;
    minIDs[0] = nullMinId;
    maxIDs[0] = nullMaxId;

    if (dataMap != null)
    {
        int i = 1;
        int? docId;
        foreach (var entry in dataMap)
        {
            list.Add(list.Format(entry.Key));
            docList = entry.Value;
            freqs[i] = docList.Count;
            minIDs[i] = docList.Get(0, int.MinValue);
            while ((docId = docList.Poll(int.MinValue)) != int.MinValue)
            {
                doc = (int)docId;
                order.Add(doc, i);
            }
            maxIDs[i] = doc;
            ++i;
        }
    }
    list.Seal();

    FacetDataCache dataCache = new FacetDataCache(order, list, freqs, minIDs, maxIDs, TermCountSize.Large);
    return dataCache;
}
public override int NextDoc()
{
    bool first = true;
    _in.Seek(_nextDocStart);
    long posStart = 0;
    while (true)
    {
        long lineStart = _in.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        SimpleTextUtil.ReadLine(_in, _scratch);
        //System.out.println("NEXT DOC: " + scratch.utf8ToString());
        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
        {
            if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
            {
                _nextDocStart = lineStart;
                _in.Seek(posStart);
                return _docId;
            }
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16);
            _docId = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            _tf = 0;
            first = false;
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
        {
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length, _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
            _tf = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            posStart = _in.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS))
        {
            // skip
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
        {
            // skip
        }
        else
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) ||
                                 StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) ||
                                 StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END));
            }
            if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
            {
                _nextDocStart = lineStart;
                _in.Seek(posStart);
                return _docId;
            }
            return _docId = NO_MORE_DOCS;
        }
    }
}
public virtual void Test()
{
#pragma warning disable 612, 618
    IFieldCache cache = FieldCache.DEFAULT;
    FieldCache.Doubles doubles = cache.GetDoubles(Reader, "theDouble", Random().NextBoolean());
    Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", Random().NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", FieldCache.DEFAULT_DOUBLE_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(doubles.Get(i) == (double.MaxValue - i), doubles.Get(i) + " does not equal: " + (double.MaxValue - i));
    }

    FieldCache.Int64s longs = cache.GetInt64s(Reader, "theLong", Random().NextBoolean());
    Assert.AreSame(longs, cache.GetInt64s(Reader, "theLong", Random().NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(longs, cache.GetInt64s(Reader, "theLong", FieldCache.DEFAULT_INT64_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(longs.Get(i) == (long.MaxValue - i), longs.Get(i) + " does not equal: " + (long.MaxValue - i) + " i=" + i);
    }

    FieldCache.Bytes bytes = cache.GetBytes(Reader, "theByte", Random().NextBoolean());
    Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", Random().NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", FieldCache.DEFAULT_BYTE_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue((sbyte)bytes.Get(i) == (sbyte)(sbyte.MaxValue - i), (sbyte)bytes.Get(i) + " does not equal: " + (sbyte.MaxValue - i));
    }

    FieldCache.Int16s shorts = cache.GetInt16s(Reader, "theShort", Random().NextBoolean());
    Assert.AreSame(shorts, cache.GetInt16s(Reader, "theShort", Random().NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(shorts, cache.GetInt16s(Reader, "theShort", FieldCache.DEFAULT_INT16_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(shorts.Get(i) == (short)(short.MaxValue - i), shorts.Get(i) + " does not equal: " + (short.MaxValue - i));
    }

    FieldCache.Int32s ints = cache.GetInt32s(Reader, "theInt", Random().NextBoolean());
    Assert.AreSame(ints, cache.GetInt32s(Reader, "theInt", Random().NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(ints, cache.GetInt32s(Reader, "theInt", FieldCache.DEFAULT_INT32_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(ints.Get(i) == (int.MaxValue - i), ints.Get(i) + " does not equal: " + (int.MaxValue - i));
    }

    FieldCache.Singles floats = cache.GetSingles(Reader, "theFloat", Random().NextBoolean());
    Assert.AreSame(floats, cache.GetSingles(Reader, "theFloat", Random().NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(floats, cache.GetSingles(Reader, "theFloat", FieldCache.DEFAULT_SINGLE_PARSER, Random().NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(floats.Get(i) == (float.MaxValue - i), floats.Get(i) + " does not equal: " + (float.MaxValue - i));
    }
#pragma warning restore 612, 618

    IBits docsWithField = cache.GetDocsWithField(Reader, "theLong");
    Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "theLong"), "Second request to cache return same array");
    Assert.IsTrue(docsWithField is Bits.MatchAllBits, "docsWithField(theLong) must be class Bits.MatchAllBits");
    Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(theLong) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
    for (int i = 0; i < docsWithField.Length; i++)
    {
        Assert.IsTrue(docsWithField.Get(i));
    }

    docsWithField = cache.GetDocsWithField(Reader, "sparse");
    Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "sparse"), "Second request to cache return same array");
    Assert.IsFalse(docsWithField is Bits.MatchAllBits, "docsWithField(sparse) must not be class Bits.MatchAllBits");
    Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(sparse) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
    for (int i = 0; i < docsWithField.Length; i++)
    {
        Assert.AreEqual(i % 2 == 0, docsWithField.Get(i));
    }

    // getTermsIndex
    SortedDocValues termsIndex = cache.GetTermsIndex(Reader, "theRandomUnicodeString");
    Assert.AreSame(termsIndex, cache.GetTermsIndex(Reader, "theRandomUnicodeString"), "Second request to cache return same array");
    BytesRef br = new BytesRef();
    for (int i = 0; i < NUM_DOCS; i++)
    {
        BytesRef term;
        int ord = termsIndex.GetOrd(i);
        if (ord == -1)
        {
            term = null;
        }
        else
        {
            termsIndex.LookupOrd(ord, br);
            term = br;
        }
        string s = term == null ? null : term.Utf8ToString();
        Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
    }

    int nTerms = termsIndex.ValueCount;

    TermsEnum tenum = termsIndex.GetTermsEnum();
    BytesRef val = new BytesRef();
    for (int i = 0; i < nTerms; i++)
    {
        BytesRef val1 = tenum.Next();
        termsIndex.LookupOrd(i, val);
        // System.out.println("i="+i);
        Assert.AreEqual(val, val1);
    }

    // seek the enum around (note this isn't a great test here)
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        int k = Random().Next(nTerms);
        termsIndex.LookupOrd(k, val);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
        Assert.AreEqual(val, tenum.Term);
    }

    for (int i = 0; i < nTerms; i++)
    {
        termsIndex.LookupOrd(i, val);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
        Assert.AreEqual(val, tenum.Term);
    }

    // test bad field
    termsIndex = cache.GetTermsIndex(Reader, "bogusfield");

    // getTerms
    BinaryDocValues terms = cache.GetTerms(Reader, "theRandomUnicodeString", true);
    Assert.AreSame(terms, cache.GetTerms(Reader, "theRandomUnicodeString", true), "Second request to cache return same array");
    IBits bits = cache.GetDocsWithField(Reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        terms.Get(i, br);
        BytesRef term;
        if (!bits.Get(i))
        {
            term = null;
        }
        else
        {
            term = br;
        }
        string s = term == null ? null : term.Utf8ToString();
        Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
    }

    // test bad field
    terms = cache.GetTerms(Reader, "bogusfield", false);

    // getDocTermOrds
    SortedSetDocValues termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
    int numEntries = cache.GetCacheEntries().Length;
    // ask for it again, and check that we didnt create any additional entries:
    termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
    Assert.AreEqual(numEntries, cache.GetCacheEntries().Length);

    for (int i = 0; i < NUM_DOCS; i++)
    {
        termOrds.SetDocument(i);
        // this will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
        IList<BytesRef> values = new List<BytesRef>(new /*Linked*/ HashSet<BytesRef>(Arrays.AsList(MultiValued[i])));
        foreach (BytesRef v in values)
        {
            if (v == null)
            {
                // why does this test use null values... instead of an empty list: confusing
                break;
            }
            long ord = termOrds.NextOrd();
            Debug.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
            BytesRef scratch = new BytesRef();
            termOrds.LookupOrd(ord, scratch);
            Assert.AreEqual(v, scratch);
        }
        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, termOrds.NextOrd());
    }

    // test bad field
    termOrds = cache.GetDocTermOrds(Reader, "bogusfield");
    Assert.IsTrue(termOrds.ValueCount == 0);

    FieldCache.DEFAULT.PurgeByCacheKey(Reader.CoreCacheKey);
}
public virtual void TestRandom()
{
    int num = AtLeast(2);
    for (int iter = 0; iter < num; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: iter=" + iter);
        }

        Directory dir = NewDirectory();

        IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
        // we can do this because we use NoMergePolicy (and dont merge to "nothing")
        w.KeepFullyDeletedSegments = true;

        IDictionary<BytesRef, IList<int?>> docs = new Dictionary<BytesRef, IList<int?>>();
        ISet<int?> deleted = new JCG.HashSet<int?>();
        IList<BytesRef> terms = new List<BytesRef>();

        int numDocs = TestUtil.NextInt32(Random, 1, 100 * RANDOM_MULTIPLIER);
        Documents.Document doc = new Documents.Document();
        Field f = NewStringField("field", "", Field.Store.NO);
        doc.Add(f);
        Field id = NewStringField("id", "", Field.Store.NO);
        doc.Add(id);

        bool onlyUniqueTerms = Random.NextBoolean();
        if (VERBOSE)
        {
            Console.WriteLine("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
        }
        ISet<BytesRef> uniqueTerms = new JCG.HashSet<BytesRef>();
        for (int i = 0; i < numDocs; i++)
        {
            if (!onlyUniqueTerms && Random.NextBoolean() && terms.Count > 0)
            {
                // re-use existing term
                BytesRef term = terms[Random.Next(terms.Count)];
                docs[term].Add(i);
                f.SetStringValue(term.Utf8ToString());
            }
            else
            {
                string s = TestUtil.RandomUnicodeString(Random, 10);
                BytesRef term = new BytesRef(s);
                if (!docs.TryGetValue(term, out IList<int?> docsTerm))
                {
                    docs[term] = docsTerm = new List<int?>();
                }
                docsTerm.Add(i);
                terms.Add(term);
                uniqueTerms.Add(term);
                f.SetStringValue(s);
            }
            id.SetStringValue("" + i);
            w.AddDocument(doc);
            if (Random.Next(4) == 1)
            {
                w.Commit();
            }
            if (i > 0 && Random.Next(20) == 1)
            {
                int delID = Random.Next(i);
                deleted.Add(delID);
                w.DeleteDocuments(new Term("id", "" + delID));
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: delete " + delID);
                }
            }
        }

        if (VERBOSE)
        {
            List<BytesRef> termsList = new List<BytesRef>(uniqueTerms);
#pragma warning disable 612, 618
            termsList.Sort(BytesRef.UTF8SortedAsUTF16Comparer);
#pragma warning restore 612, 618
            Console.WriteLine("TEST: terms in UTF16 order:");
            foreach (BytesRef b in termsList)
            {
                Console.WriteLine("  " + UnicodeUtil.ToHexString(b.Utf8ToString()) + " " + b);
                foreach (int docID in docs[b])
                {
                    if (deleted.Contains(docID))
                    {
                        Console.WriteLine("    " + docID + " (deleted)");
                    }
                    else
                    {
                        Console.WriteLine("    " + docID);
                    }
                }
            }
        }

        IndexReader reader = w.GetReader();
        w.Dispose();
        if (VERBOSE)
        {
            Console.WriteLine("TEST: reader=" + reader);
        }

        IBits liveDocs = MultiFields.GetLiveDocs(reader);
        foreach (int delDoc in deleted)
        {
            Assert.IsFalse(liveDocs.Get(delDoc));
        }

        for (int i = 0; i < 100; i++)
        {
            BytesRef term = terms[Random.Next(terms.Count)];
            if (VERBOSE)
            {
                Console.WriteLine("TEST: seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " " + term);
            }

            DocsEnum docsEnum = TestUtil.Docs(Random, reader, "field", term, liveDocs, null, DocsFlags.NONE);
            Assert.IsNotNull(docsEnum);

            foreach (int docID in docs[term])
            {
                if (!deleted.Contains(docID))
                {
                    Assert.AreEqual(docID, docsEnum.NextDoc());
                }
            }
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc());
        }

        reader.Dispose();
        dir.Dispose();
    }
}
public virtual void TestDocValuesIntegration()
{
    AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues());
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);
    Document doc = new Document();
    doc.Add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
    doc.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
    doc.Add(new NumericDocValuesField("numeric", 42));
    if (DefaultCodecSupportsSortedSet())
    {
        doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
        doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
    }
    iw.AddDocument(doc);
    DirectoryReader ir = iw.Reader;
    iw.Dispose();
    AtomicReader ar = GetOnlySegmentReader(ir);

    BytesRef scratch = new BytesRef();

    // Binary type: can be retrieved via getTerms()
    try
    {
        FieldCache.DEFAULT.GetInt32s(ar, "binary", false);
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    BinaryDocValues binary = FieldCache.DEFAULT.GetTerms(ar, "binary", true);
    binary.Get(0, scratch);
    Assert.AreEqual("binary value", scratch.Utf8ToString());

    try
    {
        FieldCache.DEFAULT.GetTermsIndex(ar, "binary");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    try
    {
        FieldCache.DEFAULT.GetDocTermOrds(ar, "binary");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    try
    {
        new DocTermOrds(ar, null, "binary");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    IBits bits = FieldCache.DEFAULT.GetDocsWithField(ar, "binary");
    Assert.IsTrue(bits.Get(0));

    // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
    try
    {
        FieldCache.DEFAULT.GetInt32s(ar, "sorted", false);
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    try
    {
        new DocTermOrds(ar, null, "sorted");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    binary = FieldCache.DEFAULT.GetTerms(ar, "sorted", true);
    binary.Get(0, scratch);
    Assert.AreEqual("sorted value", scratch.Utf8ToString());

    SortedDocValues sorted = FieldCache.DEFAULT.GetTermsIndex(ar, "sorted");
    Assert.AreEqual(0, sorted.GetOrd(0));
    Assert.AreEqual(1, sorted.ValueCount);
    sorted.Get(0, scratch);
    Assert.AreEqual("sorted value", scratch.Utf8ToString());

    SortedSetDocValues sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sorted");
    sortedSet.SetDocument(0);
    Assert.AreEqual(0, sortedSet.NextOrd());
    Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
    Assert.AreEqual(1, sortedSet.ValueCount);

    bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sorted");
    Assert.IsTrue(bits.Get(0));

    // Numeric type: can be retrieved via getInts() and so on
    Int32s numeric = FieldCache.DEFAULT.GetInt32s(ar, "numeric", false);
    Assert.AreEqual(42, numeric.Get(0));

    try
    {
        FieldCache.DEFAULT.GetTerms(ar, "numeric", true);
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    try
    {
        FieldCache.DEFAULT.GetTermsIndex(ar, "numeric");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    try
    {
        FieldCache.DEFAULT.GetDocTermOrds(ar, "numeric");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    try
    {
        new DocTermOrds(ar, null, "numeric");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    bits = FieldCache.DEFAULT.GetDocsWithField(ar, "numeric");
    Assert.IsTrue(bits.Get(0));

    // SortedSet type: can be retrieved via getDocTermOrds()
    if (DefaultCodecSupportsSortedSet())
    {
        try
        {
            FieldCache.DEFAULT.GetInt32s(ar, "sortedset", false);
            Assert.Fail();
        }
#pragma warning disable 168
        catch (InvalidOperationException expected)
#pragma warning restore 168
        {
        }

        try
        {
            FieldCache.DEFAULT.GetTerms(ar, "sortedset", true);
            Assert.Fail();
        }
#pragma warning disable 168
        catch (InvalidOperationException expected)
#pragma warning restore 168
        {
        }

        try
        {
            FieldCache.DEFAULT.GetTermsIndex(ar, "sortedset");
            Assert.Fail();
        }
#pragma warning disable 168
        catch (InvalidOperationException expected)
#pragma warning restore 168
        {
        }

        try
        {
            new DocTermOrds(ar, null, "sortedset");
            Assert.Fail();
        }
#pragma warning disable 168
        catch (InvalidOperationException expected)
#pragma warning restore 168
        {
        }

        sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sortedset");
        sortedSet.SetDocument(0);
        Assert.AreEqual(0, sortedSet.NextOrd());
        Assert.AreEqual(1, sortedSet.NextOrd());
        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
        Assert.AreEqual(2, sortedSet.ValueCount);

        bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sortedset");
        Assert.IsTrue(bits.Get(0));
    }

    ir.Dispose();
    dir.Dispose();
}
public override int NextDoc() { //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc); // Loop until we hit a parentDoc that's accepted while (true) { if (_nextChildDoc == NO_MORE_DOCS) { //System.out.println(" end"); return(_parentDoc = NO_MORE_DOCS); } // Gather all children sharing the same parent as // nextChildDoc _parentDoc = _parentBits.NextSetBit(_nextChildDoc); // Parent & child docs are supposed to be // orthogonal: if (_nextChildDoc == _parentDoc) { throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType()); } //System.out.println(" parentDoc=" + parentDoc); if (Debugging.AssertsEnabled) { Debugging.Assert(_parentDoc != -1); } //System.out.println(" nextChildDoc=" + nextChildDoc); if (_acceptDocs != null && !_acceptDocs.Get(_parentDoc)) { // Parent doc not accepted; skip child docs until // we hit a new parent doc: do { _nextChildDoc = _childScorer.NextDoc(); } while (_nextChildDoc < _parentDoc); // Parent & child docs are supposed to be // orthogonal: if (_nextChildDoc == _parentDoc) { throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType()); } continue; } float totalScore = 0; float maxScore = float.NegativeInfinity; _childDocUpto = 0; _parentFreq = 0; do { //System.out.println(" c=" + nextChildDoc); if (_pendingChildDocs != null && _pendingChildDocs.Length == _childDocUpto) { _pendingChildDocs = ArrayUtil.Grow(_pendingChildDocs); } if (_pendingChildScores != null && _scoreMode != ScoreMode.None && _pendingChildScores.Length == _childDocUpto) { _pendingChildScores = ArrayUtil.Grow(_pendingChildScores); } if (_pendingChildDocs != null) { _pendingChildDocs[_childDocUpto] = _nextChildDoc; } if (_scoreMode != ScoreMode.None) { // TODO: specialize this into dedicated classes per-scoreMode float childScore = _childScorer.GetScore(); int childFreq = _childScorer.Freq; if (_pendingChildScores != null) { _pendingChildScores[_childDocUpto] = childScore; } maxScore = Math.Max(childScore, maxScore); totalScore += childScore; _parentFreq += childFreq; } _childDocUpto++; _nextChildDoc = _childScorer.NextDoc(); } while (_nextChildDoc < _parentDoc); // Parent & child docs are supposed to be // orthogonal: if (_nextChildDoc == _parentDoc) { throw new InvalidOperationException("child query must only match non-parent docs, but parent docID=" + _nextChildDoc + " matched childScorer=" + _childScorer.GetType()); } switch (_scoreMode) { case ScoreMode.Avg: _parentScore = totalScore / _childDocUpto; break; case ScoreMode.Max: _parentScore = maxScore; break; case ScoreMode.Total: _parentScore = totalScore; break; case ScoreMode.None: break; } //System.out.println(" return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto); return(_parentDoc); } }
public virtual void TestNonIndexedFields()
{
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    doc.Add(new StoredField("bogusbytes", "bogus"));
    doc.Add(new StoredField("bogusshorts", "bogus"));
    doc.Add(new StoredField("bogusints", "bogus"));
    doc.Add(new StoredField("boguslongs", "bogus"));
    doc.Add(new StoredField("bogusfloats", "bogus"));
    doc.Add(new StoredField("bogusdoubles", "bogus"));
    doc.Add(new StoredField("bogusterms", "bogus"));
    doc.Add(new StoredField("bogustermsindex", "bogus"));
    doc.Add(new StoredField("bogusmultivalued", "bogus"));
    doc.Add(new StoredField("bogusbits", "bogus"));
    iw.AddDocument(doc);
    DirectoryReader ir = iw.Reader;
    iw.Dispose();

    AtomicReader ar = GetOnlySegmentReader(ir);

    IFieldCache cache = FieldCache.DEFAULT;
    cache.PurgeAllCaches();
    Assert.AreEqual(0, cache.GetCacheEntries().Length);

#pragma warning disable 612, 618
    Bytes bytes = cache.GetBytes(ar, "bogusbytes", true);
    Assert.AreEqual(0, bytes.Get(0));

    Int16s shorts = cache.GetInt16s(ar, "bogusshorts", true);
    Assert.AreEqual(0, shorts.Get(0));
#pragma warning restore 612, 618

    Int32s ints = cache.GetInt32s(ar, "bogusints", true);
    Assert.AreEqual(0, ints.Get(0));

    Int64s longs = cache.GetInt64s(ar, "boguslongs", true);
    Assert.AreEqual(0, longs.Get(0));

    Singles floats = cache.GetSingles(ar, "bogusfloats", true);
    Assert.AreEqual(0, floats.Get(0), 0.0f);

    Doubles doubles = cache.GetDoubles(ar, "bogusdoubles", true);
    Assert.AreEqual(0, doubles.Get(0), 0.0D);

    BytesRef scratch = new BytesRef();
    BinaryDocValues binaries = cache.GetTerms(ar, "bogusterms", true);
    binaries.Get(0, scratch);
    Assert.AreEqual(0, scratch.Length);

    SortedDocValues sorted = cache.GetTermsIndex(ar, "bogustermsindex");
    Assert.AreEqual(-1, sorted.GetOrd(0));
    sorted.Get(0, scratch);
    Assert.AreEqual(0, scratch.Length);

    SortedSetDocValues sortedSet = cache.GetDocTermOrds(ar, "bogusmultivalued");
    sortedSet.SetDocument(0);
    Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());

    IBits bits = cache.GetDocsWithField(ar, "bogusbits");
    Assert.IsFalse(bits.Get(0));

    // check that we cached nothing
    Assert.AreEqual(0, cache.GetCacheEntries().Length);
    ir.Dispose();
    dir.Dispose();
}
public virtual void TestDocsWithField()
{
    AssumeTrue("codec does not support docsWithField", DefaultCodecSupportsDocsWithField);
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, null);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);
    int numDocs = AtLeast(500);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        if (Random.Next(4) >= 0)
        {
            doc.Add(new NumericDocValuesField("numbers", Random.NextInt64()));
        }
        doc.Add(new NumericDocValuesField("numbersAlways", Random.NextInt64()));
        iw.AddDocument(doc);
        if (Random.Next(17) == 0)
        {
            iw.Commit();
        }
    }
    DirectoryReader ir = iw.GetReader();
    iw.ForceMerge(1);
    DirectoryReader ir2 = iw.GetReader();
    AtomicReader merged = GetOnlySegmentReader(ir2);
    iw.Dispose();

    IBits multi = MultiDocValues.GetDocsWithField(ir, "numbers");
    IBits single = merged.GetDocsWithField("numbers");
    if (multi == null)
    {
        Assert.IsNull(single);
    }
    else
    {
        Assert.AreEqual(single.Length, multi.Length);
        for (int i = 0; i < numDocs; i++)
        {
            Assert.AreEqual(single.Get(i), multi.Get(i));
        }
    }

    multi = MultiDocValues.GetDocsWithField(ir, "numbersAlways");
    single = merged.GetDocsWithField("numbersAlways");
    Assert.AreEqual(single.Length, multi.Length);
    for (int i = 0; i < numDocs; i++)
    {
        Assert.AreEqual(single.Get(i), multi.Get(i));
    }
    ir.Dispose();
    ir2.Dispose();
    dir.Dispose();
}
public virtual void SearchIndex(Directory dir, string oldName)
{
    //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
    //Query query = parser.parse("handle:1");

    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);

    TestUtil.CheckIndex(dir);

    // true if this is a 4.0+ index
    bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;
    // true if this is a 4.2+ index
    bool is42Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("dvSortedSet") != null;

    Debug.Assert(is40Index); // NOTE: currently we can only do this on trunk!

    IBits liveDocs = MultiFields.GetLiveDocs(reader);

    for (int i = 0; i < 35; i++)
    {
        if (liveDocs.Get(i))
        {
            Document d = reader.Document(i);
            IList<IIndexableField> fields = d.Fields;
            bool isProxDoc = d.GetField("content3") == null;
            if (isProxDoc)
            {
                int numFields = is40Index ? 7 : 5;
                Assert.AreEqual(numFields, fields.Count);
                IIndexableField f = d.GetField("id");
                Assert.AreEqual("" + i, f.GetStringValue());

                f = d.GetField("utf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.GetStringValue());

                f = d.GetField("autf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.GetStringValue());

                f = d.GetField("content2");
                Assert.AreEqual("here is more content with aaa aaa aaa", f.GetStringValue());

                f = d.GetField("fie\u2C77ld");
                Assert.AreEqual("field with non-ascii name", f.GetStringValue());
            }

            Fields tfvFields = reader.GetTermVectors(i);
            Assert.IsNotNull(tfvFields, "i=" + i);
            Terms tfv = tfvFields.GetTerms("utf8");
            Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName);
        }
        else
        {
            // Only ID 7 is deleted
            Assert.AreEqual(7, i);
        }
    }

    if (is40Index)
    {
        // check docvalues fields
        NumericDocValues dvByte = MultiDocValues.GetNumericValues(reader, "dvByte");
        BinaryDocValues dvBytesDerefFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
        BinaryDocValues dvBytesDerefVar = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
        SortedDocValues dvBytesSortedFixed = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
        SortedDocValues dvBytesSortedVar = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
        BinaryDocValues dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
        BinaryDocValues dvBytesStraightVar = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
        NumericDocValues dvDouble = MultiDocValues.GetNumericValues(reader, "dvDouble");
        NumericDocValues dvFloat = MultiDocValues.GetNumericValues(reader, "dvFloat");
        NumericDocValues dvInt = MultiDocValues.GetNumericValues(reader, "dvInt");
        NumericDocValues dvLong = MultiDocValues.GetNumericValues(reader, "dvLong");
        NumericDocValues dvPacked = MultiDocValues.GetNumericValues(reader, "dvPacked");
        NumericDocValues dvShort = MultiDocValues.GetNumericValues(reader, "dvShort");
        SortedSetDocValues dvSortedSet = null;
        if (is42Index)
        {
            dvSortedSet = MultiDocValues.GetSortedSetValues(reader, "dvSortedSet");
        }

        for (int i = 0; i < 35; i++)
        {
            int id = Convert.ToInt32(reader.Document(i).Get("id"));
            Assert.AreEqual(id, dvByte.Get(i));

            sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
            BytesRef expectedRef = new BytesRef((byte[])(Array)bytes);
            BytesRef scratch = new BytesRef();

            dvBytesDerefFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesDerefVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesSortedFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesSortedVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesStraightFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesStraightVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);

            Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D);
            Assert.AreEqual((float)id, Number.Int32BitsToSingle((int)dvFloat.Get(i)), 0F);
            Assert.AreEqual(id, dvInt.Get(i));
            Assert.AreEqual(id, dvLong.Get(i));
            Assert.AreEqual(id, dvPacked.Get(i));
            Assert.AreEqual(id, dvShort.Get(i));

            if (is42Index)
            {
                dvSortedSet.SetDocument(i);
                long ord = dvSortedSet.NextOrd();
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.NextOrd());
                dvSortedSet.LookupOrd(ord, scratch);
                Assert.AreEqual(expectedRef, scratch);
            }
        }
    }

    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

    // First document should be #0
    Document doc = searcher.IndexReader.Document(hits[0].Doc);
    assertEquals("didn't get the right document first", "0", doc.Get("id"));

    DoTestHits(hits, 34, searcher.IndexReader);

    if (is40Index)
    {
        hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs;
        DoTestHits(hits, 34, searcher.IndexReader);

        hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs;
        DoTestHits(hits, 34, searcher.IndexReader);
    }

    hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);
    hits = searcher.Search(new TermQuery(new Term("utf8", "lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);
    hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);

    reader.Dispose();
}
public bool Get(int index) { return(@in.Get(docMap.NewToOld(index))); }
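// Editor's note: an illustrative sketch, not part of the original source. The two one-line
// Get(index) wrappers above are IBits views over a Sorter.DocMap: a sorted view translates a
// new (sorted) doc ID back to the original with NewToOld, while the inverse view uses
// OldToNew. A self-contained version of the NewToOld view; the class name and the
// fully-qualified Sorter.DocMap path are assumptions for the example.
internal sealed class SortedViewLiveDocs : Lucene.Net.Util.IBits
{
    private readonly Lucene.Net.Util.IBits @in; // live docs in the original doc order
    private readonly Lucene.Net.Index.Sorter.Sorter.DocMap docMap;

    public SortedViewLiveDocs(Lucene.Net.Util.IBits @in, Lucene.Net.Index.Sorter.Sorter.DocMap docMap)
    {
        this.@in = @in;
        this.docMap = docMap;
    }

    public bool Get(int index)
    {
        return @in.Get(docMap.NewToOld(index)); // ask the original bits for the unsorted doc
    }

    public int Length => @in.Length;
}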
protected override bool Match(int docid) { return(acceptDocs.Get(docid)); }
private int CopyFieldsWithDeletions(MergeState mergeState, AtomicReader reader, Lucene40StoredFieldsReader matchingFieldsReader, int[] rawDocLengths)
{
    int docCount = 0;
    int maxDoc = reader.MaxDoc;
    IBits liveDocs = reader.LiveDocs;
    Debug.Assert(liveDocs != null);
    if (matchingFieldsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        for (int j = 0; j < maxDoc;)
        {
            if (!liveDocs.Get(j))
            {
                // skip deleted docs
                ++j;
                continue;
            }
            // We can optimize this case (doing a bulk byte copy) since the field
            // numbers are identical
            int start = j, numDocs = 0;
            do
            {
                j++;
                numDocs++;
                if (j >= maxDoc)
                {
                    break;
                }
                if (!liveDocs.Get(j))
                {
                    j++;
                    break;
                }
            } while (numDocs < MAX_RAW_MERGE_DOCS);

            IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
            AddRawDocuments(stream, rawDocLengths, numDocs);
            docCount += numDocs;
            mergeState.CheckAbort.Work(300 * numDocs);
        }
    }
    else
    {
        for (int j = 0; j < maxDoc; j++)
        {
            if (!liveDocs.Get(j))
            {
                // skip deleted docs
                continue;
            }
            // TODO: this could be more efficient using
            // FieldVisitor instead of loading/writing entire
            // doc; ie we just have to renumber the field number
            // on the fly?
            // NOTE: it's very important to first assign to doc then pass it to
            // fieldsWriter.addDocument; see LUCENE-1282
            Document doc = reader.Document(j);
            AddDocument(doc, mergeState.FieldInfos);
            docCount++;
            mergeState.CheckAbort.Work(300);
        }
    }
    return docCount;
}
public override bool Exists(int doc) { return(arr.Get(doc) != 0 || valid.Get(doc)); }
public override int NextDoc()
{
    while (posPending > 0)
    {
        NextPosition();
    }
    while (true)
    {
        //System.out.println("  nextDoc cycle docUpto=" + docUpto + " numDocs=" + numDocs + " fp=" + in.getPosition() + " this=" + this);
        if (docUpto == numDocs)
        {
            //System.out.println("    END");
            return docID = NO_MORE_DOCS;
        }
        docUpto++;

        int code = @in.ReadVInt32();
        accum += (int)((uint)code >> 1);
        if ((code & 1) != 0)
        {
            freq = 1;
        }
        else
        {
            freq = @in.ReadVInt32();
            Debug.Assert(freq > 0);
        }

        if (liveDocs == null || liveDocs.Get(accum))
        {
            pos = 0;
            startOffset = storeOffsets ? 0 : -1;
            posPending = freq;
            //System.out.println("    return docID=" + accum + " freq=" + freq);
            return docID = accum;
        }

        // Skip positions
        for (int posUpto = 0; posUpto < freq; posUpto++)
        {
            if (!storePayloads)
            {
                @in.ReadVInt32();
            }
            else
            {
                int skipCode = @in.ReadVInt32();
                if ((skipCode & 1) != 0)
                {
                    payloadLength = @in.ReadVInt32();
                    //System.out.println("    new payloadLen=" + payloadLength);
                }
            }
            if (storeOffsets)
            {
                if ((@in.ReadVInt32() & 1) != 0)
                {
                    // new offset length
                    offsetLength = @in.ReadVInt32();
                }
            }
            if (storePayloads)
            {
                @in.SkipBytes(payloadLength);
            }
        }
    }
}
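// Editor's note: an illustrative sketch, not part of the original source. It shows the writer
// side of the "payload length carry" convention the skip loops above decode: the payload
// length is written only when it changes, and the low bit of the position code flags that a
// new length follows. Field and method names here are hypothetical.
private int _lastPayloadLength = -1;

private void WritePosition(Lucene.Net.Store.DataOutput @out, int posDelta, int payloadLength)
{
    if (payloadLength != _lastPayloadLength)
    {
        @out.WriteVInt32((posDelta << 1) | 1); // low bit set: a new payload length follows
        @out.WriteVInt32(payloadLength);
        _lastPayloadLength = payloadLength;
    }
    else
    {
        @out.WriteVInt32(posDelta << 1);       // low bit clear: reuse the previous length
    }
}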
public override int NextDoc()
{
    if (_docId == NO_MORE_DOCS)
    {
        return _docId;
    }

    bool first = true;
    int termFreq = 0;
    while (true)
    {
        long lineStart = _in.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        SimpleTextUtil.ReadLine(_in, _scratch);
        if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.DOC))
        {
            if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
            {
                _in.Seek(lineStart);
                if (!_omitTf)
                {
                    _tf = termFreq;
                }
                return _docId;
            }
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.DOC.Length, _scratch.Length - SimpleTextFieldsWriter.DOC.Length, _scratchUtf16);
            _docId = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
            termFreq = 0;
            first = false;
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FREQ))
        {
            UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextFieldsWriter.FREQ.Length, _scratch.Length - SimpleTextFieldsWriter.FREQ.Length, _scratchUtf16);
            termFreq = ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.POS))
        {
            // skip termFreq++;
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.START_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END_OFFSET))
        {
            // skip
        }
        else if (StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.PAYLOAD))
        {
            // skip
        }
        else
        {
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(
                    StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.TERM) ||
                    StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.FIELD) ||
                    StringHelper.StartsWith(_scratch, SimpleTextFieldsWriter.END),
                    "scratch={0}", _scratch.Utf8ToString());
            }
            if (!first && (_liveDocs == null || _liveDocs.Get(_docId)))
            {
                _in.Seek(lineStart);
                if (!_omitTf)
                {
                    _tf = termFreq;
                }
                return _docId;
            }
            return _docId = NO_MORE_DOCS;
        }
    }
}
public virtual void TestStressMultiThreading()
{
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    IndexWriter writer = new IndexWriter(dir, conf);

    // create index
    int numThreads = TestUtil.NextInt32(Random, 3, 6);
    int numDocs = AtLeast(2000);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(new StringField("id", "doc" + i, Store.NO));
        double group = Random.NextDouble();
        string g;
        if (group < 0.1)
        {
            g = "g0";
        }
        else if (group < 0.5)
        {
            g = "g1";
        }
        else if (group < 0.8)
        {
            g = "g2";
        }
        else
        {
            g = "g3";
        }
        doc.Add(new StringField("updKey", g, Store.NO));
        for (int j = 0; j < numThreads; j++)
        {
            long value = Random.Next();
            doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(value)));
            doc.Add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2
        }
        writer.AddDocument(doc);
    }

    CountdownEvent done = new CountdownEvent(numThreads);
    AtomicInt32 numUpdates = new AtomicInt32(AtLeast(100));

    // same thread updates a field as well as reopens
    ThreadJob[] threads = new ThreadJob[numThreads];
    for (int i = 0; i < threads.Length; i++)
    {
        string f = "f" + i;
        string cf = "cf" + i;
        threads[i] = new ThreadAnonymousInnerClassHelper(this, "UpdateThread-" + i, writer, numDocs, done, numUpdates, f, cf);
    }

    foreach (ThreadJob t in threads)
    {
        t.Start();
    }
    done.Wait();
    writer.Dispose();

    DirectoryReader reader = DirectoryReader.Open(dir);
    BytesRef scratch = new BytesRef();
    foreach (AtomicReaderContext context in reader.Leaves)
    {
        AtomicReader r = context.AtomicReader;
        for (int i = 0; i < numThreads; i++)
        {
            BinaryDocValues bdv = r.GetBinaryDocValues("f" + i);
            NumericDocValues control = r.GetNumericDocValues("cf" + i);
            IBits docsWithBdv = r.GetDocsWithField("f" + i);
            IBits docsWithControl = r.GetDocsWithField("cf" + i);
            IBits liveDocs = r.LiveDocs;
            for (int j = 0; j < r.MaxDoc; j++)
            {
                if (liveDocs == null || liveDocs.Get(j))
                {
                    Assert.AreEqual(docsWithBdv.Get(j), docsWithControl.Get(j));
                    if (docsWithBdv.Get(j))
                    {
                        long ctrlValue = control.Get(j);
                        long bdvValue = TestBinaryDocValuesUpdates.GetValue(bdv, j, scratch) * 2;
                        // if (ctrlValue != bdvValue) {
                        //     System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.Document(j).Get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch);
                        // }
                        Assert.AreEqual(ctrlValue, bdvValue);
                    }
                }
            }
        }
    }

    reader.Dispose();
    dir.Dispose();
}
public override object ObjectVal(int doc) { return(valid.Get(doc) ? (object)arr.Get(doc) : null); }
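// Editor's note: an illustrative sketch, not part of the original source. The ObjectVal/Exists
// one-liners above all follow the same FieldCache-style pattern: a primitive array holds one
// slot per doc (0 as the default) and a parallel "valid" bit set records which docs actually
// had the field, so Exists can short-circuit on a non-default value. The class is hypothetical.
internal sealed class Int32ValueSourceSketch
{
    private readonly int[] arr;                         // one value slot per doc; 0 = default
    private readonly Lucene.Net.Util.FixedBitSet valid; // set for docs where the field exists

    public Int32ValueSourceSketch(int[] arr, Lucene.Net.Util.FixedBitSet valid)
    {
        this.arr = arr;
        this.valid = valid;
    }

    public bool Exists(int doc)
    {
        return arr[doc] != 0 || valid.Get(doc); // a non-default value implies the field was present
    }

    public object ObjectVal(int doc)
    {
        return valid.Get(doc) ? (object)arr[doc] : null; // boxed int, or null when absent
    }
}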