public _VisitorTemplate_55(IntersectsPrefixTreeFilter _enclosing, AtomicReaderContext baseArg1, IBits baseArg2, bool baseArg3) : base(_enclosing, baseArg1, baseArg2, baseArg3) { this._enclosing = _enclosing; }
public DistanceDocValues(DistanceValueSource enclosingInstance, IndexReader reader)
{
    this.enclosingInstance = enclosingInstance;
    ptX = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.GetFieldNameX()/*, true*/);
    ptY = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.GetFieldNameY()/*, true*/);
    validX = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.GetFieldNameX());
    validY = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.GetFieldNameY());
}
public BBoxSimilarityValueSourceDocValues(IndexReader reader, BBoxSimilarityValueSource enclosingInstance)
{
    _enclosingInstance = enclosingInstance;
    minX = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minX/*, true*/);
    minY = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minY/*, true*/);
    maxX = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxX/*, true*/);
    maxY = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxY/*, true*/);
    validMinX = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_minX);
    validMaxX = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_maxX);
}
public DistanceDocValues(DistanceValueSource enclosingInstance, IndexReader reader)
{
    this.enclosingInstance = enclosingInstance;
    ptX = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.GetFieldNameX()/*, true*/);
    ptY = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.GetFieldNameY()/*, true*/);
    validX = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.GetFieldNameX());
    validY = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.GetFieldNameY());
    from = enclosingInstance.from;
    calculator = enclosingInstance.strategy.GetSpatialContext().GetDistCalc();
    nullValue = (enclosingInstance.strategy.GetSpatialContext().IsGeo() ? 180 : double.MaxValue);
}
public BBoxSimilarityValueSourceDocValues(IndexReader reader, BBoxSimilarityValueSource enclosingInstance)
{
    _enclosingInstance = enclosingInstance;
    rect = _enclosingInstance.strategy.GetSpatialContext().MakeRectangle(0, 0, 0, 0); // reused
    minX = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minX/*, true*/);
    minY = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minY/*, true*/);
    maxX = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxX/*, true*/);
    maxY = FieldCache_Fields.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxY/*, true*/);
    validMinX = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_minX);
    validMaxX = FieldCache_Fields.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_maxX);
}
public BBoxSimilarityValueSourceFunctionValue(AtomicReader reader, BBoxSimilarityValueSource enclosingInstance)
{
    _enclosingInstance = enclosingInstance;
    rect = _enclosingInstance.strategy.SpatialContext.MakeRectangle(0, 0, 0, 0); // reused
    minX = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minX, true);
    minY = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_minY, true);
    maxX = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxX, true);
    maxY = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.field_maxY, true);
    validMinX = FieldCache.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_minX);
    validMaxX = FieldCache.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.field_maxX);
}
/// <exception cref="System.IO.IOException"></exception> public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs ) { return new _VisitorTemplate_121(this, context, acceptDocs, true).GetDocIdSet(); }
/// <exception cref="System.IO.IOException"></exception> public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs ) { return new ContainsVisitor(this, context, acceptDocs).Visit(grid.WorldCell, acceptDocs); }
// see getLeafDocs
/// <summary>This is the primary algorithm; recursive.</summary>
/// <remarks>This is the primary algorithm; recursive. Returns null if it finds none.</remarks>
/// <exception cref="System.IO.IOException"></exception>
internal SmallDocSet Visit(Cell cell, IBits acceptContains)
{
    if (termsEnum == null)
    {
        // signals all done
        return null;
    }

    // Leaf docs match all query shape
    SmallDocSet leafDocs = GetLeafDocs(cell, acceptContains);

    // Get the AND of all child results
    SmallDocSet combinedSubResults = null;
    ICollection<Cell> subCells = cell.GetSubCells(_enclosing.queryShape);
    foreach (Cell subCell in subCells)
    {
        if (!SeekExact(subCell))
        {
            combinedSubResults = null;
        }
        else if (subCell.Level == _enclosing.detailLevel)
        {
            combinedSubResults = GetDocs(subCell, acceptContains);
        }
        else if (subCell.GetShapeRel() == SpatialRelation.WITHIN)
        {
            combinedSubResults = GetLeafDocs(subCell, acceptContains);
        }
        else
        {
            combinedSubResults = Visit(subCell, acceptContains); // recursion
        }

        if (combinedSubResults == null)
        {
            break;
        }
        acceptContains = combinedSubResults; // has the 'AND' effect on next iteration
    }

    // Result: OR the leaf docs with AND of all child results
    if (combinedSubResults != null)
    {
        if (leafDocs == null)
        {
            return combinedSubResults;
        }
        return leafDocs.Union(combinedSubResults);
    }
    return leafDocs;
}
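// --- Added illustrative sketch (not from the Lucene.NET sources above). Shows how a prefix-tree
// --- filter such as the Intersects/Contains filters above is typically obtained and used from
// --- application code; the field name, tree depth, and query rectangle are invented, and exact
// --- namespaces vary across Spatial4n / Lucene.Net.Spatial versions.
static TopDocs FindDocsInBox(IndexSearcher searcher)
{
    var ctx = SpatialContext.GEO;                                       // geodetic context
    var grid = new GeohashPrefixTree(ctx, 11);                          // assumed tree depth
    var strategy = new RecursivePrefixTreeStrategy(grid, "geoField");   // hypothetical field name

    // Intersects (or Contains) yields a prefix-tree filter driven by visitors like the ones above.
    var args = new SpatialArgs(SpatialOperation.Intersects, ctx.MakeRectangle(-10, 10, -10, 10));
    Filter filter = strategy.MakeFilter(args);
    return searcher.Search(new MatchAllDocsQuery(), filter, 10);
}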
/// <exception cref="System.IO.IOException"></exception> public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs ) { IBits docsWithField; if (field == null) { docsWithField = null; } else { //all docs //NOTE By using the FieldCache we re-use a cache // which is nice but loading it in this way might be slower than say using an // intersects filter against the world bounds. So do we add a method to the // strategy, perhaps? But the strategy can't cache it. docsWithField = FieldCache.DEFAULT.GetDocsWithField((context.AtomicReader), field); int maxDoc = context.AtomicReader.MaxDoc; if (docsWithField.Length != maxDoc) { throw new InvalidOperationException("Bits length should be maxDoc (" + maxDoc + ") but wasn't: " + docsWithField); } if (docsWithField is Bits.MatchNoBits) { return null; } else { //match nothing if (docsWithField is Bits.MatchAllBits) { docsWithField = null; } } } //all docs //not so much a chain but a way to conveniently invert the Filter DocIdSet docIdSet = new ChainedFilter(new[] { intersectsFilter }, ChainedFilter.ANDNOT).GetDocIdSet(context, acceptDocs); return BitsFilteredDocIdSet.Wrap(docIdSet, docsWithField); }
/// <exception cref="System.IO.IOException"></exception> private ContainsPrefixTreeFilter.SmallDocSet GetDocs(Cell cell, IBits acceptContains ) { System.Diagnostics.Debug.Assert(new BytesRef(cell.GetTokenBytes().ToSByteArray()).Equals(this.termBytes )); return this.CollectDocs(acceptContains); }
public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs) { Assert.IsNull(acceptDocs, "acceptDocs should be null, as we have an index without deletions"); return(new DocIdBitSet(Rnd)); }
public override DocsAndPositionsEnum DocsAndPositions(IBits liveDocs, DocsAndPositionsEnum reuse, DocsAndPositionsFlags flags) { return(m_input.DocsAndPositions(liveDocs, reuse, flags)); }
/// <exception cref="System.IO.IOException"/> public AllScorer(FunctionQuery outerInstance, AtomicReaderContext context, IBits acceptDocs, FunctionWeight w, float qWeight) : base(w) { this.outerInstance = outerInstance; this.weight = w; this.qWeight = qWeight; this.reader = context.Reader; this.maxDoc = reader.MaxDoc; this.acceptDocs = acceptDocs; vals = outerInstance.func.GetValues(weight.m_context, context); }
public FieldCacheDocIdSetAnonymousInnerClassHelper(MultiTermQueryDocTermOrdsWrapperFilter outerInstance, int maxDoc, IBits acceptDocs, SortedSetDocValues docTermOrds, Int64BitSet termSet) : base(maxDoc, acceptDocs) { this.outerInstance = outerInstance; this.docTermOrds = docTermOrds; this.termSet = termSet; }
public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, IBits acceptDocs, IDictionary<Term, TermContext> termContexts, bool collectPayloads)
{
    // LUCENENET: Added guard clauses for null
    if (spanNearQuery is null)
    {
        throw new ArgumentNullException(nameof(spanNearQuery));
    }
    sorter = new InPlaceMergeSorterAnonymousClass(this);
    if (spanNearQuery.GetClauses().Length < 2)
    {
        throw new ArgumentException("Less than 2 clauses: " + spanNearQuery);
    }
    this.collectPayloads = collectPayloads;
    allowedSlop = spanNearQuery.Slop;
    SpanQuery[] clauses = spanNearQuery.GetClauses();
    subSpans = new Spans[clauses.Length];
    matchPayload = new JCG.List<byte[]>();
    subSpansByDoc = new Spans[clauses.Length];
    for (int i = 0; i < clauses.Length; i++)
    {
        subSpans[i] = clauses[i].GetSpans(context, acceptDocs, termContexts);
        subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
    }
    query = spanNearQuery; // kept for toString() only.
}
public Int32DocValuesAnonymousInnerClassHelper(EnumFieldSource outerInstance, EnumFieldSource @this, FieldCache.Int32s arr, IBits valid) : base(@this) { this.outerInstance = outerInstance; this.arr = arr; this.valid = valid; val = new MutableValueInt32(); }
private readonly bool collectPayloads = true; // LUCENENET: marked readonly

public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, IBits acceptDocs, IDictionary<Term, TermContext> termContexts)
    : this(spanNearQuery, context, acceptDocs, termContexts, true)
{
}
public override DocsAndPositionsEnum DocsAndPositions(IBits bits, DocsAndPositionsEnum reuse, DocsAndPositionsFlags flags) { return(tenum.DocsAndPositions(bits, reuse, flags)); }
public override DocsEnum Docs(IBits bits, DocsEnum reuse, DocsFlags flags) { return(tenum.Docs(bits, reuse, flags)); }
/// <summary>
/// Assert that the content of the <see cref="DocIdSet"/> is the same as the content of the <see cref="BitArray"/>.
/// </summary>
#pragma warning disable xUnit1013
public virtual void AssertEquals(int numBits, BitArray ds1, T ds2)
#pragma warning restore xUnit1013
{
    // nextDoc
    DocIdSetIterator it2 = ds2.GetIterator();
    if (it2 == null)
    {
        Assert.AreEqual(-1, ds1.NextSetBit(0));
    }
    else
    {
        Assert.AreEqual(-1, it2.DocID);
        for (int doc = ds1.NextSetBit(0); doc != -1; doc = ds1.NextSetBit(doc + 1))
        {
            Assert.AreEqual(doc, it2.NextDoc());
            Assert.AreEqual(doc, it2.DocID);
        }
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, it2.NextDoc());
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, it2.DocID);
    }

    // nextDoc / advance
    it2 = ds2.GetIterator();
    if (it2 == null)
    {
        Assert.AreEqual(-1, ds1.NextSetBit(0));
    }
    else
    {
        for (int doc = -1; doc != DocIdSetIterator.NO_MORE_DOCS;)
        {
            if (Random.NextBoolean())
            {
                doc = ds1.NextSetBit(doc + 1);
                if (doc == -1)
                {
                    doc = DocIdSetIterator.NO_MORE_DOCS;
                }
                Assert.AreEqual(doc, it2.NextDoc());
                Assert.AreEqual(doc, it2.DocID);
            }
            else
            {
                int target = doc + 1 + Random.Next(Random.NextBoolean() ? 64 : Math.Max(numBits / 8, 1));
                doc = ds1.NextSetBit(target);
                if (doc == -1)
                {
                    doc = DocIdSetIterator.NO_MORE_DOCS;
                }
                Assert.AreEqual(doc, it2.Advance(target));
                Assert.AreEqual(doc, it2.DocID);
            }
        }
    }

    // bits()
    IBits bits = ds2.Bits;
    if (bits != null)
    {
        // test consistency between bits and iterator
        it2 = ds2.GetIterator();
        for (int previousDoc = -1, doc = it2.NextDoc(); ; previousDoc = doc, doc = it2.NextDoc())
        {
            int max = doc == DocIdSetIterator.NO_MORE_DOCS ? bits.Length : doc;
            for (int i = previousDoc + 1; i < max; ++i)
            {
                Assert.AreEqual(false, bits.Get(i));
            }
            if (doc == DocIdSetIterator.NO_MORE_DOCS)
            {
                break;
            }
            Assert.AreEqual(true, bits.Get(doc));
        }
    }
}
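// --- Added illustrative sketch (not from the Lucene.NET sources above). Shows how the assertion
// --- helper above might be driven from a test; FixedBitSet stands in for the DocIdSet
// --- implementation under test (the generic T), and the bit count is arbitrary.
int numBits = 1 + Random.Next(1000);
var expected = new BitArray(numBits);           // reference bits
var actual = new FixedBitSet(numBits);          // implementation under test
for (int i = 0; i < numBits; i++)
{
    if (Random.NextBoolean())
    {
        expected.Set(i, true);
        actual.Set(i);
    }
}
AssertEquals(numBits, expected, actual);        // compares iterator and Bits views against the BitArray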
public override IMutableBits NewLiveDocs(IBits existing) { BitVector liveDocs = (BitVector)existing; return((BitVector)liveDocs.Clone()); }
public override Scorer GetScorer(AtomicReaderContext ctx, IBits acceptDocs) { return(new AllScorer(outerInstance, ctx, acceptDocs, this, m_queryWeight)); }
// Runs test, with multiple threads, using the specific // failure to trigger an IOException public virtual void TestMultipleThreadsFailure(Func <IConcurrentMergeScheduler> newScheduler, MockDirectoryWrapper.Failure failure) { int NUM_THREADS = 3; for (int iter = 0; iter < 2; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } MockDirectoryWrapper dir = NewMockDirectory(); var config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(2) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(4)); IndexWriter writer = new IndexWriter(dir, config); var scheduler = config.mergeScheduler as IConcurrentMergeScheduler; if (scheduler != null) { scheduler.SetSuppressExceptions(); } IndexerThread[] threads = new IndexerThread[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { threads[i] = new IndexerThread(writer, true, NewField); } for (int i = 0; i < NUM_THREADS; i++) { threads[i].Start(); } Thread.Sleep(10); dir.FailOn(failure); failure.SetDoFail(); for (int i = 0; i < NUM_THREADS; i++) { threads[i].Join(); Assert.IsTrue(threads[i].Error == null, "hit unexpected Throwable"); } bool success = false; try { writer.Dispose(false); success = true; } catch (IOException) { failure.ClearDoFail(); writer.Dispose(false); } if (VERBOSE) { Console.WriteLine("TEST: success=" + success); } if (success) { IndexReader reader = DirectoryReader.Open(dir); IBits delDocs = MultiFields.GetLiveDocs(reader); for (int j = 0; j < reader.MaxDoc; j++) { if (delDocs == null || !delDocs.Get(j)) { reader.Document(j); reader.GetTermVectors(j); } } reader.Dispose(); } dir.Dispose(); } }
public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags) { return(termsEnum.Docs(liveDocs, reuse, flags)); }
public override int DoLogic() { int res = 0; // open reader or use existing one IndexSearcher searcher = RunData.GetIndexSearcher(); IndexReader reader; bool closeSearcher; if (searcher == null) { // open our own reader Directory dir = RunData.Directory; reader = DirectoryReader.Open(dir); searcher = new IndexSearcher(reader); closeSearcher = true; } else { // use existing one; this passes +1 ref to us reader = searcher.IndexReader; closeSearcher = false; } // optionally warm and add num docs traversed to count if (WithWarm) { Document doc = null; IBits liveDocs = MultiFields.GetLiveDocs(reader); for (int m = 0; m < reader.MaxDoc; m++) { if (null == liveDocs || liveDocs.Get(m)) { doc = reader.Document(m); res += (doc == null ? 0 : 1); } } } if (WithSearch) { res++; Query q = queryMaker.MakeQuery(); Sort sort = Sort; TopDocs hits = null; int numHits = NumHits; if (numHits > 0) { if (WithCollector == false) { if (sort != null) { // TODO: instead of always passing false we // should detect based on the query; if we make // the IndexSearcher search methods that take // Weight public again, we can go back to // pulling the Weight ourselves: TopFieldCollector collector = TopFieldCollector.Create(sort, numHits, true, WithScore, WithMaxScore, false); searcher.Search(q, null, collector); hits = collector.GetTopDocs(); } else { hits = searcher.Search(q, numHits); } } else { ICollector collector = CreateCollector(); searcher.Search(q, null, collector); //hits = collector.topDocs(); } string printHitsField = RunData.Config.Get("print.hits.field", null); if (hits != null && printHitsField != null && printHitsField.Length > 0) { Console.WriteLine("totalHits = " + hits.TotalHits); Console.WriteLine("maxDoc() = " + reader.MaxDoc); Console.WriteLine("numDocs() = " + reader.NumDocs); for (int i = 0; i < hits.ScoreDocs.Length; i++) { int docID = hits.ScoreDocs[i].Doc; Document doc = reader.Document(docID); Console.WriteLine(" " + i + ": doc=" + docID + " score=" + hits.ScoreDocs[i].Score + " " + printHitsField + " =" + doc.Get(printHitsField)); } } if (WithTraverse) { ScoreDoc[] scoreDocs = hits.ScoreDocs; int traversalSize = Math.Min(scoreDocs.Length, TraversalSize); if (traversalSize > 0) { bool retrieve = WithRetrieve; int numHighlight = Math.Min(NumToHighlight, scoreDocs.Length); Analyzer analyzer = RunData.Analyzer; BenchmarkHighlighter highlighter = null; if (numHighlight > 0) { highlighter = GetBenchmarkHighlighter(q); } for (int m = 0; m < traversalSize; m++) { int id = scoreDocs[m].Doc; res++; if (retrieve) { Document document = RetrieveDoc(reader, id); res += document != null ? 1 : 0; if (numHighlight > 0 && m < numHighlight) { ICollection <string> fieldsToHighlight = GetFieldsToHighlight(document); foreach (string field in fieldsToHighlight) { string text = document.Get(field); res += highlighter.DoHighlight(reader, id, field, document, analyzer, text); } } } } } } } } if (closeSearcher) { reader.Dispose(); } else { // Release our +1 ref from above reader.DecRef(); } return(res); }
/// <summary>
/// Fills a <see cref="T:IDictionary{string, WeightedSpanTerm}"/> with <see cref="WeightedSpanTerm"/>s using the terms from the supplied <see cref="SpanQuery"/>.
/// </summary>
/// <param name="terms"><see cref="T:IDictionary{string, WeightedSpanTerm}"/> to place created <see cref="WeightedSpanTerm"/>s in</param>
/// <param name="spanQuery"><see cref="SpanQuery"/> to extract Terms from</param>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
protected virtual void ExtractWeightedSpanTerms(IDictionary<string, WeightedSpanTerm> terms, SpanQuery spanQuery)
{
    ISet<string> fieldNames;
    if (fieldName == null)
    {
        fieldNames = new JCG.HashSet<string>();
        CollectSpanQueryFields(spanQuery, fieldNames);
    }
    else
    {
        fieldNames = new JCG.HashSet<string> { fieldName };
    }
    // To support the use of the default field name
    if (defaultField != null)
    {
        fieldNames.Add(defaultField);
    }

    IDictionary<string, SpanQuery> queries = new JCG.Dictionary<string, SpanQuery>();
    var nonWeightedTerms = new JCG.HashSet<Term>();
    bool mustRewriteQuery = MustRewriteQuery(spanQuery);
    if (mustRewriteQuery)
    {
        foreach (string field in fieldNames)
        {
            SpanQuery rewrittenQuery = (SpanQuery)spanQuery.Rewrite(GetLeafContext().Reader);
            queries[field] = rewrittenQuery;
            rewrittenQuery.ExtractTerms(nonWeightedTerms);
        }
    }
    else
    {
        spanQuery.ExtractTerms(nonWeightedTerms);
    }

    List<PositionSpan> spanPositions = new List<PositionSpan>();

    foreach (string field in fieldNames)
    {
        SpanQuery q;
        q = mustRewriteQuery ? queries[field] : spanQuery;
        AtomicReaderContext context = GetLeafContext();
        var termContexts = new JCG.Dictionary<Term, TermContext>();
        ISet<Term> extractedTerms = new JCG.SortedSet<Term>();
        q.ExtractTerms(extractedTerms);
        foreach (Term term in extractedTerms)
        {
            termContexts[term] = TermContext.Build(context, term);
        }
        IBits acceptDocs = context.AtomicReader.LiveDocs;
        Spans.Spans spans = q.GetSpans(context, acceptDocs, termContexts);

        // collect span positions
        while (spans.MoveNext())
        {
            spanPositions.Add(new PositionSpan(spans.Start, spans.End - 1));
        }
    }

    if (spanPositions.Count == 0)
    {
        // no spans found
        return;
    }

    foreach (Term queryTerm in nonWeightedTerms)
    {
        if (FieldNameComparer(queryTerm.Field))
        {
            if (!terms.TryGetValue(queryTerm.Text(), out WeightedSpanTerm weightedSpanTerm) || weightedSpanTerm == null)
            {
                weightedSpanTerm = new WeightedSpanTerm(spanQuery.Boost, queryTerm.Text());
                weightedSpanTerm.AddPositionSpans(spanPositions);
                weightedSpanTerm.IsPositionSensitive = true;
                terms[queryTerm.Text()] = weightedSpanTerm;
            }
            else
            {
                if (spanPositions.Count > 0)
                {
                    weightedSpanTerm.AddPositionSpans(spanPositions);
                }
            }
        }
    }
}
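// --- Added illustrative sketch (not from the Lucene.NET sources above). Re-uses the same
// --- TermContext.Build / GetSpans pattern as ExtractWeightedSpanTerms; `context` is an assumed
// --- AtomicReaderContext for the segment being inspected, and the "body" field/terms are invented.
var spanQuery = new SpanNearQuery(
    new SpanQuery[]
    {
        new SpanTermQuery(new Term("body", "lucene")),
        new SpanTermQuery(new Term("body", "net"))
    },
    2,      // slop
    true);  // in order

var termContexts = new Dictionary<Term, TermContext>();
ISet<Term> extractedTerms = new HashSet<Term>();
spanQuery.ExtractTerms(extractedTerms);
foreach (Term term in extractedTerms)
{
    termContexts[term] = TermContext.Build(context, term);
}

IBits acceptDocs = context.AtomicReader.LiveDocs;
Spans.Spans spans = spanQuery.GetSpans(context, acceptDocs, termContexts);
while (spans.MoveNext())
{
    Console.WriteLine("doc=" + spans.Doc + " start=" + spans.Start + " end=" + spans.End);
}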
public virtual void TestRandom() { int num = AtLeast(2); for (int iter = 0; iter < num; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } Directory dir = NewDirectory(); IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES)); // we can do this because we use NoMergePolicy (and dont merge to "nothing") w.KeepFullyDeletedSegments = true; IDictionary <BytesRef, IList <int?> > docs = new Dictionary <BytesRef, IList <int?> >(); HashSet <int?> deleted = new HashSet <int?>(); IList <BytesRef> terms = new List <BytesRef>(); int numDocs = TestUtil.NextInt(Random(), 1, 100 * RANDOM_MULTIPLIER); Documents.Document doc = new Documents.Document(); Field f = NewStringField("field", "", Field.Store.NO); doc.Add(f); Field id = NewStringField("id", "", Field.Store.NO); doc.Add(id); bool onlyUniqueTerms = Random().NextBoolean(); if (VERBOSE) { Console.WriteLine("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs); } HashSet <BytesRef> uniqueTerms = new HashSet <BytesRef>(); for (int i = 0; i < numDocs; i++) { if (!onlyUniqueTerms && Random().NextBoolean() && terms.Count > 0) { // re-use existing term BytesRef term = terms[Random().Next(terms.Count)]; docs[term].Add(i); f.SetStringValue(term.Utf8ToString()); } else { string s = TestUtil.RandomUnicodeString(Random(), 10); BytesRef term = new BytesRef(s); if (!docs.ContainsKey(term)) { docs[term] = new List <int?>(); } docs[term].Add(i); terms.Add(term); uniqueTerms.Add(term); f.SetStringValue(s); } id.SetStringValue("" + i); w.AddDocument(doc); if (Random().Next(4) == 1) { w.Commit(); } if (i > 0 && Random().Next(20) == 1) { int delID = Random().Next(i); deleted.Add(delID); w.DeleteDocuments(new Term("id", "" + delID)); if (VERBOSE) { Console.WriteLine("TEST: delete " + delID); } } } if (VERBOSE) { List <BytesRef> termsList = new List <BytesRef>(uniqueTerms); #pragma warning disable 612, 618 termsList.Sort(BytesRef.UTF8SortedAsUTF16Comparer); #pragma warning restore 612, 618 Console.WriteLine("TEST: terms in UTF16 order:"); foreach (BytesRef b in termsList) { Console.WriteLine(" " + UnicodeUtil.ToHexString(b.Utf8ToString()) + " " + b); foreach (int docID in docs[b]) { if (deleted.Contains(docID)) { Console.WriteLine(" " + docID + " (deleted)"); } else { Console.WriteLine(" " + docID); } } } } IndexReader reader = w.GetReader(); w.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: reader=" + reader); } IBits liveDocs = MultiFields.GetLiveDocs(reader); foreach (int delDoc in deleted) { Assert.IsFalse(liveDocs.Get(delDoc)); } for (int i = 0; i < 100; i++) { BytesRef term = terms[Random().Next(terms.Count)]; if (VERBOSE) { Console.WriteLine("TEST: seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " " + term); } DocsEnum docsEnum = TestUtil.Docs(Random(), reader, "field", term, liveDocs, null, DocsFlags.NONE); Assert.IsNotNull(docsEnum); foreach (int docID in docs[term]) { if (!deleted.Contains(docID)) { Assert.AreEqual(docID, docsEnum.NextDoc()); } } Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc()); } reader.Dispose(); dir.Dispose(); } }
public override DocsAndPositionsEnum DocsAndPositions(IBits liveDocs, DocsAndPositionsEnum reuse, DocsAndPositionsFlags flags) { throw new System.NotSupportedException(); }
public override BulkScorer GetBulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, IBits acceptDocs)
{
    if (scoreDocsInOrder || outerInstance.m_minNrShouldMatch > 1)
    {
        // TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch
        // but the same is even true of pure conjunctions...
        return base.GetBulkScorer(context, scoreDocsInOrder, acceptDocs);
    }

    IList<BulkScorer> prohibited = new List<BulkScorer>();
    IList<BulkScorer> optional = new List<BulkScorer>();
    using (IEnumerator<BooleanClause> cIter = outerInstance.clauses.GetEnumerator())
    {
        foreach (Weight w in m_weights)
        {
            cIter.MoveNext();
            BooleanClause c = cIter.Current;
            BulkScorer subScorer = w.GetBulkScorer(context, false, acceptDocs);
            if (subScorer == null)
            {
                if (c.IsRequired)
                {
                    return null;
                }
            }
            else if (c.IsRequired)
            {
                // TODO: there are some cases where BooleanScorer
                // would handle conjunctions faster than
                // BooleanScorer2...
                return base.GetBulkScorer(context, scoreDocsInOrder, acceptDocs);
            }
            else if (c.IsProhibited)
            {
                prohibited.Add(subScorer);
            }
            else
            {
                optional.Add(subScorer);
            }
        }
    }

    // Check if we can and should return a BooleanScorer
    return new BooleanScorer(this, disableCoord, outerInstance.m_minNrShouldMatch, optional, prohibited, m_maxCoord);
}
/// <exception cref="System.IO.IOException"></exception> private ContainsPrefixTreeFilter.SmallDocSet CollectDocs(IBits acceptContains) { ContainsPrefixTreeFilter.SmallDocSet set = null; this.docsEnum = this.termsEnum.Docs(acceptContains, this.docsEnum, DocsEnum.FLAG_NONE ); int docid; while ((docid = this.docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (set == null) { int size = this.termsEnum.DocFreq; if (size <= 0) { size = 16; } set = new ContainsPrefixTreeFilter.SmallDocSet(size); } set.Set(docid); } return set; }
public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
{
    IList<Scorer> required = new List<Scorer>();
    IList<Scorer> prohibited = new List<Scorer>();
    IList<Scorer> optional = new List<Scorer>();
    IEnumerator<BooleanClause> cIter = outerInstance.clauses.GetEnumerator();
    foreach (Weight w in m_weights)
    {
        cIter.MoveNext();
        BooleanClause c = cIter.Current;
        Scorer subScorer = w.GetScorer(context, acceptDocs);
        if (subScorer == null)
        {
            if (c.IsRequired)
            {
                return null;
            }
        }
        else if (c.IsRequired)
        {
            required.Add(subScorer);
        }
        else if (c.IsProhibited)
        {
            prohibited.Add(subScorer);
        }
        else
        {
            optional.Add(subScorer);
        }
    }

    if (required.Count == 0 && optional.Count == 0)
    {
        // no required and optional clauses.
        return null;
    }
    else if (optional.Count < outerInstance.m_minNrShouldMatch)
    {
        // either >1 req scorer, or there are 0 req scorers and at least 1
        // optional scorer. Therefore if there are not enough optional scorers
        // no documents will be matched by the query
        return null;
    }

    // simple conjunction
    if (optional.Count == 0 && prohibited.Count == 0)
    {
        float coord = disableCoord ? 1.0f : Coord(required.Count, m_maxCoord);
        return new ConjunctionScorer(this, required.ToArray(), coord);
    }

    // simple disjunction
    if (required.Count == 0 && prohibited.Count == 0 && outerInstance.m_minNrShouldMatch <= 1 && optional.Count > 1)
    {
        var coord = new float[optional.Count + 1];
        for (int i = 0; i < coord.Length; i++)
        {
            coord[i] = disableCoord ? 1.0f : Coord(i, m_maxCoord);
        }
        return new DisjunctionSumScorer(this, optional.ToArray(), coord);
    }

    // Return a BooleanScorer2
    return new BooleanScorer2(this, disableCoord, outerInstance.m_minNrShouldMatch, required, prohibited, optional, m_maxCoord);
}
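// --- Added illustrative sketch (not from the Lucene.NET sources above). The clause mix and
// --- MinimumNumberShouldMatch on the query side decide which scorer GetScorer above ends up
// --- building; the field and terms below are invented for illustration.
static TopDocs SearchWithMixedClauses(IndexSearcher searcher)
{
    var bq = new BooleanQuery();
    bq.Add(new TermQuery(new Term("body", "lucene")), Occur.MUST);     // required clause
    bq.Add(new TermQuery(new Term("body", "search")), Occur.SHOULD);   // optional clause
    bq.Add(new TermQuery(new Term("body", "spam")), Occur.MUST_NOT);   // prohibited clause
    bq.MinimumNumberShouldMatch = 1;                                   // constrains the optional clauses
    return searcher.Search(bq, 10);
}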
/// <exception cref="System.IO.IOException"></exception> private ContainsPrefixTreeFilter.SmallDocSet GetLeafDocs(Cell leafCell, IBits acceptContains) { System.Diagnostics.Debug.Assert(new BytesRef(leafCell.GetTokenBytes().ToSByteArray()).Equals(this .termBytes)); BytesRef nextTerm = this.termsEnum.Next(); if (nextTerm == null) { this.termsEnum = null; //signals all done return null; } this.nextCell = this._enclosing.grid.GetCell(nextTerm.bytes.ToByteArray(), nextTerm.offset, nextTerm .length, this.nextCell); if (this.nextCell.Level == leafCell.Level && this.nextCell.IsLeaf()) { return this.CollectDocs(acceptContains); } else { return null; } }
/// <summary>
/// Must fully consume state, since after this call that
/// <see cref="TermState"/> may be reused.
/// </summary>
public abstract DocsAndPositionsEnum DocsAndPositions(FieldInfo fieldInfo, BlockTermState state, IBits skipDocs, DocsAndPositionsEnum reuse, DocsAndPositionsFlags flags);
/// <exception cref="System.IO.IOException"></exception> public ContainsVisitor(ContainsPrefixTreeFilter _enclosing, AtomicReaderContext context , IBits acceptDocs) : base(_enclosing, context, acceptDocs) { this._enclosing = _enclosing; }
/// <summary>
/// Inverts all terms.
/// </summary>
public DocTermOrds(AtomicReader reader, IBits liveDocs, string field)
    : this(reader, liveDocs, field, null, int.MaxValue)
{
}
public SingleDocValuesAnonymousClass(SingleFieldSource @this, FieldCache.Singles arr, IBits valid) : base(@this) { this.arr = arr; this.valid = valid; }
/// <summary>
/// Inverts only terms starting w/ prefix.
/// </summary>
public DocTermOrds(AtomicReader reader, IBits liveDocs, string field, BytesRef termPrefix)
    : this(reader, liveDocs, field, termPrefix, int.MaxValue)
{
}
/// <exception cref="System.IO.IOException"></exception> public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs ) { return new _VisitorTemplate_55(this, context, acceptDocs, hasIndexedLeaves).GetDocIdSet (); }
public DistanceFunctionValue(DistanceValueSource enclosingInstance, AtomicReader reader)
{
    this.enclosingInstance = enclosingInstance;
    ptX = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.FieldNameX, true);
    ptY = FieldCache.DEFAULT.GetDoubles(reader, enclosingInstance.strategy.FieldNameY, true);
    validX = FieldCache.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.FieldNameX);
    validY = FieldCache.DEFAULT.GetDocsWithField(reader, enclosingInstance.strategy.FieldNameY);
    from = enclosingInstance.from;
    calculator = enclosingInstance.strategy.SpatialContext.GetDistCalc();
    nullValue = (enclosingInstance.strategy.SpatialContext.IsGeo() ? 180 : double.MaxValue);
}
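// --- Added illustrative sketch (not from the Lucene.NET sources above). The per-document
// --- distances computed by the snippets above come from a strategy's MakeDistanceValueSource;
// --- the field name and query point are invented, and the GetSortField(bool) call is assumed
// --- from the 4.x ValueSource API.
static TopDocs SortByDistance(IndexSearcher searcher)
{
    var ctx = SpatialContext.GEO;
    var strategy = new PointVectorStrategy(ctx, "location");                        // hypothetical field name
    ValueSource distances = strategy.MakeDistanceValueSource(ctx.MakePoint(-73.98, 40.75));

    Sort sort = new Sort(distances.GetSortField(false));                            // ascending: nearest first
    return searcher.Search(new MatchAllDocsQuery(), 10, sort);
}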
/// <exception cref="System.IO.IOException"></exception> public BaseTermsEnumTraverser(AbstractPrefixTreeFilter _enclosing, AtomicReaderContext context, IBits acceptDocs) { this._enclosing = _enclosing; //remember to check for null in getDocIdSet this.context = context; AtomicReader reader = context.AtomicReader; this.acceptDocs = acceptDocs; maxDoc = reader.MaxDoc; Terms terms = reader.Terms(this._enclosing.fieldName); if (terms != null) { termsEnum = terms.Iterator(null); } }
public ToChildBlockJoinScorer(Weight weight, Scorer parentScorer, FixedBitSet parentBits, bool doScores, IBits acceptDocs) : base(weight) { _doScores = doScores; _parentBits = parentBits; _parentScorer = parentScorer; _acceptDocs = acceptDocs; }
/// <summary>
/// Inverts only terms starting w/ prefix, and only terms
/// whose docFreq (not taking deletions into account) is
/// &lt;= <paramref name="maxTermDocFreq"/>
/// </summary>
public DocTermOrds(AtomicReader reader, IBits liveDocs, string field, BytesRef termPrefix, int maxTermDocFreq)
    : this(reader, liveDocs, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS)
{
}
public override int Merge(MergeState mergeState) { int docCount = 0; int idx = 0; foreach (AtomicReader reader in mergeState.Readers) { SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++]; CompressingStoredFieldsReader matchingFieldsReader = null; if (matchingSegmentReader != null) { StoredFieldsReader fieldsReader = matchingSegmentReader.FieldsReader; // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader if (fieldsReader != null && fieldsReader is CompressingStoredFieldsReader) { matchingFieldsReader = (CompressingStoredFieldsReader)fieldsReader; } } int maxDoc = reader.MaxDoc; IBits liveDocs = reader.LiveDocs; if (matchingFieldsReader == null || matchingFieldsReader.Version != VERSION_CURRENT || matchingFieldsReader.CompressionMode != compressionMode || matchingFieldsReader.ChunkSize != chunkSize) // the way data is decompressed depends on the chunk size - means reader version is not the same as the writer version { // naive merge... for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) { Document doc = reader.Document(i); AddDocument(doc, mergeState.FieldInfos); ++docCount; mergeState.CheckAbort.Work(300); } } else { int docID = NextLiveDoc(0, liveDocs, maxDoc); if (docID < maxDoc) { // not all docs were deleted CompressingStoredFieldsReader.ChunkIterator it = matchingFieldsReader.GetChunkIterator(docID); int[] startOffsets = new int[0]; do { // go to the next chunk that contains docID it.Next(docID); // transform lengths into offsets if (startOffsets.Length < it.chunkDocs) { startOffsets = new int[ArrayUtil.Oversize(it.chunkDocs, 4)]; } for (int i = 1; i < it.chunkDocs; ++i) { startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1]; } if (numBufferedDocs == 0 && startOffsets[it.chunkDocs - 1] < chunkSize && startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize && NextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) // no deletion in the chunk - chunk is large enough - chunk is small enough - starting a new chunk { Debug.Assert(docID == it.docBase); // no need to decompress, just copy data indexWriter.WriteIndex(it.chunkDocs, fieldsStream.GetFilePointer()); WriteHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths); it.CopyCompressedData(fieldsStream); this.docBase += it.chunkDocs; docID = NextLiveDoc(it.docBase + it.chunkDocs, liveDocs, maxDoc); docCount += it.chunkDocs; mergeState.CheckAbort.Work(300 * it.chunkDocs); } else { // decompress it.Decompress(); if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.Length) { throw new CorruptIndexException("Corrupted: expected chunk size=" + startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] + ", got " + it.bytes.Length); } // copy non-deleted docs for (; docID < it.docBase + it.chunkDocs; docID = NextLiveDoc(docID + 1, liveDocs, maxDoc)) { int diff = docID - it.docBase; StartDocument(it.numStoredFields[diff]); bufferedDocs.WriteBytes(it.bytes.Bytes, it.bytes.Offset + startOffsets[diff], it.lengths[diff]); FinishDocument(); ++docCount; mergeState.CheckAbort.Work(300); } } } while (docID < maxDoc); it.CheckIntegrity(); } } } Finish(mergeState.FieldInfos, docCount); return(docCount); }
/// <summary>
/// Inverts only terms starting w/ prefix, and only terms
/// whose docFreq (not taking deletions into account) is
/// &lt;= <paramref name="maxTermDocFreq"/>, with a custom indexing interval
/// (default is every 128th term).
/// </summary>
public DocTermOrds(AtomicReader reader, IBits liveDocs, string field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits)
    : this(field, maxTermDocFreq, indexIntervalBits)
{
    Uninvert(reader, liveDocs, termPrefix);
}
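// --- Added illustrative sketch (not from the Lucene.NET sources above). Constructs DocTermOrds
// --- against a single (wrapped) segment; the "category" field is hypothetical, and the actual
// --- un-inversion happens inside the constructor via Uninvert (shown next).
static DocTermOrds UninvertCategoryField(DirectoryReader directoryReader)
{
    AtomicReader atomicReader = SlowCompositeReaderWrapper.Wrap(directoryReader);
    IBits liveDocs = atomicReader.LiveDocs;   // null when the segment has no deletions
    return new DocTermOrds(atomicReader, liveDocs, "category");
}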
/// <summary> /// Call this only once (if you subclass!) </summary> protected virtual void Uninvert(AtomicReader reader, IBits liveDocs, BytesRef termPrefix) { FieldInfo info = reader.FieldInfos.FieldInfo(m_field); if (info != null && info.HasDocValues) { throw IllegalStateException.Create("Type mismatch: " + m_field + " was indexed as " + info.DocValuesType); } //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix); long startTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results m_prefix = termPrefix == null ? null : BytesRef.DeepCopyOf(termPrefix); int maxDoc = reader.MaxDoc; int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number int[] lastTerm = new int[maxDoc]; // last term we saw for this document var bytes = new sbyte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts) Fields fields = reader.Fields; if (fields == null) { // No terms return; } Terms terms = fields.GetTerms(m_field); if (terms == null) { // No terms return; } TermsEnum te = terms.GetEnumerator(); BytesRef seekStart = termPrefix ?? new BytesRef(); //System.out.println("seekStart=" + seekStart.utf8ToString()); if (te.SeekCeil(seekStart) == TermsEnum.SeekStatus.END) { // No terms match return; } // If we need our "term index wrapper", these will be // init'd below: IList <BytesRef> indexedTerms = null; PagedBytes indexedTermsBytes = null; bool testedOrd = false; // we need a minimum of 9 bytes, but round up to 12 since the space would // be wasted with most allocators anyway. var tempArr = new sbyte[12]; // // enumerate all terms, and build an intermediate form of the un-inverted field. // // During this intermediate form, every document has a (potential) byte[] // and the int[maxDoc()] array either contains the termNumber list directly // or the *end* offset of the termNumber list in it's byte array (for faster // appending and faster creation of the final form). // // idea... if things are too large while building, we could do a range of docs // at a time (but it would be a fair amount slower to build) // could also do ranges in parallel to take advantage of multiple CPUs // OPTIONAL: remap the largest df terms to the lowest 128 (single byte) // values. this requires going over the field first to find the most // frequent terms ahead of time. 
int termNum = 0; m_docsEnum = null; // Loop begins with te positioned to first term (we call // seek above): for (; ;) { BytesRef t = te.Term; if (t == null || (termPrefix != null && !StringHelper.StartsWith(t, termPrefix))) { break; } //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum); if (!testedOrd) { try { m_ordBase = (int)te.Ord; //System.out.println("got ordBase=" + ordBase); } catch (Exception uoe) when(uoe.IsUnsupportedOperationException()) { // Reader cannot provide ord support, so we wrap // our own support by creating our own terms index: indexedTerms = new JCG.List <BytesRef>(); indexedTermsBytes = new PagedBytes(15); //System.out.println("NO ORDS"); } testedOrd = true; } VisitTerm(te, termNum); if (indexedTerms != null && (termNum & indexIntervalMask) == 0) { // Index this term m_sizeOfIndexedStrings += t.Length; BytesRef indexedTerm = new BytesRef(); indexedTermsBytes.Copy(t, indexedTerm); // TODO: really should 1) strip off useless suffix, // and 2) use FST not array/PagedBytes indexedTerms.Add(indexedTerm); } int df = te.DocFreq; if (df <= m_maxTermDocFreq) { m_docsEnum = te.Docs(liveDocs, m_docsEnum, DocsFlags.NONE); // dF, but takes deletions into account int actualDF = 0; for (; ;) { int doc = m_docsEnum.NextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } //System.out.println(" chunk=" + chunk + " docs"); actualDF++; m_termInstances++; //System.out.println(" docID=" + doc); // add TNUM_OFFSET to the term number to make room for special reserved values: // 0 (end term) and 1 (index into byte array follows) int delta = termNum - lastTerm[doc] + TNUM_OFFSET; lastTerm[doc] = termNum; int val = index[doc]; if ((val & 0xff) == 1) { // index into byte array (actually the end of // the doc-specific byte[] when building) int pos = val.TripleShift(8); int ilen = VInt32Size(delta); var arr = bytes[doc]; int newend = pos + ilen; if (newend > arr.Length) { // We avoid a doubling strategy to lower memory usage. // this faceting method isn't for docs with many terms. // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary. // TODO: figure out what array lengths we can round up to w/o actually using more memory // (how much space does a byte[] take up? Is data preceded by a 32 bit length only? // It should be safe to round up to the nearest 32 bits in any case. int newLen = (newend + 3) & unchecked ((int)0xfffffffc); // 4 byte alignment var newarr = new sbyte[newLen]; Array.Copy(arr, 0, newarr, 0, pos); arr = newarr; bytes[doc] = newarr; } pos = WriteInt32(delta, arr, pos); index[doc] = (pos << 8) | 1; // update pointer to end index in byte[] } else { // OK, this int has data in it... find the end (a zero starting byte - not // part of another number, hence not following a byte with the high bit set). int ipos; if (val == 0) { ipos = 0; } else if ((val & 0x0000ff80) == 0) { ipos = 1; } else if ((val & 0x00ff8000) == 0) { ipos = 2; } else if ((val & 0xff800000) == 0) { ipos = 3; } else { ipos = 4; } //System.out.println(" ipos=" + ipos); int endPos = WriteInt32(delta, tempArr, ipos); //System.out.println(" endpos=" + endPos); if (endPos <= 4) { //System.out.println(" fits!"); // value will fit in the integer... move bytes back for (int j = ipos; j < endPos; j++) { val |= (tempArr[j] & 0xff) << (j << 3); } index[doc] = val; } else { // value won't fit... 
move integer into byte[] for (int j = 0; j < ipos; j++) { tempArr[j] = (sbyte)val; val = val.TripleShift(8); } // point at the end index in the byte[] index[doc] = (endPos << 8) | 1; bytes[doc] = tempArr; tempArr = new sbyte[12]; } } } SetActualDocFreq(termNum, actualDF); } termNum++; if (!te.MoveNext()) { break; } } m_numTermsInField = termNum; long midPoint = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results if (m_termInstances == 0) { // we didn't invert anything // lower memory consumption. m_tnums = null; } else { this.m_index = index; // // transform intermediate form into the final form, building a single byte[] // at a time, and releasing the intermediate byte[]s as we go to avoid // increasing the memory footprint. // for (int pass = 0; pass < 256; pass++) { var target = m_tnums[pass]; var pos = 0; // end in target; if (target != null) { pos = target.Length; } else { target = new sbyte[4096]; } // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx // where pp is the pass (which array we are building), and xx is all values. // each pass shares the same byte[] for termNumber lists. for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24)) { int lim = Math.Min(docbase + (1 << 16), maxDoc); for (int doc = docbase; doc < lim; doc++) { //System.out.println(" pass="******" process docID=" + doc); int val = index[doc]; if ((val & 0xff) == 1) { int len = val.TripleShift(8); //System.out.println(" ptr pos=" + pos); index[doc] = (pos << 8) | 1; // change index to point to start of array if ((pos & 0xff000000) != 0) { // we only have 24 bits for the array index throw IllegalStateException.Create("Too many values for UnInvertedField faceting on field " + m_field); } var arr = bytes[doc]; /* * for(byte b : arr) { * //System.out.println(" b=" + Integer.toHexString((int) b)); * } */ bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM if (target.Length <= pos + len) { int newlen = target.Length; //* we don't have to worry about the array getting too large // since the "pos" param will overflow first (only 24 bits available) // if ((newlen<<1) <= 0) { // // overflow... // newlen = Integer.MAX_VALUE; // if (newlen <= pos + len) { // throw new SolrException(400,"Too many terms to uninvert field!"); // } // } else { // while (newlen <= pos + len) newlen<<=1; // doubling strategy // } // while (newlen <= pos + len) // doubling strategy { newlen <<= 1; } var newtarget = new sbyte[newlen]; Array.Copy(target, 0, newtarget, 0, pos); target = newtarget; } Array.Copy(arr, 0, target, pos, len); pos += len + 1; // skip single byte at end and leave it 0 for terminator } } } // shrink array if (pos < target.Length) { var newtarget = new sbyte[pos]; Array.Copy(target, 0, newtarget, 0, pos); target = newtarget; } m_tnums[pass] = target; if ((pass << 16) > maxDoc) { break; } } } if (indexedTerms != null) { m_indexedTermsArray = new BytesRef[indexedTerms.Count]; indexedTerms.CopyTo(m_indexedTermsArray, 0); } long endTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results m_total_time = (int)(endTime - startTime); m_phase1_time = (int)(midPoint - startTime); }
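// --- Added worked illustration (not from the Lucene.NET sources above) of the delta encoding
// --- used in Uninvert: TNUM_OFFSET is 2 in the Java original, stated here as an assumption.
const int TNUM_OFFSET = 2;
int lastTerm = 0;
foreach (int termNum in new[] { 5, 9 })             // term numbers seen for one document
{
    int delta = termNum - lastTerm + TNUM_OFFSET;   // 7, then 6
    lastTerm = termNum;
    Console.WriteLine(delta);                       // each delta is then vInt-encoded into bytes[doc]
}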
public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs) { var values = source.GetValues(null, context); return new ValueSourceFilteredDocIdSet(startingFilter.GetDocIdSet(context, acceptDocs), values, this); }
public DocIdSetAnonymousInnerClassHelper(IBits acceptDocs, TermsEnum termsEnum) { this.acceptDocs = acceptDocs; this.termsEnum = termsEnum; }