Example #1
        public override void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
        {
            long t0     = System.Environment.TickCount;
            int  maxdoc = reader.MaxDoc;

            BigNestedIntArray.BufferedLoader loader       = GetBufferedLoader(maxdoc, workArea);
            BigNestedIntArray.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null);

            TermEnum   tenum              = null;
            TermDocs   tdoc               = null;
            var        list               = (listFactory == null ? new TermStringList() : listFactory.CreateTermList());
            List <int> minIDList          = new List <int>();
            List <int> maxIDList          = new List <int>();
            List <int> freqList           = new List <int>();
            OpenBitSet bitset             = new OpenBitSet(maxdoc + 1);
            int        negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
            int        t = 0; // current term number

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            t++;

            _overflow = false;

            string pre = null;

            int df    = 0;
            int minID = -1;
            int maxID = -1;
            int valId = 0;

            try
            {
                tdoc  = reader.TermDocs();
                tenum = reader.Terms(new Term(fieldName, ""));
                if (tenum != null)
                {
                    do
                    {
                        Term term = tenum.Term;
                        if (term == null || !fieldName.Equals(term.Field))
                        {
                            break;
                        }

                        string val = term.Text;

                        if (val != null)
                        {
                            int      weight = 0;
                            string[] split  = val.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
                            if (split.Length > 1)
                            {
                                val    = split[0];
                                weight = int.Parse(split[split.Length - 1]);
                            }
                            if (pre == null || !val.Equals(pre))
                            {
                                if (pre != null)
                                {
                                    freqList.Add(df);
                                    minIDList.Add(minID);
                                    maxIDList.Add(maxID);
                                }

                                list.Add(val);

                                df    = 0;
                                minID = -1;
                                maxID = -1;
                                valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                                t++;
                            }

                            tdoc.Seek(tenum);
                            if (tdoc.Next())
                            {
                                df++;
                                int docid = tdoc.Doc;

                                if (!loader.Add(docid, valId))
                                {
                                    LogOverflow(fieldName);
                                }
                                else
                                {
                                    weightLoader.Add(docid, weight);
                                }

                                if (docid < minID)
                                {
                                    minID = docid;
                                }
                                bitset.FastSet(docid);
                                while (tdoc.Next())
                                {
                                    df++;
                                    docid = tdoc.Doc;

                                    if (!loader.Add(docid, valId))
                                    {
                                        LogOverflow(fieldName);
                                    }
                                    else
                                    {
                                        weightLoader.Add(docid, weight);
                                    }

                                    bitset.FastSet(docid);
                                }
                                if (docid > maxID)
                                {
                                    maxID = docid;
                                }
                            }
                            pre = val;
                        }
                    } while (tenum.Next());
                    if (pre != null)
                    {
                        freqList.Add(df);
                        minIDList.Add(minID);
                        maxIDList.Add(maxID);
                    }
                }
            }
            finally
            {
                try
                {
                    if (tdoc != null)
                    {
                        tdoc.Dispose();
                    }
                }
                finally
                {
                    if (tenum != null)
                    {
                        tenum.Dispose();
                    }
                }
            }

            list.Seal();

            try
            {
                _nestedArray.Load(maxdoc + 1, loader);
                _weightArray.Load(maxdoc + 1, weightLoader);
            }
            catch (System.IO.IOException)
            {
                throw; // rethrow as-is so the original stack trace is preserved
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.valArray = list;
            this.freqs    = freqList.ToArray();
            this.minIDs   = minIDList.ToArray();
            this.maxIDs   = maxIDList.ToArray();

            int doc = 0;

            while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc <= maxdoc)
            {
                this.minIDs[0] = doc;
                doc            = maxdoc;
                while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
        }
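The one non-obvious step above is the term-id remapping valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t, which reverses the ids assigned to the first negativeValueCount terms (presumably so the negative values, which enumerate in reverse numeric order, get ids consistent with their numeric order). A minimal standalone sketch, with a made-up negativeValueCount, that just prints the mapping:

    using System;

    // Minimal standalone sketch (not BoboBrowse code): prints how the expression in Load()
    // reverses the first negativeValueCount term ids. negativeValueCount = 3 is made up.
    internal static class ValIdMappingDemo
    {
        private static void Main()
        {
            const int negativeValueCount = 3;
            for (int t = 1; t <= 6; t++)
            {
                int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                Console.WriteLine("t={0} -> valId={1}", t, valId);
            }
            // Prints: 1->3, 2->2, 3->1, 4->4, 5->5, 6->6
        }
    }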
Example #2
        /// <summary>
        /// Initializes a new instance of <see cref="SearchBit"/>.
        /// </summary>
        /// <param name="openBitSet">The open bit set.</param>
        public SearchBit(OpenBitSet openBitSet)
        {
            Prevent.ParameterNull(openBitSet, nameof(openBitSet));

            _openBitSet = openBitSet;
        }
Example #3
 internal AssertingPostingsConsumer(PostingsConsumer @in, FieldInfo fieldInfo, OpenBitSet visitedDocs)
 {
     this.@in         = @in;
     this.fieldInfo   = fieldInfo;
     this.visitedDocs = visitedDocs;
 }
Example #4
        /// <summary>
        /// Search a single file
        /// </summary>

        void SearchSingleFile(int fi)
        {
            StructSearchMatch sm = null;

            AssertMx.IsNotNull(FpDao, "FpDao");

            List <StructSearchMatch> matchList = FileMatchLists[fi];

            AssertMx.IsNotNull(matchList, "matchList");

            OpenBitSet queryObs = new OpenBitSet(QueryFpLongArray, QueryFpLongArray.Length);

            AssertMx.IsNotNull(queryObs, "queryObs");

            OpenBitSet dbObs = new OpenBitSet(QueryFpLongArray, QueryFpLongArray.Length);             // gets set to DB fp for intersect

            AssertMx.IsNotNull(dbObs, "dbObs");

            FileStream fs = FileStreamReaders[fi];

            AssertMx.IsNotNull(fs, "fs");

            ReadFingerprintRecArgs a = new ReadFingerprintRecArgs();

            a.Initialize(fs, QueryFpLongArray.Length);

            try
            {
                while (true)
                {
                    bool readOk = FpDao.ReadRawFingerprintRec(a);
                    if (!readOk)
                    {
                        break;
                    }

                    //if (IsSrcCidMatch("03435269", a)) a = a;  // debug

                    dbObs.Bits = a.fingerprint;
                    dbObs.Intersect(queryObs);
                    int   commonCnt = (int)dbObs.Cardinality();
                    float simScore  = commonCnt / (float)(a.cardinality + QueryFpCardinality - commonCnt);

                    if (simScore >= MinimumSimilarity)
                    {
                        sm            = ReadFingerprintRec_To_StructSearchMatch(a);
                        sm.SearchType = StructureSearchType.MolSim;
                        sm.MatchScore = simScore;

                        matchList.Add(sm);
                    }
                }
            }

            catch (Exception ex)
            {
                string msg = ex.Message;
                msg += string.Format("\r\nfi: {0}, fs.Name: {1}, sm: {2}", fi, fs.Name, sm != null ? sm.Serialize() : "");
                DebugLog.Message(DebugLog.FormatExceptionMessage(ex, msg));
                throw new Exception(msg, ex);
            }

            return;
        }
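The similarity computed above is the Tanimoto coefficient |A ∩ B| / (|A| + |B| − |A ∩ B|). A small self-contained sketch of the same arithmetic with Lucene.NET's OpenBitSet (FastSet, Union, Intersect, Cardinality, as used elsewhere on this page); the bit positions are invented:

    using System;
    using Lucene.Net.Util; // OpenBitSet

    internal static class TanimotoDemo
    {
        private static void Main()
        {
            var query = new OpenBitSet(64);
            var db    = new OpenBitSet(64);
            foreach (int bit in new[] { 1, 5, 9, 20 }) query.FastSet(bit);
            foreach (int bit in new[] { 1, 5, 21, 40 }) db.FastSet(bit);

            // Intersect mutates its receiver, so intersect a scratch copy of the query set.
            var scratch = new OpenBitSet(64);
            scratch.Union(query);      // scratch = query
            scratch.Intersect(db);     // scratch = query AND db

            long common = scratch.Cardinality();
            float sim = common / (float)(query.Cardinality() + db.Cardinality() - common);
            Console.WriteLine(sim);    // 2 / (4 + 4 - 2) ≈ 0.333
        }
    }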
Example #5
 private int FindIn(OpenBitSet OpenBitSet, int baseVal, int val)
 {
     return(-1);
 }
Example #6
 /// <summary>
 /// Returns <c>true</c> if the given string is accepted by the automaton.
 /// <para/>
 /// Complexity: linear in the length of the string.
 /// <para/>
 /// <b>Note:</b> for full performance, use the <see cref="RunAutomaton"/> class.
 /// </summary>
 public static bool Run(Automaton a, string s)
 {
     if (a.IsSingleton)
     {
         return(s.Equals(a.singleton, StringComparison.Ordinal));
     }
     if (a.deterministic)
     {
         State p = a.initial;
         int   cp; // LUCENENET: Removed unnecessary assignment
         for (int i = 0; i < s.Length; i += Character.CharCount(cp))
         {
             State q = p.Step(cp = Character.CodePointAt(s, i));
             if (q == null)
             {
                 return(false);
             }
             p = q;
         }
         return(p.accept);
     }
     else
     {
         State[]            states   = a.GetNumberedStates();
         LinkedList <State> pp       = new LinkedList <State>();
         LinkedList <State> pp_other = new LinkedList <State>();
         OpenBitSet         bb       = new OpenBitSet(states.Length);
         OpenBitSet         bb_other = new OpenBitSet(states.Length);
         pp.AddLast(a.initial);
         List <State> dest   = new List <State>();
         bool         accept = a.initial.accept;
         int          c; // LUCENENET: Removed unnecessary assignment
         for (int i = 0; i < s.Length; i += Character.CharCount(c))
         {
             c      = Character.CodePointAt(s, i);
             accept = false;
             pp_other.Clear();
             bb_other.Clear(0, bb_other.Length - 1);
             foreach (State p in pp)
             {
                 dest.Clear();
                 p.Step(c, dest);
                 foreach (State q in dest)
                 {
                     if (q.accept)
                     {
                         accept = true;
                     }
                     if (!bb_other.Get(q.number))
                     {
                         bb_other.Set(q.number);
                         pp_other.AddLast(q);
                     }
                 }
             }
             LinkedList <State> tp = pp;
             pp       = pp_other;
             pp_other = tp;
             OpenBitSet tb = bb;
             bb       = bb_other;
             bb_other = tb;
         }
         return(accept);
     }
 }
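A short usage sketch for the method above (in the Lucene.NET sources this Run overload belongs to BasicOperations in the Lucene.Net.Util.Automaton namespace; the RegExp helper used to build the automaton is an assumption about the surrounding API, and exact type names can differ slightly between versions):

    using System;
    using Lucene.Net.Util.Automaton; // Automaton, RegExp, BasicOperations (assumed)

    internal static class RunDemo
    {
        private static void Main()
        {
            // Build an automaton for "ab*c". Depending on how it was constructed it may or
            // may not be deterministic, so Run() takes either the fast Step() path or the
            // subset-simulation branch that tracks visited states with two OpenBitSets.
            Automaton a = new RegExp("ab*c").ToAutomaton();

            Console.WriteLine(BasicOperations.Run(a, "abbbc")); // True
            Console.WriteLine(BasicOperations.Run(a, "abd"));   // False
        }
    }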
Example #7
        /// <summary>
        /// Minimizes the given automaton using Hopcroft's algorithm.
        /// </summary>
        public static void MinimizeHopcroft(Automaton a)
        {
            a.Determinize();
            if (a.initial.numTransitions == 1)
            {
                Transition t = a.initial.TransitionsArray[0];
                if (t.to == a.initial && t.min == Character.MinCodePoint && t.max == Character.MaxCodePoint)
                {
                    return;
                }
            }
            a.Totalize();

            // initialize data structures
            int[]   sigma = a.GetStartPoints();
            State[] states = a.GetNumberedStates();
            int     sigmaLen = sigma.Length, statesLen = states.Length;

            JCG.List <State>[,] reverse = new JCG.List <State> [statesLen, sigmaLen];
            ISet <State>[]     partition  = new JCG.HashSet <State> [statesLen];
            JCG.List <State>[] splitblock = new JCG.List <State> [statesLen];
            int[] block = new int[statesLen];
            StateList[,] active      = new StateList[statesLen, sigmaLen];
            StateListNode[,] active2 = new StateListNode[statesLen, sigmaLen];
            Queue <Int32Pair> pending = new Queue <Int32Pair>(); // LUCENENET specific - Queue is much more performant than LinkedList
            OpenBitSet        pending2 = new OpenBitSet(sigmaLen * statesLen);
            OpenBitSet        split = new OpenBitSet(statesLen),
                              refine = new OpenBitSet(statesLen), refine2 = new OpenBitSet(statesLen);

            for (int q = 0; q < statesLen; q++)
            {
                splitblock[q] = new JCG.List <State>();
                partition[q]  = new JCG.HashSet <State>();
                for (int x = 0; x < sigmaLen; x++)
                {
                    active[q, x] = new StateList();
                }
            }
            // find initial partition and reverse edges
            for (int q = 0; q < statesLen; q++)
            {
                State qq = states[q];
                int   j  = qq.accept ? 0 : 1;
                partition[j].Add(qq);
                block[q] = j;
                for (int x = 0; x < sigmaLen; x++)
                {
                    //JCG.List<State>[] r = reverse[qq.Step(sigma[x]).number];
                    var r = qq.Step(sigma[x]).number;
                    if (reverse[r, x] is null)
                    {
                        reverse[r, x] = new JCG.List <State>();
                    }
                    reverse[r, x].Add(qq);
                }
            }
            // initialize active sets
            for (int j = 0; j <= 1; j++)
            {
                for (int x = 0; x < sigmaLen; x++)
                {
                    foreach (State qq in partition[j])
                    {
                        if (reverse[qq.number, x] != null)
                        {
                            active2[qq.number, x] = active[j, x].Add(qq);
                        }
                    }
                }
            }
            // initialize pending
            for (int x = 0; x < sigmaLen; x++)
            {
                int j = (active[0, x].Count <= active[1, x].Count) ? 0 : 1;
                pending.Enqueue(new Int32Pair(j, x));
                pending2.Set(x * statesLen + j);
            }
            // process pending until fixed point
            int k = 2;

            while (pending.Count > 0)
            {
                Int32Pair ip = pending.Dequeue();
                int       p  = ip.n1;
                int       x  = ip.n2;
                pending2.Clear(x * statesLen + p);
                // find states that need to be split off their blocks
                for (StateListNode m = active[p, x].First; m != null; m = m.Next)
                {
                    JCG.List <State> r = reverse[m.Q.number, x];
                    if (r != null)
                    {
                        foreach (State s in r)
                        {
                            int i = s.number;
                            if (!split.Get(i))
                            {
                                split.Set(i);
                                int j = block[i];
                                splitblock[j].Add(s);
                                if (!refine2.Get(j))
                                {
                                    refine2.Set(j);
                                    refine.Set(j);
                                }
                            }
                        }
                    }
                }
                // refine blocks
                for (int j = refine.NextSetBit(0); j >= 0; j = refine.NextSetBit(j + 1))
                {
                    JCG.List <State> sb = splitblock[j];
                    if (sb.Count < partition[j].Count)
                    {
                        ISet <State> b1 = partition[j];
                        ISet <State> b2 = partition[k];
                        foreach (State s in sb)
                        {
                            b1.Remove(s);
                            b2.Add(s);
                            block[s.number] = k;
                            for (int c = 0; c < sigmaLen; c++)
                            {
                                StateListNode sn = active2[s.number, c];
                                if (sn != null && sn.Sl == active[j, c])
                                {
                                    sn.Remove();
                                    active2[s.number, c] = active[k, c].Add(s);
                                }
                            }
                        }
                        // update pending
                        for (int c = 0; c < sigmaLen; c++)
                        {
                            int aj = active[j, c].Count, ak = active[k, c].Count, ofs = c * statesLen;
                            if (!pending2.Get(ofs + j) && 0 < aj && aj <= ak)
                            {
                                pending2.Set(ofs + j);
                                pending.Enqueue(new Int32Pair(j, c));
                            }
                            else
                            {
                                pending2.Set(ofs + k);
                                pending.Enqueue(new Int32Pair(k, c));
                            }
                        }
                        k++;
                    }
                    refine2.Clear(j);
                    foreach (State s in sb)
                    {
                        split.Clear(s.number);
                    }
                    sb.Clear();
                }
                refine.Clear(0, refine.Length);
            }
            // make a new state for each equivalence class, set initial state
            State[] newstates = new State[k];
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = new State();
                newstates[n] = s;
                foreach (State q in partition[n])
                {
                    if (q == a.initial)
                    {
                        a.initial = s;
                    }
                    s.accept = q.accept;
                    s.number = q.number; // select representative
                    q.number = n;
                }
            }
            // build transitions and set acceptance
            for (int n = 0; n < newstates.Length; n++)
            {
                State s = newstates[n];
                s.accept = states[s.number].accept;
                foreach (Transition t in states[s.number].GetTransitions())
                {
                    s.AddTransition(new Transition(t.min, t.max, newstates[t.to.number]));
                }
            }
            a.ClearNumberedStates();
            a.RemoveDeadTransitions();
        }
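A brief usage sketch, assuming this is MinimizationOperations.MinimizeHopcroft from the same Lucene.Net.Util.Automaton namespace; minimization must not change the accepted language, which the checks below spot-verify on a deliberately redundant expression:

    using System;
    using Lucene.Net.Util.Automaton; // Automaton, RegExp, BasicOperations, MinimizationOperations (assumed)

    internal static class MinimizeDemo
    {
        private static void Main()
        {
            // Both alternatives of the union accept the same strings, so the automaton
            // starts out with redundant states that Hopcroft minimization can merge.
            Automaton a = new RegExp("(ab|ab)c*").ToAutomaton();

            MinimizationOperations.MinimizeHopcroft(a);

            // The accepted language is unchanged.
            Console.WriteLine(BasicOperations.Run(a, "ab"));    // True
            Console.WriteLine(BasicOperations.Run(a, "abccc")); // True
            Console.WriteLine(BasicOperations.Run(a, "ac"));    // False
        }
    }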
Example #8
 public BitSetRandomAccessDocIdSet(bool multi, MultiValueFacetDataCache multiCache, OpenBitSet openBitSet, FacetDataCache dataCache)
 {
     m_multi      = multi;
     m_multiCache = multiCache;
     m_openBitSet = openBitSet;
     m_dataCache  = dataCache;
 }
Example #9
        public virtual void UpdateParams(OpenBitSet @set)
        {
            _b = GetBitSlice(@set, 0, BYTE_MASK);

            _exceptionOffset = HEADER_MASK + _b * _batchSize;
        }
Example #10
        public override void Load(string fieldName, AtomicReader reader, TermListFactory listFactory, BoboSegmentReader.WorkArea workArea)
        {
#if FEATURE_STRING_INTERN
            string field = string.Intern(fieldName);
#else
            string field = fieldName;
#endif
            int maxdoc = reader.MaxDoc;
            BigNestedInt32Array.BufferedLoader loader       = GetBufferedLoader(maxdoc, workArea);
            BigNestedInt32Array.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null);

            var        list               = (listFactory == null ? new TermStringList() : listFactory.CreateTermList());
            List <int> minIDList          = new List <int>();
            List <int> maxIDList          = new List <int>();
            List <int> freqList           = new List <int>();
            OpenBitSet bitset             = new OpenBitSet(maxdoc + 1);
            int        negativeValueCount = GetNegativeValueCount(reader, field);
            int        t = 1; // valid term id starts from 1
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);

            m_overflow = false;

            string pre = null;

            int df    = 0;
            int minID = -1;
            int maxID = -1;
            int docID = -1;
            int valId = 0;

            Terms terms = reader.GetTerms(field);
            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetIterator(null);
                BytesRef  text;
                while ((text = termsEnum.Next()) != null)
                {
                    string   strText = text.Utf8ToString();
                    string   val     = null;
                    int      weight  = 0;
                    string[] split   = strText.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries);
                    if (split.Length > 1)
                    {
                        val    = split[0];
                        weight = int.Parse(split[split.Length - 1]);
                    }
                    else
                    {
                        continue;
                    }

                    if (pre == null || !val.Equals(pre))
                    {
                        if (pre != null)
                        {
                            freqList.Add(df);
                            minIDList.Add(minID);
                            maxIDList.Add(maxID);
                        }
                        list.Add(val);
                        df    = 0;
                        minID = -1;
                        maxID = -1;
                        valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                        t++;
                    }

                    Term     term     = new Term(field, strText);
                    DocsEnum docsEnum = reader.GetTermDocsEnum(term);
                    if (docsEnum != null)
                    {
                        while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                        {
                            df++;

                            if (!loader.Add(docID, valId))
                            {
                                LogOverflow(fieldName);
                            }
                            else
                            {
                                weightLoader.Add(docID, weight);
                            }

                            if (docID < minID)
                            {
                                minID = docID;
                            }
                            bitset.FastSet(docID);
                            while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS)
                            {
                                docID = docsEnum.DocID;
                                df++;
                                if (!loader.Add(docID, valId))
                                {
                                    LogOverflow(fieldName);
                                }
                                else
                                {
                                    weightLoader.Add(docID, weight);
                                }
                                bitset.FastSet(docID);
                            }
                            if (docID > maxID)
                            {
                                maxID = docID;
                            }
                        }
                    }
                    pre = val;
                }
                if (pre != null)
                {
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                }
            }

            list.Seal();

            try
            {
                m_nestedArray.Load(maxdoc + 1, loader);
                m_weightArray.Load(maxdoc + 1, weightLoader);
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.m_valArray = list;
            this.m_freqs    = freqList.ToArray();
            this.m_minIDs   = minIDList.ToArray();
            this.m_maxIDs   = maxIDList.ToArray();

            int doc = 0;
            while (doc < maxdoc && !m_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc < maxdoc)
            {
                this.m_minIDs[0] = doc;
                doc = maxdoc - 1;
                while (doc >= 0 && !m_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                this.m_maxIDs[0] = doc;
            }
            this.m_freqs[0] = maxdoc - (int)bitset.Cardinality();
        }
Example #11
 public BitSetCollector(OpenBitSet bitSet)
 {
     _bitSet = bitSet;
 }
Example #12
 public bool IsSubsetOf(BitPointSet other)
 {
     return(_cardinality == OpenBitSet.intersectionCount(_points, other._points));
 }
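IsSubsetOf uses the identity A ⊆ B ⇔ |A ∩ B| = |A|. Because the casing of the static intersection-count helper varies between ports (intersectionCount here, IntersectionCount in other Lucene.NET versions), the sketch below expresses the same check with instance methods only; the bit positions are invented:

    using System;
    using Lucene.Net.Util; // OpenBitSet

    internal static class SubsetDemo
    {
        // A is a subset of B exactly when |A AND B| == |A|.
        private static bool IsSubsetOf(OpenBitSet a, OpenBitSet b)
        {
            var scratch = new OpenBitSet(Math.Max(a.Length, b.Length));
            scratch.Union(a);       // scratch = a
            scratch.Intersect(b);   // scratch = a AND b
            return scratch.Cardinality() == a.Cardinality();
        }

        private static void Main()
        {
            var small = new OpenBitSet(32);
            var big   = new OpenBitSet(32);
            foreach (int bit in new[] { 2, 7 }) small.FastSet(bit);
            foreach (int bit in new[] { 2, 7, 19 }) big.FastSet(bit);

            Console.WriteLine(IsSubsetOf(small, big)); // True
            Console.WriteLine(IsSubsetOf(big, small)); // False
        }
    }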
Example #13
 public BitPointSet(int maxPoints)
 {
     _points = new OpenBitSet(maxPoints);
 }
Example #14
 /// <summary>Method to decompress the entire batch.</summary>
 /// <param name="blob">OpenBitSet</param>
 /// <returns>int array with decompressed segment of numbers</returns>
 protected internal virtual int[] Decompress(OpenBitSet blob)
 {
     return(new P4DSetNoBase().Decompress(blob));
 }
Example #15
 public ThreadClassAnonymousHelper(TestTimeLimitingCollector outerInstance, OpenBitSet success, bool withTimeout, int num)
 {
     this.outerInstance = outerInstance;
     this.success       = success;
     this.withTimeout   = withTimeout;
     this.num           = num;
 }
Example #16
 internal AssertingPostingsConsumer(PostingsConsumer @in, FieldInfo fieldInfo, OpenBitSet visitedDocs)
 {
     this.@in = @in;
     this.fieldInfo = fieldInfo;
     this.VisitedDocs = visitedDocs;
 }
Example #17
        /// <summary>
        /// loads multi-value facet data. This method uses a workarea to prepare loading.
        /// </summary>
        /// <param name="fieldName"></param>
        /// <param name="reader"></param>
        /// <param name="listFactory"></param>
        /// <param name="workArea"></param>
        public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory, BoboSegmentReader.WorkArea workArea)
        {
#if FEATURE_STRING_INTERN
            string field = string.Intern(fieldName);
#else
            string field = fieldName;
#endif
            int maxdoc = reader.MaxDoc;
            BigNestedInt32Array.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);

            ITermValueList list               = (listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList());
            List <int>     minIDList          = new List <int>();
            List <int>     maxIDList          = new List <int>();
            List <int>     freqList           = new List <int>();
            OpenBitSet     bitset             = new OpenBitSet(maxdoc + 1);
            int            negativeValueCount = GetNegativeValueCount(reader, field);
            int            t = 1; // valid term id starts from 1
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);

            m_overflow = false;
            Terms terms = reader.GetTerms(field);
            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetIterator(null);
                BytesRef  text;
                while ((text = termsEnum.Next()) != null)
                {
                    string strText = text.Utf8ToString();
                    list.Add(strText);

                    Term     term     = new Term(field, strText);
                    DocsEnum docsEnum = reader.GetTermDocsEnum(term);
                    int      df       = 0;
                    int      minID    = -1;
                    int      maxID    = -1;
                    int      docID    = -1;
                    int      valId    = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                    {
                        df++;
                        if (!loader.Add(docID, valId))
                        {
                            LogOverflow(fieldName);
                        }
                        minID = docID;
                        bitset.FastSet(docID);
                        while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS)
                        {
                            docID = docsEnum.DocID;
                            df++;
                            if (!loader.Add(docID, valId))
                            {
                                LogOverflow(fieldName);
                            }
                            bitset.FastSet(docID);
                        }
                        maxID = docID;
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                    t++;
                }
            }

            list.Seal();

            try
            {
                m_nestedArray.Load(maxdoc + 1, loader);
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.m_valArray = list;
            this.m_freqs    = freqList.ToArray();
            this.m_minIDs   = minIDList.ToArray();
            this.m_maxIDs   = maxIDList.ToArray();

            int doc = 0;
            while (doc < maxdoc && !m_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc < maxdoc)
            {
                this.m_minIDs[0] = doc;
                doc = maxdoc - 1;
                while (doc >= 0 && !m_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                this.m_maxIDs[0] = doc;
            }
            this.m_freqs[0] = maxdoc - (int)bitset.Cardinality();
        }
Example #18
 public SearchBits(OpenBitSet openBitSet)
 {
     _openBitSet = openBitSet;
 }
Example #19
        public override DocIdSet GetDocIdSet(Index.IndexReader reader /*, Bits acceptDocs*/, IState state)
        {
            var bits  = new OpenBitSet(reader.MaxDoc);
            var terms = new TermsEnumCompatibility(reader, fieldName, state);
            var term  = terms.Next(state);

            if (term == null)
            {
                return(null);
            }
            Node scanCell = null;

            //cells is treated like a stack. LinkedList conveniently has bulk add to beginning. It's in sorted order so that we
            //  always advance forward through the termsEnum index.
            var cells = new LinkedList <Node>(
                grid.GetWorldNode().GetSubCells(queryShape));

            //This is a recursive algorithm that starts with one or more "big" cells, and then recursively dives down into the
            // first such cell that intersects with the query shape.  It's a depth first traversal because we don't move onto
            // the next big cell (breadth) until we're completely done considering all smaller cells beneath it. For a given
            // cell, if it's *within* the query shape then we can conveniently short-circuit the depth traversal and
            // grab all documents assigned to this cell/term.  For an intersection of the cell and query shape, we either
            // recursively step down another grid level or we decide heuristically (via prefixGridScanLevel) that there aren't
            // that many points, and so we scan through all terms within this cell (i.e. the term starts with the cell's term),
            // seeing which ones are within the query shape.
            while (cells.Count > 0)
            {
                Node cell     = cells.First.Value; cells.RemoveFirst();
                var  cellTerm = cell.GetTokenString();
                var  seekStat = terms.Seek(cellTerm, state);
                if (seekStat == TermsEnumCompatibility.SeekStatus.END)
                {
                    break;
                }
                if (seekStat == TermsEnumCompatibility.SeekStatus.NOT_FOUND)
                {
                    continue;
                }
                if (cell.GetLevel() == detailLevel || cell.IsLeaf())
                {
                    terms.Docs(bits, state);
                }
                else
                {                //any other intersection
                    //If the next indexed term is the leaf marker, then add all of them
                    var nextCellTerm = terms.Next(state);
                    Debug.Assert(nextCellTerm.Text.StartsWith(cellTerm));
                    scanCell = grid.GetNode(nextCellTerm.Text, scanCell);
                    if (scanCell.IsLeaf())
                    {
                        terms.Docs(bits, state);
                        term = terms.Next(state);                        //move pointer to avoid potential redundant addDocs() below
                    }

                    //Decide whether to continue to divide & conquer, or whether it's time to scan through terms beneath this cell.
                    // Scanning is a performance optimization trade-off.
                    bool scan = cell.GetLevel() >= prefixGridScanLevel;                    //simple heuristic

                    if (!scan)
                    {
                        //Divide & conquer
                        var lst = cell.GetSubCells(queryShape);
                        for (var i = lst.Count - 1; i >= 0; i--)                         //add to beginning
                        {
                            cells.AddFirst(lst[i]);
                        }
                    }
                    else
                    {
                        //Scan through all terms within this cell to see if they are within the queryShape. No seek()s.
                        for (var t = terms.Term(); t != null && t.Text.StartsWith(cellTerm); t = terms.Next(state))
                        {
                            scanCell = grid.GetNode(t.Text, scanCell);
                            int termLevel = scanCell.GetLevel();
                            if (termLevel > detailLevel)
                            {
                                continue;
                            }
                            if (termLevel == detailLevel || scanCell.IsLeaf())
                            {
                                Shape cShape;
                                if (termLevel == grid.GetMaxLevels() && queryShape.HasArea())
                                {
                                    //TODO should put more thought into implications of box vs point
                                    cShape = scanCell.GetCenter();
                                }
                                else
                                {
                                    cShape = scanCell.GetShape();
                                }
                                if (queryShape.Relate(cShape) == SpatialRelation.DISJOINT)
                                {
                                    continue;
                                }

                                terms.Docs(bits, state);
                            }
                        }                        //term loop
                    }
                }
            }            //cell loop

            return(bits);
        }
Example #20
 public int FindValues(OpenBitSet values, int id, int maxID)
 {
     return(FindValues(values, id, maxID, false));
 }
Example #21
 public OBSDocIdSet(int length)
 {
     bitSet = new OpenBitSet(length);
 }
Example #22
 public SpecialsComparator(OpenBitSet docValues)
 {
     _docValues = docValues;
 }
Example #23
 public abstract int FindValues(OpenBitSet bitset, int id, int maxId);
Example #24
 ///<summary>Internal Decompression Method</summary>
 private int[] decompress(OpenBitSet packedSet)
 {
     Console.Error.WriteLine("Method not implemented");
     return(null);
 }