コード例 #1
0
            /// <summary>
            /// Computes the rank contribution of one keyword for one document.
            /// </summary>
            /// <param name="list">Inverse list of the keyword; its <c>Count</c> is used as the number of documents containing the keyword.</param>
            /// <param name="d">Per-document occurrence record; only <c>nWordsInDocument</c> is read here.</param>
            /// <param name="occ">Encoded occurrences of the keyword in the document; the bits above <c>OCC_KIND_OFFSET</c> select an entry of <c>occurrenceKindWeight</c>.</param>
            /// <returns>Natural logarithm of one plus the sum of the per-occurrence weights.</returns>
            internal double calculateKwdRank(InverseList list, DocumentOccurrences d, int[] occ)
            {
                int hitCount     = occ.Length;
                int docTotal     = impl.documents.Count;
                int docsWithWord = list.Count;
                int wordTotal    = impl.inverseIndex.Count;

                // Inverse document frequency: the fewer documents hold the keyword, the larger the value.
                double inverseDocFrequency = System.Math.Log((double)docTotal / docsWithWord);

                // Ratio of the scaled average (index word count per document) to this document's word count.
                double meanWords   = (double)wordTotal / docTotal;
                double lengthRatio = DENSITY_MAGIC * meanWords / d.nWordsInDocument;
                double hitDensity  = hitCount * System.Math.Log(1 + lengthRatio);

                double weightPerHit = hitDensity * inverseDocFrequency;

                // Start from 1 so that the final logarithm of an empty occurrence list is 0.
                double accumulated = 1;
                foreach (int encoded in occ)
                {
                    accumulated += weightPerHit * occurrenceKindWeight[(uint)encoded >> OCC_KIND_OFFSET];
                }

                return System.Math.Log(accumulated);
            }
コード例 #2
0
            /// <summary>
            /// Collects hits for every indexed keyword returned by <c>impl.inverseIndex.StartsWith(prefix)</c>.
            /// </summary>
            /// <param name="prefix">Keyword prefix to look up in the inverse index.</param>
            /// <param name="maxResults">Upper bound on the number of hits returned. Zero yields an empty hit array.</param>
            /// <param name="timeLimit">Time budget in milliseconds; it is checked after each collected hit.</param>
            /// <param name="sort">
            /// When true, each hit is ranked with <c>calculateKwdRank</c> and the resulting array is sorted;
            /// when false, every hit gets rank 1.0 and the array is left in index order.
            /// </param>
            /// <returns>
            /// The collected hits together with an estimation equal to the sum of <c>Count</c>
            /// of every inverse list visited before the search stopped.
            /// </returns>
            internal virtual FullTextSearchResult SearchPrefix(string prefix, int maxResults, int timeLimit, bool sort)
            {
                FullTextSearchHit[] hits = new FullTextSearchHit[maxResults];
                int  nResults            = 0;
                int  estimation          = 0;

                // Measure the deadline on the UTC clock: local time can jump at DST or
                // time-zone changes, which would cut the search short or let it overrun.
                long stop = DateTime.UtcNow.Ticks + (long)timeLimit * TimeSpan.TicksPerMillisecond;

                foreach (InverseList list in impl.inverseIndex.StartsWith(prefix))
                {
                    IDictionaryEnumerator occurrences = list.GetDictionaryEnumerator(0);
                    estimation += list.Count;
                    while (occurrences.MoveNext())
                    {
                        // Only reachable with no capacity left when maxResults is 0;
                        // without this check the store below would index past the array.
                        if (nResults >= maxResults)
                        {
                            goto Done;
                        }

                        int   doc  = (int)occurrences.Key;
                        float rank = 1.0f;
                        if (sort)
                        {
                            DocumentOccurrences d = (DocumentOccurrences)occurrences.Value;
                            rank = (float)calculateKwdRank(list, d, d.occurrences);
                        }
                        hits[nResults] = new FullTextSearchHit(impl.Storage, doc, rank);
                        if (++nResults >= maxResults || DateTime.UtcNow.Ticks >= stop)
                        {
                            goto Done;
                        }
                    }
                }
Done:
                // Trim the unused tail so callers (and the sort) never see null entries.
                if (nResults < maxResults)
                {
                    FullTextSearchHit[] realHits = new FullTextSearchHit[nResults];
                    Array.Copy(hits, 0, realHits, 0, nResults);
                    hits = realHits;
                }
                if (sort)
                {
                    Array.Sort(hits);
                }
                return(new FullTextSearchResult(hits, estimation));
            }
コード例 #3
0
            internal InverseList(Storage db, int oid, DocumentOccurrences doc)
#if USE_GENERICS
                : base(true)
#else
                : base(typeof(int), true)
コード例 #4
0
            /// <summary>
            /// Recursively evaluates a query tree against one document and returns its rank.
            /// </summary>
            /// <param name="doc">Document identifier compared with each keyword list's <c>currDoc</c>.</param>
            /// <param name="query">Query node to evaluate; its <c>op</c> selects the branch taken.</param>
            /// <returns>
            /// A negative value (-1) when the node does not match the document, otherwise a
            /// non-negative rank: the sum of both operands for And/Near, the larger operand for Or,
            /// the keyword rank from <c>calculateKwdRank</c> for Match/StrictMatch, and 0 for a
            /// Not whose operand did not match.
            /// </returns>
            /// <remarks>
            /// <c>nOccurrences</c>, <c>occurrences</c> and <c>kwds</c> are not declared in this method
            /// (presumably fields of the enclosing class); StrictMatch evaluation reads and updates
            /// the first two, so the order in which operands are evaluated matters.
            /// </remarks>
            internal virtual double evaluate(int doc, FullTextQuery query)
            {
                double left, right;

                switch (query.op)
                {
                case FullTextQuery.Operator.Near:
                case FullTextQuery.Operator.And:
                    // Both operands are always evaluated (no short-circuit), left first.
                    left         = evaluate(doc, ((FullTextQueryBinaryOp)query).left);
                    right        = evaluate(doc, ((FullTextQueryBinaryOp)query).right);
                    // Reset the StrictMatch position state so the next StrictMatch starts a fresh chain.
                    nOccurrences = 0;
                    // A miss on either side makes the conjunction a miss.
                    return(left < 0 || right < 0 ? -1 : left + right);

                case FullTextQuery.Operator.Or:
                    left  = evaluate(doc, ((FullTextQueryBinaryOp)query).left);
                    right = evaluate(doc, ((FullTextQueryBinaryOp)query).right);
                    // The better of the two ranks; -1 only when both operands missed.
                    return(left > right ? left : right);

                case FullTextQuery.Operator.Match:
                case FullTextQuery.Operator.StrictMatch:
                {
                    KeywordList kwd = kwds[((FullTextQueryMatchOp)query).wno];
                    // The keyword's list is not positioned on this document: no match.
                    if (kwd.currDoc != doc)
                    {
                        return(-1);
                    }
                    DocumentOccurrences d = (DocumentOccurrences)kwd.currEntry.Value;
                    int[] occ             = d.occurrences;
                    // Publish this document's occurrence array on the keyword list.
                    kwd.occ = occ;
                    int frequency = occ.Length;
                    if (query.op == FullTextQuery.Operator.StrictMatch)
                    {
                        if (nOccurrences == 0)
                        {
                            // No positions recorded yet: seed the shared buffer with this
                            // keyword's positions (occurrence values masked down to the position bits).
                            nOccurrences = frequency;
                            if (occurrences == null || occurrences.Length < frequency)
                            {
                                occurrences = new int[frequency];
                            }
                            for (int i = 0; i < frequency; i++)
                            {
                                occurrences[i] = occ[i] & OCC_POSITION_MASK;
                            }
                        }
                        else
                        {
                            // Two-pointer scan over the recorded positions (occ1, index j) and this
                            // keyword's positions (occ2, index i). A position occ2 is kept when it
                            // equals occ1 + offs or occ1 + offs + 1.
                            // NOTE(review): the scan only advances forward, so it presumably relies on
                            // both position sequences being in ascending order — confirm.
                            int   nPairs = 0;
                            // dst aliases the buffer being read; writing in place is safe because
                            // nPairs never exceeds j, so only already-consumed slots are overwritten.
                            int[] dst = occurrences;
                            int   occ1 = dst[0];
                            int   occ2 = occ[0] & OCC_POSITION_MASK;
                            int   i = 0, j = 0;
                            int   offs = kwd.kwdOffset;
                            while (true)
                            {
                                if (occ1 + offs <= occ2)
                                {
                                    // occ2 is at or past the expected position; keep it if it is
                                    // at most one position further.
                                    if (occ1 + offs + 1 >= occ2)
                                    {
                                        dst[nPairs++] = occ2;
                                    }
                                    // Advance to the next recorded position, or stop when exhausted.
                                    if (++j == nOccurrences)
                                    {
                                        break;
                                    }
                                    occ1 = dst[j];
                                }
                                else
                                {
                                    // occ2 lies before the expected position: advance this keyword's
                                    // occurrences, or stop when exhausted.
                                    if (++i == frequency)
                                    {
                                        break;
                                    }
                                    occ2 = occ[i] & OCC_POSITION_MASK;
                                }
                            }
                            // The surviving positions become the state for the next StrictMatch.
                            nOccurrences = nPairs;
                            if (nPairs == 0)
                            {
                                return(-1);
                            }
                        }
                    }
                    // Rank is computed from the full occurrence array, not only the surviving positions.
                    return(calculateKwdRank(kwd.list, d, occ));
                }

                case FullTextQuery.Operator.Not:
                {
                    double rank = evaluate(doc, ((FullTextQueryUnaryOp)query).opd);
                    // Invert the outcome: a matching operand is a miss, a miss becomes a zero-rank match.
                    return((rank >= 0) ? -1 : 0);
                }

                default:
                    // Any other operator is treated as a miss.
                    return(-1);
                }
            }