示例#1
0
        /// <summary>
        /// Scores the documents that contain every word in <paramref name="wordIndexes"/> and adds
        /// them, keyed by document id, to <paramref name="docIdRank"/>.
        /// </summary>
        /// <param name="upDict">
        /// Optional filter. When null, every matched document is added. Otherwise a matched document
        /// is added only if its id is a key of <paramref name="upDict"/> (or is not a key, when
        /// <c>upDict.Not</c> is set). When <c>upDict.Not</c> is not set, the score stored in
        /// <paramref name="upDict"/> for a document is added onto that document's result score.
        /// </param>
        /// <param name="docIdRank">
        /// Receives the results. If it is empty on entry and the shortest word doc list is longer than
        /// <c>DocumentResultWhereDictionary.DefaultSize</c>, it is replaced by a new dictionary built
        /// with that count (presumably an initial capacity - confirm against the constructor).
        /// </param>
        /// <param name="wordIndexes">
        /// The query words. Sorted in place. Must contain at least one element because element 0 is
        /// always read.
        /// </param>
        unsafe private void Calculate(DocumentResultWhereDictionary upDict,
                                      ref DocumentResultWhereDictionary docIdRank, WordIndexForQuery[] wordIndexes)
        {
            Array.Sort(wordIndexes);

            MinResultCount = _DBProvider.Table.GroupByLimit;

            // Length of the shortest word doc list among the query words, never more than 1M.
            int minWordDocListCount = 1 * 1024 * 1024; //1M

            foreach (WordIndexForQuery wifq in wordIndexes)
            {
                minWordDocListCount = Math.Min(minWordDocListCount, wifq.WordIndex.WordDocList.Count);
            }

            if (docIdRank.Count == 0 && minWordDocListCount > DocumentResultWhereDictionary.DefaultSize)
            {
                docIdRank = new Core.SFQL.Parse.DocumentResultWhereDictionary(minWordDocListCount);
            }

            Query.PerformanceReport performanceReport = new Hubble.Core.Query.PerformanceReport("Calculate");

            //Merge
            bool oneWordOptimize = this._QueryParameter.CanLoadPartOfDocs && this._QueryParameter.NoAndExpression && wordIndexes.Length == 1;
            int  oneWordMaxCount = 0;

            if (oneWordOptimize)
            {
                //One word
                WordIndexForQuery wifq = wordIndexes[0]; //first word

                Entity.DocumentPositionList docList = wifq.WordIndex.GetNext();

                // j counts every entry visited, including the skipped ones.
                for (int j = 0; docList.DocumentId >= 0; j++, docList = wifq.WordIndex.GetNext())
                {
                    if (j > MinResultCount)
                    {
                        // Past the first MinResultCount entries: skip entries whose Count is
                        // below the largest Count seen among the earlier entries.
                        if (oneWordMaxCount > docList.Count)
                        {
                            continue;
                        }
                    }
                    else if (oneWordMaxCount < docList.Count)
                    {
                        oneWordMaxCount = docList.Count;
                    }

                    long score = CalculateWordScore(wifq, docList.Count, docList.TotalWordsInThisDocument);

                    if (IsAllowedByUpDict(upDict, docList.DocumentId))
                    {
                        docIdRank.Add(docList.DocumentId, score);
                    }
                }
            }
            else
            {
                int wordIndexesLen = wordIndexes.Length;

                WordIndexForQuery fstWifq = wordIndexes[0]; //first word

                Entity.DocumentPositionList fstDocList = fstWifq.WordIndex.GetNext();

                Entity.DocumentPositionList[] docListArr = new Hubble.Core.Entity.DocumentPositionList[wordIndexesLen];

                docListArr[0] = fstDocList;

                // Walk the first word's documents and look each one up in every other word.
                while (fstDocList.DocumentId >= 0)
                {
                    int  firstDocId = fstDocList.DocumentId;
                    bool matchedAll = true;

                    for (int w = 1; w < wordIndexesLen; w++)
                    {
                        docListArr[w] = wordIndexes[w].WordIndex.Get(firstDocId);

                        if (docListArr[w].DocumentId < 0)
                        {
                            // A negative id means this word has no entry for the document.
                            matchedAll = false;
                            break;
                        }
                    }

                    if (matchedAll)
                    {
                        long totalScore = 0;

                        for (int i = 0; i < wordIndexesLen; i++)
                        {
                            totalScore += CalculateWordScore(wordIndexes[i], docListArr[i].Count,
                                                             docListArr[i].TotalWordsInThisDocument);
                        }

                        // Per-word scores may already sit near long.MaxValue, so their sum can
                        // wrap negative; clamp the total the same way as a single score.
                        totalScore = ClampOverflowedScore(totalScore);

                        if (IsAllowedByUpDict(upDict, firstDocId))
                        {
                            docIdRank.Add(firstDocId, totalScore);
                        }
                    }

                    fstDocList    = fstWifq.WordIndex.GetNext();
                    docListArr[0] = fstDocList;
                }
            }

            //Merge score if upDict != null
            if (upDict != null && !upDict.Not)
            {
                foreach (int docid in docIdRank.Keys)
                {
                    DocumentResult *upDrp;

                    if (upDict.TryGetValue(docid, out upDrp))
                    {
                        DocumentResult *drpResult;
                        if (docIdRank.TryGetValue(docid, out drpResult))
                        {
                            // Updates the stored result in place through the returned pointer.
                            drpResult->Score += upDrp->Score;
                        }
                    }
                }
            }

            DeleteProvider delProvider = _DBProvider.DelProvider;
            int            delCount    = delProvider.Filter(docIdRank);

            // oneWordOptimize already implies _QueryParameter.CanLoadPartOfDocs.
            if (oneWordOptimize && upDict == null)
            {
                docIdRank.RelTotalCount = wordIndexes[0].RelTotalCount - delCount;
            }
            else
            {
                docIdRank.RelTotalCount = docIdRank.Count;
            }

            performanceReport.Stop();
        }

        /// <summary>
        /// Returns <paramref name="score"/> unchanged when it is non-negative; a negative value is
        /// treated as 64-bit overflow and replaced by <c>long.MaxValue - 4000000</c>.
        /// </summary>
        private static long ClampOverflowedScore(long score)
        {
            return score < 0 ? long.MaxValue - 4000000 : score;
        }

        /// <summary>
        /// Computes the score one query word contributes for one document:
        /// FieldRank * WordRank * Idf_t * count * 1000000 / (Sum_d_t * totalWords), in 64-bit
        /// integer arithmetic, clamped by <see cref="ClampOverflowedScore"/>.
        /// </summary>
        /// <param name="wifq">The query word supplying the rank factors.</param>
        /// <param name="countInDocument">The document entry's <c>Count</c>.</param>
        /// <param name="totalWordsInDocument">The document entry's <c>TotalWordsInThisDocument</c>.</param>
        private static long CalculateWordScore(WordIndexForQuery wifq, long countInDocument, long totalWordsInDocument)
        {
            long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t * countInDocument * (long)1000000 / ((long)wifq.Sum_d_t * totalWordsInDocument);

            return ClampOverflowedScore(score);
        }

        /// <summary>
        /// Decides whether a matched document may be added to the result: always when
        /// <paramref name="upDict"/> is null; otherwise only when the id is a key of
        /// <paramref name="upDict"/>, or is not a key if <c>upDict.Not</c> is set.
        /// </summary>
        private static bool IsAllowedByUpDict(DocumentResultWhereDictionary upDict, int docId)
        {
            if (upDict == null)
            {
                return true;
            }

            return upDict.Not ? !upDict.ContainsKey(docId) : upDict.ContainsKey(docId);
        }
示例#2
0
        /// <summary>
        /// Scores every document produced by a <c>MultiWordsDocIdEnumerator</c> over
        /// <paramref name="wordIndexes"/>, weighting each word's score by how closely the word
        /// positions in the document follow the positions in the query, and stores the results in
        /// <paramref name="docIdRank"/> ordered by the descending-score comparer.
        /// </summary>
        /// <param name="upDict">Must be null; any other value raises a <c>ParseException</c>.</param>
        /// <param name="docIdRank">
        /// Receives the scored documents. Its <c>RelTotalCount</c> is set to the enumerator's
        /// <c>TotalDocIdCount</c> minus the documents rejected by the untokenized conditions, and
        /// <c>Sorted</c> is set to true. It is also handed to the enumerator when
        /// <c>Argument.NeedGroupBy</c> is set.
        /// </param>
        /// <param name="wordIndexes">The query words.</param>
        unsafe private void CalculateWithPositionOrderByScoreDesc(Core.SFQL.Parse.DocumentResultWhereDictionary upDict,
                                                                  ref Core.SFQL.Parse.DocumentResultWhereDictionary docIdRank, WordIndexForQuery[] wordIndexes)
        {
            // Value substituted for any score that went negative, i.e. overflowed.
            const long overflowScore = long.MaxValue - 4000000;

            DBProvider dbProvider = Argument.DBProvider;

            bool           filterUntokenized = this.Argument.NeedFilterUntokenizedConditions;
            ExpressionTree untokenizedTree   = this.Argument.UntokenizedTreeOnRoot;

            if (upDict != null)
            {
                throw new ParseException("UpDict is not null!");
            }

            // Number of rows to keep: the smallest multiple of 100 above Argument.End,
            // or everything when Argument.End is negative.
            int top = int.MaxValue;

            if (Argument.End >= 0)
            {
                top = (1 + Argument.End / 100) * 100;

                if (top <= 0)
                {
                    // The rounding overflowed.
                    top = 100;
                }
            }

            bool keepAll = top == int.MaxValue;

            // Exactly one of the two containers is used, depending on keepAll.
            List<Docid2Long>       allHits = keepAll ? new List<Docid2Long>() : null;
            PriorQueue<Docid2Long> topHits = keepAll ? null : new PriorQueue<Docid2Long>(top, new DocIdLongComparer(false));

            long lowestKeptScore = 0;
            int  queuedRows      = 0;

            Core.SFQL.Parse.DocumentResultWhereDictionary groupByDict = null;

            if (Argument.NeedGroupBy)
            {
                groupByDict = docIdRank;
            }

            // -1 is passed instead of top: the top optimization was disabled on 2012-3-18
            // because it affected the search result.
            MultiWordsDocIdEnumerator mwde = new MultiWordsDocIdEnumerator(wordIndexes, dbProvider, groupByDict, -1,
                                                                           filterUntokenized);

            Entity.OriginalDocumentPositionList odpl = new Hubble.Core.Entity.OriginalDocumentPositionList();

            mwde.GetNextOriginal(ref odpl);

            // Count and first position of the previously processed word in the current document.
            Entity.DocumentPositionList prevDocList = new Hubble.Core.Entity.DocumentPositionList();

            double ratio = 1;

            if (wordIndexes.Length > 1)
            {
                ratio = (double)2 / (double)(wordIndexes.Length - 1);
            }

            // Scratch DocumentResult used only to evaluate the untokenized conditions.
            Query.DocumentResult  scratchResult;
            Query.DocumentResult *drp = &scratchResult;
            int skippedByFilter       = 0;

            for (; odpl.DocumentId >= 0; mwde.GetNextOriginal(ref odpl))
            {
                // Documents that fail the untokenized conditions are counted and skipped.
                if (filterUntokenized)
                {
                    int docId = odpl.DocumentId;
                    drp->DocId       = docId;
                    drp->PayloadData = dbProvider.GetPayloadDataWithShareLock(docId);

                    if (!ParseWhere.GetComparisionExpressionValue(dbProvider, drp,
                                                                  untokenizedTree))
                    {
                        skippedByFilter++;
                        continue;
                    }
                }

                #region Calculate score

                long totalScore = 0;
                prevDocList.Count         = 0;
                prevDocList.FirstPosition = 0;
                int prevWordIndex = 0;

                for (int i = 0; i < mwde.SelectedCount; i++)
                {
                    int wordIndex = mwde.SelectedIndexes[i];

                    WordIndexForQuery wifq = mwde.WordIndexes[wordIndex];

                    Int16 count         = (Int16)mwde.SelectedDocLists[i].Count;
                    int   firstPosition = mwde.SelectedDocLists[i].FirstPosition;
                    int   totalWords    = mwde.SelectedDocLists[i].TotalWordsInThisDocument;

                    long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t * (long)count * (long)1000000 / ((long)wifq.Sum_d_t * (long)totalWords);

                    if (score < 0)
                    {
                        score = overflowScore;
                    }

                    // The first selected word keeps its plain score.
                    double delta = 1;

                    if (i > 0)
                    {
                        // Compare the distance between this word and the previous one in the
                        // query with the same distance in the document.
                        double queryPositionDelta = wifq.FirstPosition - wordIndexes[prevWordIndex].FirstPosition;
                        double positionDelta      = firstPosition - prevDocList.FirstPosition;

                        double gap = Math.Abs(queryPositionDelta - positionDelta);

                        // Small gaps are snapped to fixed values; larger gaps are used as is.
                        if (gap < 0.031)
                        {
                            gap = 0.031;
                        }
                        else if (gap <= 1.1)
                        {
                            gap = 0.5;
                        }
                        else if (gap <= 2.1)
                        {
                            gap = 1;
                        }

                        delta = Math.Pow((1 / gap), ratio) * count * prevDocList.Count /
                                (double)(wifq.QueryCount * wordIndexes[prevWordIndex].QueryCount);
                    }

                    prevDocList.Count         = count;
                    prevDocList.FirstPosition = firstPosition;
                    prevWordIndex             = wordIndex;

                    totalScore += (long)(score * delta);
                }

                if (_HasRankField)
                {
                    int rank = dbProvider.SharedPayloadProvider.GetPayloadRank(odpl.DocumentId);
                    totalScore *= rank;

                    if (totalScore < 0)
                    {
                        totalScore = overflowScore;
                    }
                }

                // Documents in which every query word was selected score ten times higher.
                if (mwde.SelectedCount == wordIndexes.Length)
                {
                    totalScore *= 10;

                    if (totalScore < 0)
                    {
                        totalScore = overflowScore;
                    }
                }

                #endregion

                if (keepAll)
                {
                    // queuedRows is never incremented in this mode, so every hit lands here.
                    allHits.Add(new Docid2Long(odpl.DocumentId, totalScore));
                }
                else if (queuedRows < top)
                {
                    topHits.Add(new Docid2Long(odpl.DocumentId, totalScore));
                    queuedRows++;

                    if (queuedRows == top)
                    {
                        lowestKeptScore = topHits.Last.Value1;
                    }
                }
                else if (lowestKeptScore < totalScore)
                {
                    // top rows already queued: only a score above the recorded minimum is added.
                    topHits.Add(new Docid2Long(odpl.DocumentId, totalScore));
                    lowestKeptScore = topHits.Last.Value1;
                }
            }

            docIdRank.RelTotalCount = mwde.TotalDocIdCount - skippedByFilter;

            Docid2Long[] orderedHits;

            if (keepAll)
            {
                allHits.Sort(new DocIdLongComparer(false));
                orderedHits = allHits.ToArray();
            }
            else
            {
                orderedHits = topHits.ToArray();
            }

            for (int i = 0; i < orderedHits.Length; i++)
            {
                long score = orderedHits[i].Value1;

                if (score < 0)
                {
                    score = overflowScore;
                }

                docIdRank.Add(orderedHits[i].DocId, new DocumentResult(orderedHits[i].DocId, score));
            }

            docIdRank.Sorted = true;
        }