Example #1
0
            /// <summary>
            /// Wraps a word index reader for use in a query and precomputes the
            /// ranking factors (<c>Sum_d_t</c>, <c>Idf_t</c>) derived from it.
            /// </summary>
            /// <param name="wordIndex">Reader over the index entries of the word.</param>
            /// <param name="totalDocuments">Document total used as the numerator of the Idf_t ratio.</param>
            /// <param name="wordRank">Rank of the word; a value of 0 or less is replaced by 1.</param>
            /// <param name="fieldRank">Rank of the field; a value of 0 or less is replaced by 1.</param>
            public WordIndexForQuery(Index.WordIndexReader wordIndex,
                                     int totalDocuments, int wordRank, int fieldRank)
            {
                // Ranks act as multipliers later on, so non-positive input falls back to 1.
                FieldRank     = fieldRank > 0 ? fieldRank : 1;
                WordRank      = wordRank > 0 ? wordRank : 1;
                RelTotalCount = wordIndex.RelDocCount;

                // -1 marks a reader without entries; otherwise start at the first entry.
                _CurIndex = wordIndex.Count > 0 ? 0 : -1;

                _WordIndex = wordIndex;

                Sum_d_t = (int)Math.Sqrt(_WordIndex.WordCount);

                // NOTE(review): when Count is 0 the quotient is infinite and the result of the
                // int cast is unspecified - confirm Idf_t is never read for an empty reader.
                Idf_t = (int)Math.Log10((double)totalDocuments / _WordIndex.Count + 1) + 1;

                CurDocIdIndex     = 0;
                WordIndexesLength = _WordIndex.Count;
            }
Example #2
0
        /// <summary>
        /// Wraps a word index reader for use in a query, records the word flags and
        /// precomputes the ranking factors (<c>Sum_d_t</c>, <c>Idf_t</c>).
        /// </summary>
        /// <param name="wordIndex">Reader over the index entries of the word.</param>
        /// <param name="totalDocuments">Document total used as the numerator of the Idf_t ratio.</param>
        /// <param name="wordRank">Rank of the word; a value of 0 or less is replaced by 1.</param>
        /// <param name="fieldRank">Rank of the field; a value of 0 or less is replaced by 1.</param>
        /// <param name="flags">Flags stored unchanged in <c>Flags</c>.</param>
        public WordIndexForQuery(Index.WordIndexReader wordIndex,
                                 int totalDocuments, int wordRank, int fieldRank, WordInfo.Flag flags)
        {
            // Ranks act as multipliers later on, so non-positive input falls back to 1.
            FieldRank     = fieldRank > 0 ? fieldRank : 1;
            WordRank      = wordRank > 0 ? wordRank : 1;
            RelTotalCount = wordIndex.RelDocCount;
            Flags         = flags;

            _WordIndex = wordIndex;

            Sum_d_t = (int)Math.Sqrt(_WordIndex.WordCount);

            // The ratio is taken against RelDocCount; an earlier variant divided by
            // _WordIndex.Count instead ("old contains" behaviour).
            // NOTE(review): a RelDocCount of 0 makes the quotient infinite and the int cast
            // unspecified - confirm that case cannot reach this constructor.
            Idf_t = (int)Math.Log10((double)totalDocuments / _WordIndex.RelDocCount + 1) + 1;

            CurDocIdIndex     = 0;
            WordIndexesLength = _WordIndex.Count;
        }
Example #3
0
 /// <summary>
 /// Creates the query-side word index without any word flags by forwarding
 /// to the five-argument constructor with <c>WordInfo.Flag.None</c>.
 /// </summary>
 /// <param name="wordIndex">Reader over the index entries of the word.</param>
 /// <param name="totalDocuments">Document total, passed through unchanged.</param>
 /// <param name="wordRank">Rank of the word, passed through unchanged.</param>
 /// <param name="fieldRank">Rank of the field, passed through unchanged.</param>
 public WordIndexForQuery(
     Index.WordIndexReader wordIndex,
     int totalDocuments,
     int wordRank,
     int fieldRank)
     : this(wordIndex, totalDocuments, wordRank, fieldRank, WordInfo.Flag.None)
 {
 }
Example #4
0
        /// <summary>
        /// Builds the ordered result set for ORDER BY clauses other than the plain
        /// "score desc" case. Only <c>WordIndexes[0]</c> is consulted.
        /// </summary>
        /// <remarks>
        /// The payload share lock is held for the whole call and always released.
        /// Documents rejected by the untokenized filter or found in the deleted-docid
        /// list are skipped. The remaining documents are pushed through a bounded
        /// priority queue ordered by the comparer generated from <c>Argument.OrderBys</c>.
        /// Nothing is written to <paramref name="docIdRank"/> when no index reader is available.
        /// </remarks>
        /// <param name="upDict">Not read by this method.</param>
        /// <param name="docIdRank">
        /// Receives the group-by sample docids, the count of matching documents
        /// (<c>RelTotalCount</c>), the queued documents with their scores, and is
        /// marked as sorted.
        /// </param>
        unsafe public void CalculateOptimizeNormalOrderBy(Core.SFQL.Parse.DocumentResultWhereDictionary upDict,
                                                          ref Core.SFQL.Parse.DocumentResultWhereDictionary docIdRank)
        {
            // Substitute for a score that came out negative (treated as arithmetic overflow).
            const long overflowScore = long.MaxValue - 4000000;

            DBProvider dBProvider = Argument.DBProvider;

            // Scratch record handed to the untokenized filter by pointer.
            Query.DocumentResult  documentResult;
            Query.DocumentResult *drp = &documentResult;

            // Enter and leave go through the same cached provider, and the try starts
            // right after the acquisition so that no failure below can leak the lock.
            dBProvider.SharedPayloadProvider.EnterPayloladShareLock();

            try
            {
                bool           needFilterUntokenizedConditions = this.Argument.NeedFilterUntokenizedConditions;
                ExpressionTree untokenizedTree = this.Argument.UntokenizedTreeOnRoot;

                bool orderByIncludingScore = Argument.OrderByIncludingScore();

                Field[] orderByFields;

                DocId2LongComparer comparer = DocId2LongComparer.Generate(
                    dBProvider, Argument.OrderBys, out orderByFields);

                bool needGroupBy = Argument.NeedGroupBy;

                WordIndexForQuery wifq = WordIndexes[0];
                _IndexReader = wifq.WordIndex.IndexReader;

                Data.Field rankField = dBProvider.GetField("Rank");

                // An untokenized int field named "Rank" switches on the rank payload buffer.
                if (rankField != null &&
                    rankField.DataType == Hubble.Core.Data.DataType.Int &&
                    rankField.IndexType == Hubble.Core.Data.Field.Index.Untokenized)
                {
                    _HasRandField         = true;
                    _RankTab              = rankField.TabIndex;
                    _DocidPayloads        = new OriginalDocumentPositionList[2 * 1024];
                    _CurDocidPayloadIndex = _DocidPayloads.Length;
                }

                if (_IndexReader == null)
                {
                    return;
                }

                //vars for delete
                bool  haveRecordsDeleted = dBProvider.DelProvider.Count > 0;
                int[] delDocs            = null;
                int   curDelIndex        = 0;
                int   curDelDocid        = 0;

                //vars for group by
                int groupByCount = 0;
                int groupByLen   = dBProvider.Table.GroupByLimit;
                int groupByStep  = 1;
                int groupByIndex = 0;

                if (needGroupBy)
                {
                    // Sample every groupByStep-th document so that at most groupByLen are collected.
                    // NOTE(review): a GroupByLimit of 0 would raise DivideByZeroException here - confirm
                    // the limit is always positive.
                    groupByStep = wifq.RelTotalCount / groupByLen;

                    if (groupByStep <= 0)
                    {
                        groupByStep = 1;
                    }
                }

                if (haveRecordsDeleted)
                {
                    delDocs     = dBProvider.DelProvider.DelDocs;
                    curDelDocid = delDocs[curDelIndex];
                }

                // Queue capacity: the smallest multiple of 100 above Argument.End,
                // or unbounded when End is negative.
                int top;

                if (this.Argument.End >= 0)
                {
                    top = (1 + this.Argument.End / 100) * 100;

                    if (top <= 0)
                    {
                        top = 100;
                    }
                }
                else
                {
                    top = int.MaxValue;
                }

                PriorQueue <Docid2Long> priorQueue = new PriorQueue <Docid2Long>(top, comparer);
                int rows = 0;

                Entity.OriginalDocumentPositionList docList = new OriginalDocumentPositionList();

                bool notEOF = GetNext(ref docList);

                Docid2Long last = new Docid2Long();
                last.DocId = -1;

                int relCount = 0;

                while (notEOF)
                {
                    //Process untokenized conditions.
                    //If is not matched, get the next one.
                    if (needFilterUntokenizedConditions)
                    {
                        int docId = docList.DocumentId;
                        drp->DocId       = docId;
                        drp->PayloadData = dBProvider.GetPayloadDataWithShareLock(docId);

                        if (!ParseWhere.GetComparisionExpressionValue(dBProvider, drp,
                                                                      untokenizedTree))
                        {
                            notEOF = GetNext(ref docList);
                            continue;
                        }
                    }

                    //Process deleted records
                    if (haveRecordsDeleted)
                    {
                        if (curDelIndex < delDocs.Length)
                        {
                            //If docid deleted, get next
                            if (docList.DocumentId == curDelDocid)
                            {
                                notEOF = GetNext(ref docList);
                                continue;
                            }
                            else if (docList.DocumentId > curDelDocid)
                            {
                                // Advance the delete cursor until it reaches or passes the current docid.
                                while (curDelIndex < delDocs.Length && curDelDocid < docList.DocumentId)
                                {
                                    curDelIndex++;

                                    if (curDelIndex >= delDocs.Length)
                                    {
                                        // Delete list exhausted; skip this check from now on.
                                        haveRecordsDeleted = false;
                                        break;
                                    }

                                    curDelDocid = delDocs[curDelIndex];
                                }

                                if (curDelIndex < delDocs.Length)
                                {
                                    if (docList.DocumentId == curDelDocid)
                                    {
                                        notEOF = GetNext(ref docList);
                                        continue;
                                    }
                                }
                            }
                        }
                    }

                    if (needGroupBy)
                    {
                        if (groupByCount < groupByLen)
                        {
                            if (groupByIndex >= groupByStep)
                            {
                                groupByIndex = 0;
                            }

                            if (groupByIndex == 0)
                            {
                                docIdRank.AddToGroupByCollection(docList.DocumentId);
                                groupByCount++;
                            }

                            groupByIndex++;
                        }
                    }

                    relCount++;

                    // The score is computed and clamped once, so the full-queue and the
                    // filling-queue paths both see the same overflow-safe value.
                    long score = 1;

                    if (orderByIncludingScore)
                    {
                        // NOTE(review): a zero Sum_d_t or TotalWordsInThisDocument would divide by zero - confirm
                        // neither can occur for an indexed document.
                        int wordCount = docList.CountAndWordCount / 8; //one word, score = count
                        score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t * (long)wordCount * (long)1000000 /
                                ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument);
                    }

                    if (score < 0)
                    {
                        //Overflow
                        score = overflowScore;
                    }

                    Docid2Long cur = new Docid2Long();
                    cur.DocId = docList.DocumentId;
                    cur.Rank  = docList.TotalWordsInThisDocument; //use Rank store TotalWordsInThisDocument

                    Docid2Long.Generate(ref cur, dBProvider, orderByFields, score);

                    if (rows >= top)
                    {
                        // Queue is full: only admit documents that sort ahead of the current last one.
                        if (comparer.Compare(last, cur) > 0)
                        {
                            priorQueue.Add(cur);
                            last = priorQueue.Last;
                        }
                    }
                    else
                    {
                        priorQueue.Add(cur);
                        rows++;

                        if (rows == top)
                        {
                            last = priorQueue.Last;
                        }
                    }

                    notEOF = GetNext(ref docList);
                }

                docIdRank.RelTotalCount = relCount;

                foreach (Docid2Long docid2Long in priorQueue.ToArray())
                {
                    long score = comparer.GetScore(docid2Long);

                    if (score < 0)
                    {
                        //Overflow
                        score = overflowScore;
                    }

                    docIdRank.Add(docid2Long.DocId, new DocumentResult(docid2Long.DocId, score));
                }

                docIdRank.Sorted = true;
            }
            finally
            {
                dBProvider.SharedPayloadProvider.LeavePayloadShareLock();
            }
        }