Esempio n. 1
0
        /// <summary>
        /// Scores the documents that match <paramref name="wordIndexes"/> and adds them to
        /// <paramref name="docIdRank"/>. Word positions are not taken into account.
        /// </summary>
        /// <remarks>
        /// When there is a single word and the query parameter reports both CanLoadPartOfDocs and
        /// NoAndExpression, the word's document list is walked directly. Otherwise the first word
        /// (after sorting) drives the scan and every other word must contain the same document id,
        /// unless that word carries the Or flag.
        /// Documents present in _NotInDict are never added. Documents are also filtered by
        /// <paramref name="upDict"/> as described on that parameter.
        /// </remarks>
        /// <param name="upDict">
        /// Optional filter dictionary; may be null. When its Not flag is clear, only documents it
        /// contains are kept and its scores are added to the new scores. When its Not flag is set,
        /// only documents it does not contain are kept.
        /// </param>
        /// <param name="docIdRank">
        /// Receives document id / score pairs. Replaced by a larger dictionary when it is empty on
        /// entry and the smallest word list exceeds the default dictionary size.
        /// </param>
        /// <param name="wordIndexes">Query words; sorted in place. Element 0 is read unconditionally, so the array must not be empty.</param>
        unsafe private void Calculate(DocumentResultWhereDictionary upDict,
                                      ref DocumentResultWhereDictionary docIdRank, WordIndexForQuery[] wordIndexes)
        {
            Array.Sort(wordIndexes);

            AdjustSort(wordIndexes);

            MinResultCount = _DBProvider.Table.GroupByLimit;

            //Get the smallest word doc list count (capped at 1M); used to pre-size docIdRank
            int minWordDocListCount = 1 * 1024 * 1024; //1M

            foreach (WordIndexForQuery wifq in wordIndexes)
            {
                minWordDocListCount = Math.Min(minWordDocListCount, wifq.WordIndex.Count);
            }

            if (docIdRank.Count == 0 && minWordDocListCount > DocumentResultWhereDictionary.DefaultSize)
            {
                docIdRank = new Core.SFQL.Parse.DocumentResultWhereDictionary(minWordDocListCount);
            }

            Query.PerformanceReport performanceReport = new Hubble.Core.Query.PerformanceReport("Calculate");

            //try/finally so the report is stopped even when the scan throws
            try
            {
                //Merge
                bool oneWordOptimize = this._QueryParameter.CanLoadPartOfDocs && this._QueryParameter.NoAndExpression && wordIndexes.Length == 1;

                if (oneWordOptimize)
                {
                    //One word
                    WordIndexForQuery wifq = wordIndexes[0]; //first word

                    int oneWordMaxCount = 0;
                    int j               = 0;

                    //The iterator part runs on every pass, including after 'continue'
                    for (Entity.DocumentPositionList docList = wifq.WordIndex.GetNext();
                         docList.DocumentId >= 0;
                         j++, docList = wifq.WordIndex.GetNext())
                    {
                        if (j > MinResultCount)
                        {
                            //Past the first MinResultCount entries: skip documents whose count
                            //is below the maximum count seen among those first entries
                            if (oneWordMaxCount > docList.Count)
                            {
                                continue;
                            }
                        }
                        else if (oneWordMaxCount < docList.Count)
                        {
                            oneWordMaxCount = docList.Count;
                        }

                        long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t * (long)docList.Count * (long)1000000 / ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument);

                        if (score < 0)
                        {
                            //Overflow
                            score = long.MaxValue - 4000000;
                        }

                        int  docId     = docList.DocumentId;
                        bool notInDict = _NotInDict != null && _NotInDict.ContainsKey(docId);

                        //Keep the document when there is no upDict, or when its membership in
                        //upDict is the opposite of upDict.Not
                        if (!notInDict && (upDict == null || upDict.Not != upDict.ContainsKey(docId)))
                        {
                            docIdRank.Add(docId, score);
                        }
                    }
                }
                else
                {
                    int wordIndexesLen = wordIndexes.Length;

                    WordIndexForQuery fstWifq = wordIndexes[0]; //first word

                    Entity.DocumentPositionList[] docListArr = new Hubble.Core.Entity.DocumentPositionList[wordIndexesLen];

                    for (docListArr[0] = fstWifq.WordIndex.GetNext();
                         docListArr[0].DocumentId >= 0;
                         docListArr[0] = fstWifq.WordIndex.GetNext())
                    {
                        int firstDocId = docListArr[0].DocumentId;
                        int curWord    = 1;

                        //Look the document up in every other word; stop at the first word that
                        //misses it and is not flagged Or
                        while (curWord < wordIndexesLen)
                        {
                            docListArr[curWord] = wordIndexes[curWord].WordIndex.Get(firstDocId);

                            if (docListArr[curWord].DocumentId < 0 &&
                                (wordIndexes[curWord].Flags & WordInfo.Flag.Or) == 0)
                            {
                                break;
                            }

                            curWord++;
                        }

                        if (curWord < wordIndexesLen)
                        {
                            //Not matched
                            continue;
                        }

                        //Matched
                        //NOTE(review): for an Or word that misses this document, docListArr[i] is
                        //whatever Get returned for the miss - confirm its TotalWordsInThisDocument
                        //cannot make the divisor below zero.
                        long totalScore = 0;

                        for (int i = 0; i < wordIndexesLen; i++)
                        {
                            WordIndexForQuery           wifq    = wordIndexes[i];
                            Entity.DocumentPositionList docList = docListArr[i];

                            long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t * (long)docList.Count * (long)1000000 / ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument);

                            if (score < 0)
                            {
                                //Overflow
                                score = long.MaxValue - 4000000;
                            }

                            totalScore += score;
                        }

                        bool notInDict = _NotInDict != null && _NotInDict.ContainsKey(firstDocId);

                        if (!notInDict && (upDict == null || upDict.Not != upDict.ContainsKey(firstDocId)))
                        {
                            docIdRank.Add(firstDocId, totalScore);
                        }
                    }
                }

                //Merge score if upDict != null
                if (upDict != null && !upDict.Not)
                {
                    foreach (int docid in docIdRank.Keys)
                    {
                        DocumentResult *upDrp;
                        DocumentResult *drpResult;

                        if (upDict.TryGetValue(docid, out upDrp) &&
                            docIdRank.TryGetValue(docid, out drpResult))
                        {
                            drpResult->Score += upDrp->Score;
                        }
                    }
                }

                DeleteProvider delProvider = _DBProvider.DelProvider;
                int            delCount    = delProvider.Filter(docIdRank);

                //oneWordOptimize already implies CanLoadPartOfDocs, so it is not tested again
                if (oneWordOptimize && upDict == null)
                {
                    docIdRank.RelTotalCount = wordIndexes[0].RelTotalCount - delCount;
                }
                else
                {
                    docIdRank.RelTotalCount = docIdRank.Count;
                }
            }
            finally
            {
                performanceReport.Stop();
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Scores the documents that match <paramref name="wordIndexes"/>, weighting each word by
        /// how closely its distance to the previous word in the document matches the distance in
        /// the query, and adds them to <paramref name="docIdRank"/>.
        /// </summary>
        /// <remarks>
        /// Delegates to CalculateWithPositionMatch when UseMatch returns true. Delegates to an
        /// IQueryOptimize implementation (OneWordOptimize for a single word, ContainsOptimize
        /// otherwise) when the query parameter reports CanLoadPartOfDocs, AndExpressionCanBeOptimized,
        /// End &gt;= 0 and no NeedDistinct, and _NotInDict is null. In both cases nothing else in this
        /// method runs.
        /// </remarks>
        /// <param name="upDict">
        /// Optional filter dictionary; may be null. When its Not flag is clear, only documents it
        /// contains are kept and its scores are added to the new scores. When its Not flag is set,
        /// only documents it does not contain are kept.
        /// </param>
        /// <param name="docIdRank">
        /// Receives document id / score pairs. May be replaced by a larger dictionary when it is
        /// empty and the smallest word list exceeds the default dictionary size.
        /// </param>
        /// <param name="wordIndexes">Query words; sorted in place. Element 0 is read unconditionally on the non-optimized path.</param>
        unsafe private void CalculateWithPosition(Core.SFQL.Parse.DocumentResultWhereDictionary upDict,
                                                  ref Core.SFQL.Parse.DocumentResultWhereDictionary docIdRank, WordIndexForQuery[] wordIndexes)
        {
            if (UseMatch(wordIndexes))
            {
                CalculateWithPositionMatch(upDict, ref docIdRank, wordIndexes);
                return;
            }

            Array.Sort(wordIndexes);

            AdjustSort(wordIndexes);

            MinResultCount = _DBProvider.Table.GroupByLimit;

            //Get the smallest word doc list count (capped at 1M); used to pre-size docIdRank
            int minWordDocListCount = 1 * 1024 * 1024; //1M

            foreach (WordIndexForQuery wifq in wordIndexes)
            {
                minWordDocListCount = Math.Min(minWordDocListCount, wifq.WordIndex.Count);
            }

            Query.PerformanceReport performanceReport = new Hubble.Core.Query.PerformanceReport("Calculate");

            //try/finally so the report is stopped on every exit path, including exceptions
            try
            {
                bool canOptimize = this._QueryParameter.CanLoadPartOfDocs && this._QueryParameter.AndExpressionCanBeOptimized(_DBProvider) &&
                                   _NotInDict == null && _QueryParameter.End >= 0 && !_QueryParameter.NeedDistinct;

                if (canOptimize)
                {
                    //Same preconditions for both optimizers; only the word count picks the type
                    Type optimizeType = wordIndexes.Length == 1 ? typeof(OneWordOptimize) : typeof(ContainsOptimize);

                    IQueryOptimize qOptimize = QueryOptimizeBuilder.Build(optimizeType,
                                                                          DBProvider, _QueryParameter.End, _QueryParameter.OrderBy,
                                                                          _QueryParameter.OrderBys, _QueryParameter.NeedGroupBy,
                                                                          _QueryParameter.OrderByCanBeOptimized, _QueryParameter.NeedFilterUntokenizedConditions(this._DBProvider),
                                                                          _QueryParameter.UntokenizedTreeOnRoot, wordIndexes);

                    qOptimize.CalculateOptimize(upDict, ref docIdRank);
                    return;
                }

                if (docIdRank.Count == 0 && minWordDocListCount > DocumentResultWhereDictionary.DefaultSize)
                {
                    docIdRank = new Core.SFQL.Parse.DocumentResultWhereDictionary(minWordDocListCount);
                }

                int wordIndexesLen = wordIndexes.Length;

                //Exponent applied to the position weight: 2 / (number of word pairs)
                double ratio = 1;

                if (wordIndexesLen > 1)
                {
                    ratio = (double)2 / (double)(wordIndexesLen - 1);
                }

                WordIndexForQuery fstWifq = wordIndexes[0]; //first word

                OriginalDocumentPositionList fstODPL = new OriginalDocumentPositionList();
                fstWifq.WordIndex.GetNextOriginal(ref fstODPL);

                Entity.DocumentPositionList[] docListArr = new Hubble.Core.Entity.DocumentPositionList[wordIndexesLen];

                fstODPL.ToDocumentPositionList(ref docListArr[0]);

                OriginalDocumentPositionList odpl = new OriginalDocumentPositionList();

                //The iterator part advances the first word on every pass, including after 'continue'
                for (;
                     fstODPL.DocumentId >= 0;
                     fstWifq.WordIndex.GetNextOriginal(ref fstODPL), fstODPL.ToDocumentPositionList(ref docListArr[0]))
                {
                    int curWord    = 1;
                    int firstDocId = fstODPL.DocumentId;

                    //Look the document up in every other word; stop at the first word that
                    //misses it and is not flagged Or
                    while (curWord < wordIndexesLen)
                    {
                        wordIndexes[curWord].WordIndex.GetNextOriginalWithDocId(ref odpl, firstDocId);
                        odpl.ToDocumentPositionList(ref docListArr[curWord]);

                        if (docListArr[curWord].DocumentId < 0 &&
                            (wordIndexes[curWord].Flags & WordInfo.Flag.Or) == 0)
                        {
                            break;
                        }

                        curWord++;
                    }

                    if (curWord < wordIndexesLen)
                    {
                        //Not matched
                        continue;
                    }

                    //Matched
                    //Calculate score

                    long totalScore = 0;
                    Entity.DocumentPositionList lastDocList
                        = new Hubble.Core.Entity.DocumentPositionList();

                    for (int i = 0; i < wordIndexesLen; i++)
                    {
                        WordIndexForQuery wifq = wordIndexes[i];

                        if (wifq.WordIndex.Count == 0)
                        {
                            //a^5000^0 b^5000^2^1
                            //if has a and hasn't b but b can be or
                            //2010-09-30 eaglet
                            continue;
                        }

                        Entity.DocumentPositionList docList = docListArr[i];

                        long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t * (long)docList.Count * (long)1000000 / ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument);

                        if (score < 0)
                        {
                            //Overflow
                            score = long.MaxValue - 4000000;
                        }

                        double delta = 1;

                        if (i > 0)
                        {
                            //Calculate with position: compare the distance between this word and
                            //the previous one in the query with the distance in the document
                            double queryPositionDelta = wifq.FirstPosition - wordIndexes[i - 1].FirstPosition;
                            double positionDelta      = docList.FirstPosition - lastDocList.FirstPosition;

                            delta = Math.Abs(queryPositionDelta - positionDelta);

                            //Bucket small differences; larger differences are used as they are
                            if (delta < 0.031)
                            {
                                delta = 0.031;
                            }
                            else if (delta <= 1.1)
                            {
                                delta = 0.5;
                            }
                            else if (delta <= 2.1)
                            {
                                delta = 1;
                            }

                            delta = Math.Pow((1 / delta), ratio) * docList.Count * lastDocList.Count /
                                    (double)(wifq.QueryCount * wordIndexes[i - 1].QueryCount);
                        }

                        lastDocList = docList;

                        totalScore += (long)(score * delta);
                    }

                    bool notInDict = _NotInDict != null && _NotInDict.ContainsKey(firstDocId);

                    //Keep the document when there is no upDict, or when its membership in
                    //upDict is the opposite of upDict.Not
                    if (!notInDict && (upDict == null || upDict.Not != upDict.ContainsKey(firstDocId)))
                    {
                        docIdRank.Add(firstDocId, totalScore);
                    }
                }

                //Merge score if upDict != null
                if (upDict != null && !upDict.Not)
                {
                    foreach (int docid in docIdRank.Keys)
                    {
                        DocumentResult *upDrp;
                        DocumentResult *drpResult;

                        if (upDict.TryGetValue(docid, out upDrp) &&
                            docIdRank.TryGetValue(docid, out drpResult))
                        {
                            drpResult->Score += upDrp->Score;
                        }
                    }
                }

                DeleteProvider delProvider = _DBProvider.DelProvider;
                delProvider.Filter(docIdRank);

                //The one-word case always returns through the optimizer above, so on this path
                //the total is simply the number of entries left after the delete filter
                docIdRank.RelTotalCount = docIdRank.Count;
            }
            finally
            {
                performanceReport.Stop();
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Scans the documents matching WordIndexes, scores them with position weighting, keeps
        /// the best rows in a priority queue ordered by the Argument.OrderBys comparer, and writes
        /// the queue content to <paramref name="docIdRank"/>.
        /// </summary>
        /// <remarks>
        /// The queue capacity is Argument.End rounded up to the next multiple of 100, or
        /// int.MaxValue when Argument.End is negative. Documents listed by the delete provider and
        /// documents rejected by the untokenized conditions are skipped. On return
        /// docIdRank.RelTotalCount holds the number of accepted documents (not only the queued
        /// ones) and docIdRank.Sorted is set to true.
        /// </remarks>
        /// <param name="upDict">Not read by this method.</param>
        /// <param name="docIdRank">Receives the queued documents and, when grouping is requested, every accepted document id.</param>
        private unsafe void CalculateNormal(Hubble.Core.SFQL.Parse.DocumentResultWhereDictionary upDict, ref Hubble.Core.SFQL.Parse.DocumentResultWhereDictionary docIdRank)
        {
            DBProvider dBProvider  = Argument.DBProvider;
            bool       needGroupBy = Argument.NeedGroupBy;

            bool           needFilterUntokenizedConditions = this.Argument.NeedFilterUntokenizedConditions;
            ExpressionTree untokenizedTree = this.Argument.UntokenizedTreeOnRoot;

            //Scratch record handed to the untokenized condition evaluator
            Query.DocumentResult  documentResult;
            Query.DocumentResult *drp = &documentResult;

            //vars for delete
            bool haveRecordsDeleted = dBProvider.DelProvider.Count > 0;

            int[] delDocs     = null;
            int   curDelIndex = 0;
            int   curDelDocid = 0;

            Field[]            orderByFields;
            DocId2LongComparer comparer = DocId2LongComparer.Generate(
                dBProvider, Argument.OrderBys, out orderByFields);

            if (haveRecordsDeleted)
            {
                delDocs     = dBProvider.DelProvider.DelDocs;
                curDelDocid = delDocs[curDelIndex];
            }

            WordIndexForQuery[] wordIndexes    = WordIndexes;
            int                 wordIndexesLen = wordIndexes.Length;

            //Exponent applied to the position weight: 2 / (number of word pairs)
            double ratio = 1;

            if (wordIndexesLen > 1)
            {
                ratio = (double)2 / (double)(wordIndexesLen - 1);
            }

            WordIndexForQuery fstWifq = wordIndexes[0]; //first word

            OriginalDocumentPositionList fstODPL = new OriginalDocumentPositionList();

            Entity.DocumentPositionList[] docListArr = new Hubble.Core.Entity.DocumentPositionList[wordIndexesLen];

            GetNext(fstWifq, ref fstODPL, ref docListArr[0]);

            OriginalDocumentPositionList odpl = new OriginalDocumentPositionList();

            Entity.DocumentPositionList lastDocList
                = new Hubble.Core.Entity.DocumentPositionList();

            //calculate top
            int top;
            int end = this.Argument.End;

            if (end >= 0)
            {
                //Round up to the next multiple of 100 (0..99 -> 100, 100..199 -> 200, ...).
                //Done in 64 bits and clamped: the 32-bit form wrapped negative for End close to
                //int.MaxValue and then fell back to 100, which is smaller than the requested range.
                long roundedTop = (1 + (long)end / 100) * 100;

                top = roundedTop > int.MaxValue ? int.MaxValue : (int)roundedTop;
            }
            else
            {
                top = int.MaxValue;
            }

            PriorQueue <Docid2Long> priorQueue = new PriorQueue <Docid2Long>(top, comparer);

            int rows = 0;

            Docid2Long cur  = new Docid2Long();
            Docid2Long last = new Docid2Long();

            last.DocId = -1;

            //The iterator part advances the first word on every pass, including after 'continue'
            for (; fstODPL.DocumentId >= 0; GetNext(fstWifq, ref fstODPL, ref docListArr[0]))
            {
                int curWord    = 1;
                int firstDocId = fstODPL.DocumentId;

                //Look the document up in every other word; stop at the first word that
                //misses it and is not flagged Or
                while (curWord < wordIndexesLen)
                {
                    wordIndexes[curWord].WordIndex.GetNextOriginalWithDocId(ref odpl, firstDocId);
                    odpl.ToDocumentPositionList(ref docListArr[curWord]);

                    if (docListArr[curWord].DocumentId < 0 &&
                        (wordIndexes[curWord].Flags & WordInfo.Flag.Or) == 0)
                    {
                        break;
                    }

                    curWord++;
                }

                if (curWord < wordIndexesLen)
                {
                    //Not matched
                    continue;
                }

                //Process untokenized conditions.
                //If is not matched, get the next one.
                if (needFilterUntokenizedConditions)
                {
                    drp->DocId       = firstDocId;
                    drp->PayloadData = dBProvider.GetPayloadDataWithShareLock(firstDocId);

                    if (!ParseWhere.GetComparisionExpressionValue(dBProvider, drp,
                                                                  untokenizedTree))
                    {
                        continue;
                    }
                }

                //Skip deleted documents before scoring so no score is computed for them.
                //The comparisons assume delDocs is in ascending doc id order - TODO confirm.
                if (haveRecordsDeleted && curDelIndex < delDocs.Length)
                {
                    //Move the delete cursor forward until it reaches or passes firstDocId
                    while (curDelDocid < firstDocId)
                    {
                        curDelIndex++;

                        if (curDelIndex >= delDocs.Length)
                        {
                            //Delete list exhausted; stop checking for the rest of the scan
                            haveRecordsDeleted = false;
                            break;
                        }

                        curDelDocid = delDocs[curDelIndex];
                    }

                    if (haveRecordsDeleted && firstDocId == curDelDocid)
                    {
                        //If docid deleted, get next
                        continue;
                    }
                }

                //Matched
                //Calculate score

                long totalScore = 0;
                lastDocList.Count         = 0;
                lastDocList.FirstPosition = 0;

                for (int i = 0; i < wordIndexesLen; i++)
                {
                    WordIndexForQuery wifq = wordIndexes[i];

                    if (wifq.WordIndex.Count == 0)
                    {
                        //a^5000^0 b^5000^2^1
                        //if has a and hasn't b but b can be or
                        //2010-09-30 eaglet
                        continue;
                    }

                    Entity.DocumentPositionList docList = docListArr[i];

                    long score = (long)wifq.FieldRank * (long)wifq.WordRank * (long)wifq.Idf_t * (long)docList.Count * (long)1000000 / ((long)wifq.Sum_d_t * (long)docList.TotalWordsInThisDocument);

                    if (score < 0)
                    {
                        //Overflow
                        score = long.MaxValue - 4000000;
                    }

                    double delta = 1;

                    if (i > 0)
                    {
                        //Calculate with position: compare the distance between this word and
                        //the previous one in the query with the distance in the document
                        double queryPositionDelta = wifq.FirstPosition - wordIndexes[i - 1].FirstPosition;
                        double positionDelta      = docList.FirstPosition - lastDocList.FirstPosition;

                        delta = Math.Abs(queryPositionDelta - positionDelta);

                        //Bucket small differences; larger differences are used as they are
                        if (delta < 0.031)
                        {
                            delta = 0.031;
                        }
                        else if (delta <= 1.1)
                        {
                            delta = 0.5;
                        }
                        else if (delta <= 2.1)
                        {
                            delta = 1;
                        }

                        delta = Math.Pow((1 / delta), ratio) * docList.Count * lastDocList.Count /
                                (double)(wifq.QueryCount * wordIndexes[i - 1].QueryCount);
                    }

                    lastDocList.Count         = docList.Count;
                    lastDocList.FirstPosition = docList.FirstPosition;

                    totalScore += (long)(score * delta);
                } //End for cycle

                if (needGroupBy)
                {
                    docIdRank.AddToGroupByCollection(firstDocId);
                }

                if (_HasRankField)
                {
                    int rank = dBProvider.SharedPayloadProvider.GetPayloadRank(firstDocId);
                    totalScore *= rank;
                    if (totalScore < 0)
                    {
                        //Overflow
                        totalScore = long.MaxValue - 4000000;
                    }
                }

                bool queueFull = rows >= top;

                cur.DocId = firstDocId;
                Docid2Long.Generate(ref cur, dBProvider, orderByFields, totalScore);
                rows++;

                if (!queueFull)
                {
                    priorQueue.Add(cur);

                    if (rows == top)
                    {
                        //Queue just became full; remember its last element for later comparisons
                        last = priorQueue.Last;
                    }
                }
                else if (comparer.Compare(last, cur) > 0)
                {
                    //Queue is full: only add rows that the comparer orders before the current last
                    priorQueue.Add(cur);
                    last = priorQueue.Last;
                }
            }

            docIdRank.RelTotalCount = rows;

            foreach (Docid2Long docid2Long in priorQueue.ToArray())
            {
                long score = comparer.GetScore(docid2Long);

                if (score < 0)
                {
                    //Overflow
                    score = long.MaxValue - 4000000;
                }

                docIdRank.Add(docid2Long.DocId, new DocumentResult(docid2Long.DocId, score));
            }

            docIdRank.Sorted = true;
        }
Esempio n. 4
0
        /// <summary>
        /// Reads a list of <see cref="DocumentPositionList"/> entries from <paramref name="stream"/>.
        /// </summary>
        /// <remarks>
        /// Layout consumed, in order: the document count (a leading 0 means a skip-doc
        /// index comes first and is consumed through DeserializeSkipDocIndex, followed by the
        /// real count), the first document id, then for every entry a packed value
        /// (count * 8 + a 3-bit remainder) and, unless <paramref name="simple"/> is set,
        /// a first position. Document ids after the first are stored as deltas from
        /// the previous id.
        /// NOTE(review): the first entry's count is never added to
        /// <paramref name="wordCountSum"/>, and when exactly one entry is decoded the sum
        /// is set to 1 - behavior preserved from the original, confirm it is intended.
        /// </remarks>
        /// <param name="stream">Stream positioned at the start of the serialized list.</param>
        /// <param name="documentsCount">
        /// On input, the maximum number of entries to decode. On output, the document
        /// count that was stored in the stream.
        /// </param>
        /// <param name="simple">When true, no first-position value is read for any entry.</param>
        /// <param name="wordCountSum">Receives the sum of the decoded counts of entries 1..n-1 (see remarks).</param>
        /// <returns>The decoded entries; an empty array when nothing is to be decoded.</returns>
        static public DocumentPositionList[] Deserialize(System.IO.Stream stream, ref int documentsCount, bool simple, out long wordCountSum)
        {
            wordCountSum = 0;

            int docsCount = VInt.sReadFromStream(stream);

            if (docsCount == 0)
            {
                //This index has skip doc index
                DeserializeSkipDocIndex(stream, true);

                docsCount = VInt.sReadFromStream(stream);
            }

            int relDocCount = docsCount;

            // These two values are always consumed, even when no entry is returned,
            // so the stream position stays the same as before this fix.
            int lastDocId = VInt.sReadFromStream(stream);

            int count = VInt.sReadFromStream(stream);

            docsCount = Math.Min(docsCount, documentsCount);

            // Guard BEFORE allocating: a negative requested count would otherwise make
            // the array allocation throw instead of reaching this early return.
            if (docsCount <= 0)
            {
                documentsCount = relDocCount;
                return new DocumentPositionList[0];
            }

            DocumentPositionList[] result = new DocumentPositionList[docsCount];

            if (!simple)
            {
                int firstPosition = VInt.sReadFromStream(stream);
                result[0] = new DocumentPositionList(lastDocId, count / 8, (Int16)(count % 8), firstPosition);
            }
            else
            {
                result[0] = new DocumentPositionList(lastDocId, count / 8, (Int16)(count % 8));
            }

            if (docsCount == 1)
            {
                wordCountSum = 1;
            }

            for (int i = 1; i < docsCount; i++)
            {
                // Document ids are delta-encoded against the previous entry.
                lastDocId = VInt.sReadFromStream(stream) + lastDocId;
                count     = VInt.sReadFromStream(stream);

                // Clamp in int space. Narrowing to Int16 first (as the code used to do)
                // wraps large values to negative numbers and makes the clamp unreachable.
                int docCount = count / 8;

                if (docCount > Int16.MaxValue)
                {
                    docCount = Int16.MaxValue;
                }

                if (!simple)
                {
                    int firstPosition = VInt.sReadFromStream(stream);
                    result[i] = new DocumentPositionList(lastDocId, docCount, (Int16)(count % 8), firstPosition);
                }
                else
                {
                    result[i] = new DocumentPositionList(lastDocId, docCount, (Int16)(count % 8));
                }

                wordCountSum += docCount;
            }

            documentsCount = relDocCount;

            return result;
        }
Esempio n. 5
0
        /// <summary>
        /// Writes a document position list to <paramref name="stream"/>.
        /// </summary>
        /// <remarks>
        /// Output, in order: <paramref name="docsCount"/>, the id of <paramref name="first"/>,
        /// then for each entry a packed value (count capped at 32767, multiplied by 8, plus
        /// <c>_TotalWordsInThisDocumentIndex</c>) and, unless <paramref name="simple"/> is set,
        /// its first position. Entries after the first store their document id as a delta
        /// from the previous entry. The id of the last written entry is appended as raw
        /// bytes produced by <see cref="BitConverter.GetBytes(int)"/>.
        /// The first element yielded by <paramref name="docPositions"/> is skipped because
        /// <paramref name="first"/> is written in its place.
        /// </remarks>
        /// <param name="first">Entry written at the head of the list.</param>
        /// <param name="docsCount">Document count written as the list header.</param>
        /// <param name="docPositions">All entries; the first enumerated element is not written.</param>
        /// <param name="stream">Destination stream.</param>
        /// <param name="simple">When true, first positions are omitted.</param>
        static public void Serialize(DocumentPositionList first, int docsCount, IEnumerable <DocumentPositionList> docPositions, System.IO.Stream stream, bool simple)
        {
            const int MaxStoredCount = 32767;

            // Header: document count followed by the absolute id of the first document.
            VInt.sWriteToStream(docsCount, stream);

            int previousDocId = first.DocumentId;
            VInt.sWriteToStream(previousDocId, stream);

            // Pack the capped count into the upper bits, leaving the low 3 bits for the index value.
            int headCount = first.Count;
            int packed = Math.Min(headCount, MaxStoredCount) * 8;
            packed += first._TotalWordsInThisDocumentIndex;
            VInt.sWriteToStream(packed, stream);

            if (!simple)
            {
                VInt.sWriteToStream(first.FirstPosition, stream);
            }

            bool headSkipped = false;

            foreach (DocumentPositionList entry in docPositions)
            {
                if (!headSkipped)
                {
                    // The head of the sequence was already emitted from 'first'.
                    headSkipped = true;
                    continue;
                }

                // Delta against the previously written document id.
                VInt.sWriteToStream(entry.DocumentId - previousDocId, stream);

                int entryCount = entry.Count;
                packed = Math.Min(entryCount, MaxStoredCount) * 8;
                packed += entry._TotalWordsInThisDocumentIndex;
                VInt.sWriteToStream(packed, stream);

                if (!simple)
                {
                    VInt.sWriteToStream(entry.FirstPosition, stream);
                }

                previousDocId = entry.DocumentId;
            }

            // Trailer: id of the last document written, as raw bytes.
            byte[] trailer = BitConverter.GetBytes(previousDocId);
            stream.Write(trailer, 0, trailer.Length);
        }
Esempio n. 6
0
 /// <summary>
 /// Transfers this instance into <paramref name="dpl"/> by calling
 /// <c>dpl.Set(ref this)</c>.
 /// </summary>
 /// <param name="dpl">
 /// Target list, passed by reference and updated in place. What exactly is copied
 /// is decided by <c>Set</c>, which is defined elsewhere.
 /// </param>
 public void ToDocumentPositionList(ref DocumentPositionList dpl)
 {
     // This instance is handed over by reference rather than by value.
     dpl.Set(ref this);
 }