Esempio n. 1
0
        /// <summary>
        /// Rebuilds the enumeration state from <c>WordIndexes</c>: creates one
        /// <c>WordDocIdEntity</c> per word index, loads the first document of each,
        /// sorts the entities, re-allocates the selection buffers, zeroes the document
        /// counter and re-initialises the deleted-document cursor.
        /// </summary>
        public void Reset()
        {
            int wordTotal = WordIndexes.Length;

            _WordDocEntities = new WordDocIdEntity[wordTotal];

            for (int slot = 0; slot < wordTotal; slot++)
            {
                _WordDocEntities[slot] = new WordDocIdEntity(slot, WordIndexes[slot]);

                // Keep addressing the entity through the array element: the ref must
                // point at the DocList stored in the array, not at one held by a local copy.
                _WordDocEntities[slot].WordIndex.WordIndex.GetNextOriginal(ref _WordDocEntities[slot].DocList);
            }

            // Ordering is whatever WordDocIdEntity's comparison defines
            // (presumably ascending by current document id - confirm against that type).
            Array.Sort(_WordDocEntities);

            // Selection buffers hold one slot per word.
            _WordsCount = _WordDocEntities.Length;
            SelectedIndexes = new int[_WordsCount];
            SelectedDocLists = new Hubble.Core.Entity.OriginalDocumentPositionList[_WordsCount];
            TotalDocIdCount = 0;

            // Deleted-document cursor: starts at the first entry of the delete list, if any.
            DBProvider provider = _DBProvider;

            _HaveRecordsDeleted = provider.DelProvider.Count > 0;
            _DelDocs = null;
            _CurDelIndex = 0;

            if (!_HaveRecordsDeleted)
            {
                return;
            }

            _DelDocs = provider.DelProvider.DelDocs;
            _CurDelDocid = _DelDocs[_CurDelIndex];
        }
Esempio n. 2
0
        /// <summary>
        /// Produces the next document of the merged enumeration over all word entities,
        /// skipping documents that match the deleted-document list and, when
        /// <c>_OptimizeByScore</c> is set, documents whose lowest matched entity index
        /// is above <c>_IndexThreshold</c>.
        /// </summary>
        /// <remarks>
        /// On return, <c>SelectedIndexes</c>, <c>SelectedDocLists</c> and
        /// <c>SelectedCount</c> describe every entity that was positioned on the
        /// returned document. <c>TotalDocIdCount</c> is incremented for each candidate
        /// document and decremented again when the candidate turns out to be deleted.
        /// </remarks>
        /// <param name="odpl">
        /// Receives the position list of the first entity positioned on the returned
        /// document. When the first entity's document id is negative, only
        /// <c>DocumentId</c> is overwritten with that negative value.
        /// </param>
        public void GetNextOriginal(ref Entity.OriginalDocumentPositionList odpl)
        {
            while (true)
            {
                int headDocId = _WordDocEntities[0].DocList.DocumentId;

                if (headDocId < 0)
                {
                    // A negative id on the head entity is handed straight back to the
                    // caller (presumably the end-of-enumeration marker - confirm).
                    odpl.DocumentId = headDocId;
                    return;
                }

                // Collect the leading run of entities sitting on the head document.
                // Slot 0 always belongs to the run, so the run length is at least 1.
                int lowestIndex = int.MaxValue;
                int runLength = 0;

                while (runLength < _WordDocEntities.Length &&
                       _WordDocEntities[runLength].DocList.DocumentId == headDocId)
                {
                    int entityIndex = _WordDocEntities[runLength].Index;

                    SelectedIndexes[runLength] = entityIndex;
                    SelectedDocLists[runLength] = _WordDocEntities[runLength].DocList;

                    if (entityIndex < lowestIndex)
                    {
                        lowestIndex = entityIndex;
                    }

                    runLength++;
                }

                SelectedCount = runLength;
                TotalDocIdCount++;

                // Capture the result before the entities are advanced below.
                odpl = _WordDocEntities[0].DocList;

                for (int advanced = 0; advanced < SelectedCount; advanced++)
                {
                    // Move the current head entity on to its next document. The ref must
                    // target the array element itself, never a local copy of the entity.
                    _WordDocEntities[0].WordIndex.WordIndex.GetNextOriginal(ref _WordDocEntities[0].DocList);

                    // Let the advanced entity sink back to its place in the ordering.
                    for (int cur = 0, next = 1; next < _WordsCount; cur++, next++)
                    {
                        int nextDocId = _WordDocEntities[next].DocList.DocumentId;

                        if (nextDocId < 0)
                        {
                            // Everything from here on already has a negative id.
                            break;
                        }

                        int curDocId = _WordDocEntities[cur].DocList.DocumentId;

                        // nextDocId is non-negative here, so an entity with a negative id
                        // always moves behind it; otherwise swap only when out of order.
                        bool mustSink = curDocId < 0 || curDocId > nextDocId;

                        if (!mustSink)
                        {
                            break;
                        }

                        WordDocIdEntity displaced = _WordDocEntities[next];

                        _WordDocEntities[next] = _WordDocEntities[cur];
                        _WordDocEntities[cur] = displaced;
                    }
                }

                if (_HaveRecordsDeleted && _CurDelIndex < _DelDocs.Length)
                {
                    int candidateDocId = odpl.DocumentId;

                    // Move the delete cursor forward until it reaches or passes the candidate.
                    while (_CurDelDocid < candidateDocId)
                    {
                        _CurDelIndex++;

                        if (_CurDelIndex >= _DelDocs.Length)
                        {
                            // Delete list exhausted: stop checking on later calls.
                            // _CurDelDocid stays below the candidate, so the test below fails.
                            _HaveRecordsDeleted = false;
                            break;
                        }

                        _CurDelDocid = _DelDocs[_CurDelIndex];
                    }

                    if (candidateDocId == _CurDelDocid)
                    {
                        // Candidate is deleted: undo the count and fetch the next one.
                        TotalDocIdCount--;
                        continue;
                    }
                }

                if (_NeedGroupBy)
                {
                    bool onSamplingStep = TotalDocIdCount % _GroupByStep == 0;

                    if (onSamplingStep || (_OptimizeByScore && lowestIndex <= _IndexThreshold))
                    {
                        _GroupByDict.AddToGroupByCollection(odpl.DocumentId);
                    }

                    if (_GroupByDict.GroupByCollection.Count >= _GroupByLimit)
                    {
                        // Limit reached: stop feeding the group-by collection.
                        _NeedGroupBy = false;
                    }
                }

                if (!_OptimizeByScore || lowestIndex <= _IndexThreshold)
                {
                    return;
                }

                // Score optimisation rejected this document; try the next one.
            }
        }