Exemple #1
0
        /// <summary>
        /// Writes the buffered word index writers to the index file and then
        /// clears the in-memory writer state.
        /// </summary>
        /// <remarks>
        /// Steps, in order: increment the delete stamp when a delete provider is
        /// present, sort the used portion of the writer pool, pass each writer's
        /// document list to <c>_IndexFileProxy</c>, reset the writer fields, and
        /// finally call <c>_IndexFileProxy.Collect()</c> and
        /// <c>_IndexMerge.Optimize(OptimizationOption.Speedy)</c>.
        /// When there is no word table, or it is empty, the method returns right
        /// after the delete-stamp step and the collect/optimize calls are skipped.
        /// If the flush throws, the index writer is closed and the same exception
        /// is rethrown with its original stack trace.
        /// </remarks>
        private void StoreIndexToFile()
        {
            try
            {
                lock (this)
                {
                    if (_DBProvider.DelProvider != null)
                    {
                        _DBProvider.DelProvider.IncDeleteStamp();
                    }
                }

                if (_WordTableWriter == null)
                {
                    // No word table exists, so there is nothing to write.
                    return;
                }

                int wordIndexWriterCount = _WordTableWriter.Count;

                if (wordIndexWriterCount <= 0)
                {
                    return;
                }

                // Sort only the first wordIndexWriterCount slots of the pool; the
                // ordering comes from the element type's own comparison.
                Array.Sort(_WordIndexWriterPool, 0, wordIndexWriterCount);

                for (int index = 0; index < wordIndexWriterCount; index++)
                {
                    IEnumerable <DocumentPositionList> docList = _WordIndexWriterPool[index].GetDocListForWriter();

                    // Writers that return no document list are skipped.
                    if (docList != null)
                    {
                        _IndexFileProxy.AddWordPositionAndDocumentPositionList(
                            _WordIndexWriterPool[index].Word, _WordIndexWriterPool[index].GetFirstDocList(),
                            _WordIndexWriterPool[index].Count, docList);
                    }
                }

                // Drop the in-memory buffers now that their content was handed over.
                _WordTableWriter     = null;
                _WordIndexWriterPool = null;
                _IndexWriterPoolId   = 0;
                _TempWordIndexWriter = null;
                _DocPositionAlloc    = null;
            }
            catch (Exception)
            {
                _IndexFileProxy.CloseIndexWriter();

                // Bare rethrow keeps the original stack trace ("throw e;" would reset it).
                throw;
            }

            _IndexFileProxy.Collect();
            _IndexMerge.Optimize(OptimizationOption.Speedy);
        }
Exemple #2
0
        //internal IEnumerable<DocumentPositionList> GetDocListForWriter1()
        //{
        //    int j = 0;
        //    foreach (DocumentPositionList docPositionList in GetDocListForWriter1())
        //    {
        //        if (docPositionList.DocumentId != _ListForWriter[j].DocumentId ||
        //            docPositionList.Count != _ListForWriter[j].Count ||
        //            docPositionList.FirstPosition != _ListForWriter[j].FirstPosition ||
        //            docPositionList.TotalWordsInThisDocument != _ListForWriter[j].TotalWordsInThisDocument)
        //        {
        //            Console.WriteLine();
        //        }

        //        j++;
        //    }

        //    for (int i = 0; i < _ListForWriter.Count; i++)
        //    {
        //        yield return _ListForWriter[i];
        //    }
        //}

        /// <summary>
        /// Creates a writer for one word with all counters and temporary
        /// per-document values reset.
        /// </summary>
        /// <param name="word">Word stored in this writer.</param>
        /// <param name="mode">Index mode stored in this writer.</param>
        /// <param name="alloc">Document position allocator stored in this writer.</param>
        internal WordIndexWriter(string word, Data.Field.IndexMode mode, DocumentPositionAlloc alloc)
        {
            // Values supplied by the caller.
            this._Word = word;
            this._IndexMode = mode;
            this._DocPositionAlloc = alloc;

            this._Count = 0;

            // Temporary values for the document currently being processed.
            this.TempDocId = 0;
            this.TempFirstPosition = 0;

            // Occurrences of this word in the current document.
            this.TempWordCountInThisDoc = 0;

            // Total number of words in the current document.
            this.TempTotalWordsInDoc = 0;

            // NOTE(review): -1 presumably marks "no entry yet" for both cursors; confirm.
            this._First = -1;
            this._Cur = -1;
        }
Exemple #3
0
        /// <summary>
        /// Tokenizes the text of one field and records each word occurrence in the
        /// in-memory word index writers.
        /// </summary>
        /// <param name="text">Field text passed to the analyzer.</param>
        /// <param name="documentId">Id of the document the text belongs to.</param>
        /// <param name="analyzer">Analyzer that splits the text into words.</param>
        private void Index(string text, int documentId, Analysis.IAnalyzer analyzer)
        {
            lock (this)
            {
                // Create the shared buffers on first use.
                if (_WordTableWriter == null)
                {
                    _WordTableWriter = new Dictionary <string, int>(65536);
                }

                if (_DocPositionAlloc == null)
                {
                    _DocPositionAlloc = new DocumentPositionAlloc();
                }

                _DocumentCount++;

                if (_TempWordIndexWriter == null)
                {
                    _TempWordIndexWriter = new AppendList <int>(65536);
                }

                // Holds the pool slots touched by this call; start empty.
                _TempWordIndexWriter.Clear();

                foreach (Entity.WordInfo token in analyzer.Tokenize(text))
                {
                    // Tokens with a negative position are ignored.
                    if (token.Position < 0)
                    {
                        continue;
                    }

                    // Look up with the interned instance when one exists.
                    string lookupWord = string.IsInterned(token.Word) ?? token.Word;

                    int slot;

                    if (!_WordTableWriter.TryGetValue(lookupWord, out slot))
                    {
                        // Unknown word: give it the next free pool slot.
                        if (_WordIndexWriterPool == null)
                        {
                            _WordIndexWriterPool = new WordIndexWriter[65536];
                        }

                        if (_IndexWriterPoolId >= _WordIndexWriterPool.Length)
                        {
                            // Pool is full: double its size, keeping existing entries.
                            WordIndexWriter[] grownPool = new WordIndexWriter[_WordIndexWriterPool.Length * 2];
                            _WordIndexWriterPool.CopyTo(grownPool, 0);
                            _WordIndexWriterPool = grownPool;
                        }

                        slot = _IndexWriterPoolId;

                        _WordIndexWriterPool[slot] = new WordIndexWriter(token.Word, _IndexMode, _DocPositionAlloc);
                        _WordIndexWriterPool[slot].TempDocId = documentId;

                        // Starts at 0: the same-document branch below counts this occurrence.
                        _WordIndexWriterPool[slot].TempWordCountInThisDoc = 0;
                        _WordIndexWriterPool[slot].TempFirstPosition = token.Position;
                        _WordIndexWriterPool[slot].TempTotalWordsInDoc = analyzer.Count;

                        _WordTableWriter.Add(token.Word, slot);
                        _TempWordIndexWriter.Add(slot);

                        _IndexWriterPoolId++;
                    }

                    if (_WordIndexWriterPool[slot].TempDocId == documentId)
                    {
                        // Another occurrence in the same document: keep the smallest
                        // position and bump the per-document count.
                        if (_WordIndexWriterPool[slot].TempFirstPosition > token.Position)
                        {
                            _WordIndexWriterPool[slot].TempFirstPosition = token.Position;
                        }

                        _WordIndexWriterPool[slot].TempWordCountInThisDoc++;
                    }
                    else
                    {
                        // Known word, first occurrence in this document: restart its
                        // temporary per-document values.
                        _WordIndexWriterPool[slot].TempDocId = documentId;
                        _WordIndexWriterPool[slot].TempWordCountInThisDoc = 1;
                        _WordIndexWriterPool[slot].TempFirstPosition = token.Position;
                        _WordIndexWriterPool[slot].TempTotalWordsInDoc = analyzer.Count;
                        _TempWordIndexWriter.Add(slot);
                    }
                }

                // Let every writer touched by this call process its temporary values.
                foreach (int touchedSlot in _TempWordIndexWriter)
                {
                    _WordIndexWriterPool[touchedSlot].Index();
                }
            }
        }