/// <summary>
/// Writes the buffered word index writers to the index file and then resets
/// the in-memory buffers.
/// </summary>
/// <remarks>
/// The delete stamp is incremented first (when a delete provider exists).
/// If there is no word table, or it is empty, the method returns right away
/// and neither <c>Collect</c> nor <c>Optimize</c> is called. On any exception
/// the index writer is closed and the exception is rethrown unchanged.
/// </remarks>
private void StoreIndexToFile()
{
    try
    {
        lock (this)
        {
            if (_DBProvider.DelProvider != null)
            {
                _DBProvider.DelProvider.IncDeleteStamp();
            }
        }

        if (_WordTableWriter == null)
        {
            return;
        }

        int wordIndexWriterCount = _WordTableWriter.Count;

        if (wordIndexWriterCount <= 0)
        {
            return;
        }

        // Only the first wordIndexWriterCount slots of the pool are sorted and written.
        Array.Sort(_WordIndexWriterPool, 0, wordIndexWriterCount);

        for (int index = 0; index < wordIndexWriterCount; index++)
        {
            IEnumerable<DocumentPositionList> docList =
                _WordIndexWriterPool[index].GetDocListForWriter();

            // Writers that return no document list are skipped.
            if (docList != null)
            {
                _IndexFileProxy.AddWordPositionAndDocumentPositionList(
                    _WordIndexWriterPool[index].Word,
                    _WordIndexWriterPool[index].GetFirstDocList(),
                    _WordIndexWriterPool[index].Count,
                    docList);
            }
        }

        // Drop the in-memory buffers; Index() re-creates them when it finds them null.
        _WordTableWriter = null;
        _WordIndexWriterPool = null;
        _IndexWriterPoolId = 0;
        _TempWordIndexWriter = null;
        _DocPositionAlloc = null;
    }
    catch (Exception)
    {
        _IndexFileProxy.CloseIndexWriter();

        // Bare rethrow keeps the original stack trace ("throw e;" would reset it).
        throw;
    }

    _IndexFileProxy.Collect();
    _IndexMerge.Optimize(OptimizationOption.Speedy);
}
// Disabled debugging helper, kept for reference only.
//internal IEnumerable<DocumentPositionList> GetDocListForWriter1()
//{
//    int j = 0;
//    foreach (DocumentPositionList docPositionList in GetDocListForWriter1())
//    {
//        if (docPositionList.DocumentId != _ListForWriter[j].DocumentId ||
//            docPositionList.Count != _ListForWriter[j].Count ||
//            docPositionList.FirstPosition != _ListForWriter[j].FirstPosition ||
//            docPositionList.TotalWordsInThisDocument != _ListForWriter[j].TotalWordsInThisDocument)
//        {
//            Console.WriteLine();
//        }
//        j++;
//    }
//    for (int i = 0; i < _ListForWriter.Count; i++)
//    {
//        yield return _ListForWriter[i];
//    }
//}

/// <summary>
/// Creates a writer for one word with all counters and temporary
/// per-document values reset.
/// </summary>
/// <param name="word">The word this writer belongs to.</param>
/// <param name="mode">Index mode stored for this writer.</param>
/// <param name="alloc">Document position allocator stored for this writer.</param>
internal WordIndexWriter(string word, Data.Field.IndexMode mode, DocumentPositionAlloc alloc)
{
    // Cursor fields start at -1 (presumably "no entry yet" - confirm against their users).
    _First = -1;
    _Cur = -1;
    _Count = 0;

    // Temporary per-document state.
    TempDocId = 0;
    TempFirstPosition = 0;
    TempWordCountInThisDoc = 0; // Occurrences of this word in the document
    TempTotalWordsInDoc = 0;    // Total number of words in the document

    // Values supplied by the caller.
    _Word = word;
    _IndexMode = mode;
    _DocPositionAlloc = alloc;

    //_ListForWriter = new List<DocumentPositionList>();
}
/// <summary>
/// Indexes the text of one field for a single document.
/// </summary>
/// <remarks>
/// Runs entirely under <c>lock (this)</c>. Tokens with a negative position
/// are ignored. For every other token the matching pool entry is created or
/// updated, and once all tokens are consumed <c>Index()</c> is called on each
/// pool entry recorded in <c>_TempWordIndexWriter</c> during this call.
/// </remarks>
/// <param name="text">Text to tokenize.</param>
/// <param name="documentId">Id of the document the text belongs to.</param>
/// <param name="analyzer">Analyzer used to tokenize the text.</param>
private void Index(string text, int documentId, Analysis.IAnalyzer analyzer)
{
    lock (this)
    {
        // These buffers may be null on entry; create them on demand.
        if (_WordTableWriter == null)
        {
            _WordTableWriter = new Dictionary<string, int>(65536);
        }

        if (_DocPositionAlloc == null)
        {
            _DocPositionAlloc = new DocumentPositionAlloc();
        }

        _DocumentCount++;

        if (_TempWordIndexWriter == null)
        {
            _TempWordIndexWriter = new AppendList<int>(65536);
        }

        _TempWordIndexWriter.Clear();

        foreach (Entity.WordInfo token in analyzer.Tokenize(text))
        {
            int position = token.Position;

            if (position < 0)
            {
                continue;
            }

            string word = token.Word;

            // Look up with the interned instance when one exists, otherwise the word itself.
            string lookupKey = string.IsInterned(word) ?? word;

            int slot;

            if (!_WordTableWriter.TryGetValue(lookupKey, out slot))
            {
                slot = AddWordIndexWriter(word, position, documentId, analyzer);
            }

            if (_WordIndexWriterPool[slot].TempDocId == documentId)
            {
                // Same document: keep the smallest position and count the occurrence.
                if (position < _WordIndexWriterPool[slot].TempFirstPosition)
                {
                    _WordIndexWriterPool[slot].TempFirstPosition = position;
                }

                _WordIndexWriterPool[slot].TempWordCountInThisDoc++;
            }
            else
            {
                // First occurrence of this word in the current document.
                _WordIndexWriterPool[slot].TempDocId = documentId;
                _WordIndexWriterPool[slot].TempWordCountInThisDoc = 1;
                _WordIndexWriterPool[slot].TempFirstPosition = position;
                _WordIndexWriterPool[slot].TempTotalWordsInDoc = analyzer.Count;
                _TempWordIndexWriter.Add(slot);
            }
        }

        foreach (int writeId in _TempWordIndexWriter)
        {
            _WordIndexWriterPool[writeId].Index();
        }
    }
}

/// <summary>
/// Creates the pool entry for a word that is not yet in the word table,
/// registers it in the word table and the per-call list, and returns its slot.
/// </summary>
/// <param name="word">Word to register.</param>
/// <param name="position">Position of the token that introduced the word.</param>
/// <param name="documentId">Id of the document being indexed.</param>
/// <param name="analyzer">Analyzer whose <c>Count</c> is recorded on the new entry.</param>
/// <returns>Pool slot assigned to the word.</returns>
private int AddWordIndexWriter(string word, int position, int documentId, Analysis.IAnalyzer analyzer)
{
    if (_WordIndexWriterPool == null)
    {
        _WordIndexWriterPool = new WordIndexWriter[65536];
    }

    // Double the pool once every slot is taken.
    if (_IndexWriterPoolId >= _WordIndexWriterPool.Length)
    {
        Array.Resize(ref _WordIndexWriterPool, _WordIndexWriterPool.Length * 2);
    }

    int slot = _IndexWriterPoolId;

    _WordIndexWriterPool[slot] = new WordIndexWriter(word, _IndexMode, _DocPositionAlloc);
    _WordIndexWriterPool[slot].TempDocId = documentId;

    // Starts at 0: the caller's same-document branch counts this first occurrence.
    _WordIndexWriterPool[slot].TempWordCountInThisDoc = 0;
    _WordIndexWriterPool[slot].TempFirstPosition = position;
    _WordIndexWriterPool[slot].TempTotalWordsInDoc = analyzer.Count;

    _WordTableWriter.Add(word, slot);
    _TempWordIndexWriter.Add(slot);

    // Advance the next-free index only after the entry is fully registered.
    _IndexWriterPoolId++;

    return slot;
}