// Refresh the code index entry (and the search hint words) for one changed file.
// IOExceptions (file still locked by the writer) queue the change for retry;
// any other failure is only logged.
void UpdateIndex(string fullPath, PendingRetrySource pendingRetrySource = null)
{
    if (!IsFile(fullPath))
    {
        return;
    }

    var changedFile = new FileInfo(fullPath);

    try
    {
        Thread.Sleep(WaitMilliseconds); // Wait to let file finished write to disk

        if (changedFile.Exists)
        {
            var fileContent = FilesContentHelper.ReadAllText(fullPath);
            var updatedDocument = CodeIndexBuilder.GetDocumentFromSource(CodeSource.GetCodeSource(changedFile, fileContent));

            CodeIndexBuilder.UpdateIndex(config.LuceneIndexForCode, GetNoneTokenizeFieldTerm(nameof(CodeSource.FilePath), fullPath), updatedDocument);
            WordsHintBuilder.UpdateWordsHint(config, WordSegmenter.GetWords(fileContent), log);
            pendingChanges++;
        }
    }
    catch (IOException)
    {
        // Writer still holds the file - try this change again later.
        AddFileChangesToRetrySouce(fullPath, WatcherChangeTypes.Changed, pendingRetrySource);
    }
    catch (Exception ex)
    {
        log?.Error(ex.ToString());
    }
}
// Build a brand-new index entry (and hint words) for a newly created file.
// IOExceptions (file still locked by the writer) queue the change for retry;
// any other failure is only logged.
void CreateNewIndex(string fullPath, PendingRetrySource pendingRetrySource = null)
{
    if (!IsFile(fullPath))
    {
        return;
    }

    var createdFile = new FileInfo(fullPath);

    try
    {
        Thread.Sleep(WaitMilliseconds); // Wait to let file finished write to disk

        if (createdFile.Exists)
        {
            var fileContent = FilesContentHelper.ReadAllText(fullPath);

            CodeIndexBuilder.BuildIndex(config, false, false, false, new[] { CodeSource.GetCodeSource(createdFile, fileContent) });
            WordsHintBuilder.UpdateWordsHint(config, WordSegmenter.GetWords(fileContent), log);
            pendingChanges++;
        }
    }
    catch (IOException)
    {
        // Writer still holds the file - try this change again later.
        AddFileChangesToRetrySouce(fullPath, WatcherChangeTypes.Created, pendingRetrySource);
    }
    catch (Exception ex)
    {
        log?.Error(ex.ToString());
    }
}
/// <summary>
/// Indexes the given files in batches of <paramref name="batchSize"/> documents,
/// collecting hint words as it goes, and returns the files whose indexing failed.
/// </summary>
/// <param name="fileInfos">Files to index; must not be null.</param>
/// <param name="batchSize">Documents per flush; must be between 50 and int.MaxValue.</param>
/// <returns>The files that threw while being read or converted to documents.</returns>
public List<FileInfo> BuildIndexByBatch(IEnumerable<FileInfo> fileInfos, bool needCommit, bool triggerMerge, bool applyAllDeletes, CancellationToken cancellationToken, bool brandNewBuild, int batchSize = 10000)
{
    cancellationToken.ThrowIfCancellationRequested();
    fileInfos.RequireNotNull(nameof(fileInfos));
    batchSize.RequireRange(nameof(batchSize), int.MaxValue, 50);

    var pendingDocuments = new List<Document>();
    var seenWords = new HashSet<string>();       // all words met so far, to avoid duplicate hints
    var batchHintWords = new HashSet<string>();  // words that are new within the current batch
    var failedFiles = new List<FileInfo>();

    try
    {
        foreach (var fileInfo in fileInfos)
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                if (fileInfo.Exists)
                {
                    var source = CodeSource.GetCodeSource(fileInfo, FilesContentHelper.ReadAllText(fileInfo.FullName));
                    AddHintWords(batchHintWords, seenWords, source.Content);
                    pendingDocuments.Add(IndexBuilderHelper.GetDocumentFromSource(source));
                    Log.LogInformation($"{Name}: Add index for {source.FilePath}");
                }
            }
            catch (Exception ex)
            {
                failedFiles.Add(fileInfo);
                Log.LogError($"{Name}: Add index for {fileInfo.FullName} failed, exception: " + ex);
            }

            // Flush a full batch even when the last file failed.
            if (pendingDocuments.Count >= batchSize)
            {
                BuildIndex(needCommit, triggerMerge, applyAllDeletes, pendingDocuments, batchHintWords, cancellationToken, brandNewBuild);
                pendingDocuments.Clear();
                batchHintWords.Clear();
            }
        }

        // Flush the final partial batch.
        if (pendingDocuments.Count > 0)
        {
            BuildIndex(needCommit, triggerMerge, applyAllDeletes, pendingDocuments, batchHintWords, cancellationToken, brandNewBuild);
        }

        return failedFiles;
    }
    finally
    {
        // Release the (potentially large) working sets promptly.
        seenWords.Clear();
        batchHintWords.Clear();
        pendingDocuments.Clear();
    }
}
/// <summary>
/// Static batch indexer: reads each existing file, optionally deletes its previous
/// index entry and registers hint words, then flushes documents to the index every
/// <paramref name="batchSize"/> files. Files that fail are returned via
/// <paramref name="failedIndexFiles"/>.
/// </summary>
public static void BuildIndexByBatch(CodeIndexConfiguration config, bool triggerMerge, bool applyAllDeletes, bool needFlush, IEnumerable<FileInfo> fileInfos, bool deleteExistIndex, ILog log, out List<FileInfo> failedIndexFiles, int batchSize = 1000, bool needHint = true)
{
    config.RequireNotNull(nameof(config));
    fileInfos.RequireNotNull(nameof(fileInfos));
    batchSize.RequireRange(nameof(batchSize), int.MaxValue, 50);

    // Only bother deleting per-file entries when an index already exists.
    var shouldRemoveOldIndex = deleteExistIndex && IndexExists(config.LuceneIndexForCode);
    var batchedDocuments = new List<Document>();
    failedIndexFiles = new List<FileInfo>();

    foreach (var fileInfo in fileInfos)
    {
        try
        {
            if (fileInfo.Exists)
            {
                var source = CodeSource.GetCodeSource(fileInfo, FilesContentHelper.ReadAllText(fileInfo.FullName));

                if (shouldRemoveOldIndex)
                {
                    DeleteIndex(config.LuceneIndexForCode, new Term(nameof(CodeSource.FilePath) + Constants.NoneTokenizeFieldSuffix, source.FilePath));
                }

                if (needHint)
                {
                    WordsHintBuilder.AddWords(WordSegmenter.GetWords(source.Content));
                }

                batchedDocuments.Add(GetDocumentFromSource(source));
                log?.Info($"Add index For {source.FilePath}");
            }
        }
        catch (Exception ex)
        {
            failedIndexFiles.Add(fileInfo);
            log?.Error($"Add index for {fileInfo.FullName} failed, exception: " + ex.ToString());
        }

        // Flush a full batch even when the last file failed.
        if (batchedDocuments.Count >= batchSize)
        {
            BuildIndex(config, triggerMerge, applyAllDeletes, batchedDocuments, needFlush, log);
            batchedDocuments.Clear();
        }
    }

    // Flush the final partial batch.
    if (batchedDocuments.Count > 0)
    {
        BuildIndex(config, triggerMerge, applyAllDeletes, batchedDocuments, needFlush, log);
    }
}
// Verifies FilesContentHelper.ReadAllText can read a file while another handle
// holds it open for writing (simulating "used by another process").
public void TestGetContent_ReadContentUsedByAnotherProcess()
{
    var filePath = Path.Combine(TempDir, "AAA.cs");

    // Write the sample text and release the creating handle.
    using var writer = File.Create(filePath);
    writer.Write(Encoding.UTF8.GetBytes("这是一个例句"));
    writer.Close();

    // Keep a second write handle open so the file counts as "in use".
    using var blockingHandle = new FileStream(filePath, FileMode.Open, FileAccess.Write);

    Assert.DoesNotThrow(() =>
    {
        var content = FilesContentHelper.ReadAllText(filePath);
        Assert.AreEqual("这是一个例句", content);
    }, "Can read file content used by another process");
}
/// <summary>
/// Indexes one file: upserts its code document keyed by file path and upserts an
/// entry in the hint index for every extracted hint word. A missing file is a
/// no-op success. IOExceptions map to FailedWithIOException, cancellation is
/// rethrown, and anything else maps to FailedWithError.
/// </summary>
public IndexBuildResults CreateIndex(FileInfo fileInfo)
{
    try
    {
        if (fileInfo.Exists)
        {
            var source = CodeSource.GetCodeSource(fileInfo, FilesContentHelper.ReadAllText(fileInfo.FullName));

            var hintWords = new HashSet<string>();
            AddHintWords(hintWords, source.Content);

            var document = IndexBuilderHelper.GetDocumentFromSource(source);
            CodeIndexPool.UpdateIndex(GetNoneTokenizeFieldTerm(nameof(CodeSource.FilePath), source.FilePath), document);

            foreach (var word in hintWords)
            {
                HintIndexPool.UpdateIndex(new Term(nameof(CodeWord.Word), word), new Document
                {
                    new StringField(nameof(CodeWord.Word), word, Field.Store.YES),
                    new StringField(nameof(CodeWord.WordLower), word.ToLowerInvariant(), Field.Store.YES)
                });
            }

            Log.LogInformation($"{Name}: Create index For {source.FilePath} finished");
        }

        return IndexBuildResults.Successful;
    }
    catch (Exception ex)
    {
        Log.LogError($"{Name}: Create index for {fileInfo.FullName} failed, exception: " + ex);

        if (ex is IOException)
        {
            return IndexBuildResults.FailedWithIOException;
        }

        if (ex is OperationCanceledException)
        {
            throw;
        }

        return IndexBuildResults.FailedWithError;
    }
}
// Handles a rename event from the file watcher.
// File rename: re-reads the content and updates the index document located by the
// OLD path term, so the stored document now carries the new path; an IOException
// (writer still holds the file) queues the event for a later retry with both paths.
// Directory rename: prefix-searches every indexed document under the old directory
// path, rewrites each document's stored file path, and re-updates it keyed by its
// CodePK. NOTE(review): the directory branch performs no file reads, which is
// presumably why it has no IOException retry path - confirm against the watcher's
// retry design. Any other exception from either branch is only logged.
void FileRenamed(string oldFullPath, string fullPath, PendingRetrySource pendingRetrySource = null) { try { if (IsFile(fullPath)) { var fileInfo = new FileInfo(fullPath); try { if (fileInfo.Exists) { var content = FilesContentHelper.ReadAllText(fullPath); var document = CodeIndexBuilder.GetDocumentFromSource(CodeSource.GetCodeSource(fileInfo, content)); CodeIndexBuilder.UpdateIndex(config.LuceneIndexForCode, GetNoneTokenizeFieldTerm(nameof(CodeSource.FilePath), oldFullPath), document); pendingChanges++; } } catch (IOException) { AddFileChangesToRetrySouce(fullPath, WatcherChangeTypes.Renamed, pendingRetrySource, oldFullPath); } } else if (IsDirectory(fullPath)) { // Rebuild All Sub Directory Index File, rename the index path var term = new PrefixQuery(GetNoneTokenizeFieldTerm(nameof(CodeSource.FilePath), oldFullPath)); var docs = CodeIndexSearcher.Search(config.LuceneIndexForCode, term, int.MaxValue); foreach (var doc in docs) { CodeIndexBuilder.UpdateCodeFilePath(doc, oldFullPath, fullPath); CodeIndexBuilder.UpdateIndex(config.LuceneIndexForCode, new Term(nameof(CodeSource.CodePK), doc.Get(nameof(CodeSource.CodePK))), doc); pendingChanges++; } } } catch (Exception ex) { log?.Error(ex.ToString()); } }
// Updates the index entry for one file and keeps the hint-word index in sync.
// Flow: upsert the code document keyed by file path, receiving the previous
// document(s) via rawDocuments. If a previous version existed, diff its hint
// words against the new content's: words present only in the old version are
// delete candidates (removed from the hint index only when no other indexed
// document's case-sensitive Content field still contains them), and `words` is
// narrowed to only the genuinely new words before the upsert loop below.
// Finding more than one previous document for the path is logged as an error;
// finding zero is logged and all new words are upserted. A missing file is a
// no-op success. IOException maps to FailedWithIOException, cancellation is
// rethrown, and any other exception maps to FailedWithError.
public IndexBuildResults UpdateIndex(FileInfo fileInfo, CancellationToken cancellationToken) { try { if (fileInfo.Exists) { cancellationToken.ThrowIfCancellationRequested(); var source = CodeSource.GetCodeSource(fileInfo, FilesContentHelper.ReadAllText(fileInfo.FullName)); var words = new HashSet <string>(); AddHintWords(words, source.Content); var doc = IndexBuilderHelper.GetDocumentFromSource(source); CodeIndexPool.UpdateIndex(GetNoneTokenizeFieldTerm(nameof(CodeSource.FilePath), source.FilePath), doc, out var rawDocuments); if (rawDocuments.Length >= 1) { var rawWords = new HashSet <string>(); AddHintWords(rawWords, GetCodeSourceFromDocument(rawDocuments[0]).Content); var wordsNeedToRemove = rawWords.Except(words).ToArray(); var wordsNeedToAdd = words.Except(rawWords); words = wordsNeedToAdd.ToHashSet(); Log.LogInformation($"{Name}: Find {wordsNeedToRemove.Length} Delete Candidates Words, {words.Count} Update Candidates Words With Path {source.FilePath}"); if (rawDocuments.Length > 1) { Log.LogError($"{Name}: Find {rawDocuments.Length} Documents With Path {source.FilePath} To Update"); } foreach (var needToDeleteWord in wordsNeedToRemove) { if (!CodeIndexPool.Exists(new TermQuery(new Term(GetCaseSensitiveField(nameof(CodeSource.Content)), needToDeleteWord)))) { HintIndexPool.DeleteIndex(new Term(nameof(CodeWord.Word), needToDeleteWord)); } } } else { Log.LogError($"{Name}: Find 0 Document To Update With Path {source.FilePath}, Create New Index"); } foreach (var word in words) { HintIndexPool.UpdateIndex(new Term(nameof(CodeWord.Word), word), new Document { new StringField(nameof(CodeWord.Word), word, Field.Store.YES), new StringField(nameof(CodeWord.WordLower), word.ToLowerInvariant(), Field.Store.YES) }); } Log.LogInformation($"{Name}: Update index For {source.FilePath} finished"); } return(IndexBuildResults.Successful); } catch (Exception ex) { Log.LogError($"{Name}: Update index for {fileInfo.FullName} failed, exception: " + ex); if (ex is IOException) { 
return(IndexBuildResults.FailedWithIOException); } else if (ex is OperationCanceledException) { throw; } return(IndexBuildResults.FailedWithError); } }