void AddHintWords(HashSet<string> hintWords, string content)
{
    // Segment the content and collect every word within the configured length range.
    var words = WordSegmenter.GetWords(content, HintWordMinLength, HintWordMaxLength);

    foreach (var word in words)
    {
        hintWords.Add(word);
    }
}
public static void BuildIndexByBatch(CodeIndexConfiguration config, bool triggerMerge, bool applyAllDeletes, bool needFlush, IEnumerable<FileInfo> fileInfos, bool deleteExistIndex, ILog log, out List<FileInfo> failedIndexFiles, int batchSize = 1000, bool needHint = true)
{
    config.RequireNotNull(nameof(config));
    fileInfos.RequireNotNull(nameof(fileInfos));
    batchSize.RequireRange(nameof(batchSize), int.MaxValue, 50);

    // Only remove stale entries when the caller asked for it and an index already exists.
    var needDeleteExistIndex = deleteExistIndex && IndexExists(config.LuceneIndexForCode);
    var documents = new List<Document>();
    failedIndexFiles = new List<FileInfo>();

    foreach (var fileInfo in fileInfos)
    {
        try
        {
            if (fileInfo.Exists)
            {
                var source = CodeSource.GetCodeSource(fileInfo, FilesContentHelper.ReadAllText(fileInfo.FullName));

                if (needDeleteExistIndex)
                {
                    DeleteIndex(config.LuceneIndexForCode, new Term(nameof(CodeSource.FilePath) + Constants.NoneTokenizeFieldSuffix, source.FilePath));
                }

                if (needHint)
                {
                    // Collect hint words for the auto-completion index.
                    WordsHintBuilder.AddWords(WordSegmenter.GetWords(source.Content));
                }

                var doc = GetDocumentFromSource(source);
                documents.Add(doc);

                log?.Info($"Add index for {source.FilePath}");
            }
        }
        catch (Exception ex)
        {
            // Remember the file so the caller can retry or report it, then keep going.
            failedIndexFiles.Add(fileInfo);
            log?.Error($"Add index for {fileInfo.FullName} failed, exception: {ex}");
        }

        // Flush the current batch once it reaches batchSize, then start collecting the next one.
        if (documents.Count >= batchSize)
        {
            BuildIndex(config, triggerMerge, applyAllDeletes, documents, needFlush, log);
            documents.Clear();
        }
    }

    // Index whatever is left after the last full batch.
    if (documents.Count > 0)
    {
        BuildIndex(config, triggerMerge, applyAllDeletes, documents, needFlush, log);
    }
}
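A minimal calling sketch, assuming the method lives on a static CodeIndexBuilder class and that CodeIndexConfiguration exposes a settable index-folder property; both of those names are assumptions for illustration, only the BuildIndexByBatch signature above is taken from the code:

using System;
using System.Collections.Generic;
using System.IO;

var config = new CodeIndexConfiguration
{
    LuceneIndex = @"D:\CodeIndexRoot" // assumed property; point it at the Lucene index root folder
};

// Index every C# file under the solution folder, 1000 documents per batch.
var filesToIndex = new DirectoryInfo(@"D:\Projects\MySolution")
    .EnumerateFiles("*.cs", SearchOption.AllDirectories);

CodeIndexBuilder.BuildIndexByBatch(
    config,
    triggerMerge: true,
    applyAllDeletes: true,
    needFlush: true,
    fileInfos: filesToIndex,
    deleteExistIndex: true,
    log: null, // the method null-checks the logger, so passing null simply disables logging
    failedIndexFiles: out var failedIndexFiles,
    batchSize: 1000,
    needHint: true);

foreach (var failed in failedIndexFiles)
{
    Console.WriteLine($"Could not index {failed.FullName}");
}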
void AddHintWords(HashSet<string> hintWords, HashSet<string> wholeWords, string content)
{
    var words = WordSegmenter.GetWords(content, HintWordMinLength, HintWordMaxLength);

    foreach (var word in words)
    {
        if (wholeWords.Add(word)) // Skip words already collected, so hintWords only gets first-seen words
        {
            hintWords.Add(word);
        }
    }
}
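The return value of HashSet&lt;string&gt;.Add drives the filter above: it is false when the word is already in wholeWords, presumably so that hint words gathered in one pass never repeat words handed over in an earlier pass. A standalone sketch of that behaviour (the sample words are made up):

using System;
using System.Collections.Generic;

var wholeWords = new HashSet<string>();

foreach (var batch in new[] { new[] { "config", "index" }, new[] { "index", "merge" } })
{
    var hintWords = new HashSet<string>(); // words that are new in this pass only

    foreach (var word in batch)
    {
        if (wholeWords.Add(word)) // false when an earlier pass already collected the word
        {
            hintWords.Add(word);
        }
    }

    Console.WriteLine(string.Join(", ", hintWords)); // pass 1: "config, index"; pass 2: "merge"
}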