/// <summary>
/// Reads every document from <paramref name="reader"/>, tokenizes them through a
/// <c>PreprocessClient</c>, adds the resulting token sets to <c>MainIndex</c> in
/// ascending <c>DocumentId</c> order, and finally stores the raw documents on
/// <c>MainIndex.PureDocumentsById</c>, keyed by their <c>Id</c>.
/// </summary>
/// <param name="reader">
/// Source of the documents. Whether it is an <c>EnglishReader</c> determines the
/// second argument passed to the <c>PreprocessClient</c> constructor.
/// </param>
private void IndexDocuments(IFileReader reader)
{
    var documents = reader.ReadFile();

    // NOTE(review): the second constructor flag presumably selects English-specific
    // preprocessing; the meaning of the first flag (false here) is not visible from
    // this method - confirm against PreprocessClient.
    bool readerIsEnglish = reader is EnglishReader;
    var preprocessor = new PreprocessClient(false, readerIsEnglish);

    var tokenizedDocuments = preprocessor.GetTokens(documents);
    var tokenizedById = tokenizedDocuments.OrderBy(tokens => tokens.DocumentId);

    // Documents are fed to the index in ascending document-id order.
    foreach (var tokens in tokenizedById)
    {
        MainIndex.AddDocumentToIndex(tokens);
    }

    MainIndex.PureDocumentsById = documents.ToDictionary(document => document.Id);

    // Disabled in the original source: MainIndex.SortPostings();
}
/// <summary>
/// Rebuilds the positional indexes from the documents held in <c>Documents</c>.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Creates a fresh <c>ContentIndex</c> (and a fresh <c>TitleIndex</c> when
///    <c>HasTitle</c> is true; otherwise <c>TitleIndex</c> is left as it was).
/// 2. Tokenizes all documents for content, and for titles when <c>HasTitle</c> is true.
/// 3. Walks the content token sets in ascending document-id order, adding each one to
///    the index(es) and attaching the tokens to the matching entry in <c>Documents</c>
///    (title tokens are passed as <c>null</c> when titles are not indexed).
/// 4. Calls <c>CreateVector</c> on every document with the two indexes.
/// </remarks>
public void Process()
{
    // Read the flag once; every title-related step below depends on it.
    bool indexTitles = HasTitle;

    if (indexTitles)
    {
        TitleIndex = new PositionalIndex();
    }

    ContentIndex = new PositionalIndex();

    var rawDocuments = Documents.Values
        .Select(entry => entry.Document)
        .ToList();

    // NOTE(review): the first PreprocessClient flag presumably switches between
    // title (true) and content (false) tokenization - confirm against PreprocessClient.
    Dictionary<int, DocumentTokens> titleTokensById = null;
    if (indexTitles)
    {
        var titleClient = new PreprocessClient(true, true);
        titleTokensById = titleClient
            .GetTokens(rawDocuments)
            .ToDictionary(tokens => tokens.DocumentId);
    }

    var contentClient = new PreprocessClient(false, true);
    var contentTokensById = contentClient
        .GetTokens(rawDocuments)
        .ToDictionary(tokens => tokens.DocumentId);

    // Index documents in ascending document-id order.
    foreach (var pair in contentTokensById.OrderBy(kv => kv.Key))
    {
        var documentId = pair.Key;
        var contentTokens = pair.Value;

        // Stays null when titles are not indexed; SetTokens receives null in that case.
        DocumentTokens titleTokens = null;
        if (indexTitles)
        {
            titleTokens = titleTokensById[documentId];
            TitleIndex.AddDocumentToIndex(titleTokens);
        }

        ContentIndex.AddDocumentToIndex(contentTokens);
        Documents[documentId].SetTokens(titleTokens, contentTokens);
    }

    // Vectors are built only after both indexes are fully populated.
    foreach (var entry in Documents.Values)
    {
        entry.CreateVector(TitleIndex, ContentIndex);
    }
}