Ejemplo n.º 1
0
        /// <summary>
        /// Reads all documents from <paramref name="reader"/>, tokenizes them, adds the
        /// resulting token sets to <c>MainIndex</c> in ascending document-id order, and
        /// finally stores the raw documents on the index keyed by their id.
        /// </summary>
        /// <param name="reader">Source the documents are read from.</param>
        private void IndexDocuments(IFileReader reader)
        {
            var rawDocuments = reader.ReadFile();

            // The second constructor argument is true only when reading through an EnglishReader.
            var isEnglishReader = reader is EnglishReader;
            var preprocessor    = new PreprocessClient(false, isEnglishReader);

            var tokensByAscendingId = preprocessor.GetTokens(rawDocuments)
                                                  .OrderBy(entry => entry.DocumentId);

            foreach (var tokens in tokensByAscendingId)
            {
                MainIndex.AddDocumentToIndex(tokens);
            }

            MainIndex.PureDocumentsById = rawDocuments.ToDictionary(doc => doc.Id);

            // NOTE: the MainIndex.SortPostings() call remains disabled here, as before.
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Rebuilds <c>ContentIndex</c> (and <c>TitleIndex</c> when <c>HasTitle</c> is true)
        /// from the documents held in <c>Documents</c>, hands each document its tokens,
        /// and then asks every document to create its vector from the two indexes.
        /// </summary>
        public void Process()
        {
            if (HasTitle)
            {
                TitleIndex = new PositionalIndex();
            }

            ContentIndex = new PositionalIndex();

            var rawDocuments = Documents.Values.Select(entry => entry.Document).ToList();

            // Title tokens keyed by document id; stays null when there are no titles.
            Dictionary<int, DocumentTokens> titleTokensById = HasTitle
                ? new PreprocessClient(true, true).GetTokens(rawDocuments).ToDictionary(t => t.DocumentId)
                : null;

            var contentTokensById = new PreprocessClient(false, true)
                .GetTokens(rawDocuments)
                .ToDictionary(t => t.DocumentId);

            // Documents are fed to the indexes in ascending document-id order.
            foreach (var pair in contentTokensById.OrderBy(p => p.Key))
            {
                var documentId    = pair.Key;
                var contentTokens = pair.Value;

                // Remains null when the documents have no title.
                DocumentTokens titleTokens = null;

                if (HasTitle)
                {
                    titleTokens = titleTokensById[documentId];
                    TitleIndex.AddDocumentToIndex(titleTokens);
                }

                ContentIndex.AddDocumentToIndex(contentTokens);

                Documents[documentId].SetTokens(titleTokens, contentTokens);
            }

            // Vectors are created only after both indexes have received every document.
            foreach (var indexedDocument in Documents.Values)
            {
                indexedDocument.CreateVector(TitleIndex, ContentIndex);
            }
        }