// Exercises the vector extractor end to end: indexes every block of the
// "shakespeare.txt" resource, builds a vector extractor from the index and
// prints the column vector extracted for a short query.
// Nothing is asserted here; the method only fails if one of the calls throws.
public void CreateVectorExtractor_LargeCorpus()
        {
            var index = new DocumentIndex();

            using (var corpusStream = GetResource("shakespeare.txt"))
            {
                var corpus = new Corpus(corpusStream.Tokenise());

                // The indexed Select overload makes each document id the block's position
                // in the sequence. This replaces a captured counter that was mutated inside
                // the lazy query, which would have handed out different ids if the query
                // were ever enumerated more than once.
                var documents = corpus.Blocks.Select(
                    (block, position) => new TokenisedTextDocument(position.ToString(), block));

                // Indexing happens inside the using block, as in the original, so the
                // resource stream is still open while the blocks are consumed.
                index.IndexDocuments(documents);
            }

            // NOTE(review): 1024 is passed straight to CreateVectorExtractor; its meaning
            // (e.g. a vector size or feature limit) is not visible from here - confirm.
            var ve = index.CreateVectorExtractor(1024);

            // The query is tokenised with the index's own tokeniser before extraction.
            var vect = ve.ExtractColumnVector(index.Tokeniser.Tokenise("love time fortune"));

            Console.WriteLine(vect);
        }
Exemple #2
0
        /// <summary>
        /// Adds <paramref name="document"/> to the index, then passes the token text of
        /// each corpus block built from the document's tokens to the Markov chain.
        /// </summary>
        /// <param name="document">The tokenised document to index and analyse.</param>
        private void Append(TokenisedTextDocument document)
        {
            _index.IndexDocument(document);

            // One sequence is analysed per block of the corpus built from the document's tokens.
            foreach (var tokens in new Corpus(document.Tokens).Blocks)
            {
                var words = tokens.Select(token => token.Text);
                _markovChain.AnalyseSequence(words);
            }
        }