Пример #1
0
        /// <summary>
        /// Indexes the corpus read from <c>hashedPath</c> and writes the serialized
        /// index to the file at <c>indexPath</c>.
        /// </summary>
        /// <remarks>
        /// Corpus entries are read as <c>int[]</c> using <c>hashedDataSerializer</c>, and the
        /// index is created with <c>RangeThreshold</c> as its rare-word threshold.
        /// </remarks>
        static void IndexWikipedia()
        {
            var corpusReader = new CorpusZipReader<int[]>(hashedPath, hashedDataSerializer);
            var wordIndex = new DictionaryIndex<int>(rareWordThreshold: RangeThreshold);
            var builder = new IndexBuilder<int, int[]>(wordIndex);

            var documents = corpusReader.Read();
            builder.IndexCorpus(documents);

            // File.Create overwrites any existing file at indexPath.
            using (var output = File.Create(indexPath))
            {
                wordIndex.Serialize(output);
            }
        }
Пример #2
0
        /// <summary>
        /// Reads the corpus at <c>wikiPath</c>, passes it through a
        /// <c>WikitextProcessor</c>, indexes the result, and writes the serialized
        /// index to the file at <c>indexPath</c>.
        /// </summary>
        /// <remarks>
        /// Corpus entries are read as <c>IList&lt;char&gt;</c> using <c>charDataSerializer</c>.
        /// The index is created with a fixed rare-word threshold of 5.
        /// </remarks>
        static void ProcessAndIndexWikipedia()
        {
            var corpusReader = new CorpusZipReader<IList<char>>(wikiPath, charDataSerializer);
            var wordIndex = new DictionaryIndex<int>(rareWordThreshold: 5);
            var builder = new IndexBuilder<int, IEnumerable<int>>(wordIndex);
            var wikitext = new WikitextProcessor();

            var rawDocuments = corpusReader.Read();
            var transformed = wikitext.Transform(rawDocuments);
            builder.IndexCorpus(transformed);

            Console.WriteLine("Serializing index...");

            // File.Create overwrites any existing file at indexPath.
            using (var output = File.Create(indexPath))
            {
                wordIndex.Serialize(output);
            }
        }