示例#1
0
        /// <summary>
        /// Reads the corpus at <c>wikiPath</c>, runs it through a
        /// <c>WikitextProcessor</c>, indexes the result into a
        /// <c>DictionaryIndex</c>, and serializes that index to the file
        /// created at <c>indexPath</c>.
        /// </summary>
        static void ProcessAndIndexWikipedia()
        {
            var corpusReader    = new CorpusZipReader<IList<char>>(wikiPath, charDataSerializer);
            var dictionaryIndex = new DictionaryIndex<int>(rareWordThreshold: 5);
            var builder         = new IndexBuilder<int, IEnumerable<int>>(dictionaryIndex);
            var wikitext        = new WikitextProcessor();

            // Read -> transform -> index, spelled out one step at a time.
            var rawCorpus         = corpusReader.Read();
            var transformedCorpus = wikitext.Transform(rawCorpus);
            builder.IndexCorpus(transformedCorpus);

            Console.WriteLine("Serializing index...");

            // The output file is closed as soon as serialization completes.
            using (var output = File.Create(indexPath))
            {
                dictionaryIndex.Serialize(output);
            }
        }
示例#2
0
        /// <summary>
        /// Builds an external index from the corpus at <c>wikiPath</c> and
        /// serializes it under <c>externalIndexPath</c>.
        /// </summary>
        /// <remarks>
        /// The output directory is prepared first; the corpus is then read,
        /// passed through a <c>WikitextProcessor</c>, and indexed into a
        /// <c>BlockedExternalBuildableIndex</c> before the built index is handed
        /// to an <c>ExternalIndexSerializer</c>.
        /// </remarks>
        static void BuildExternalIndex()
        {
            PrepareOutputDirectory(externalIndexPath);

            var corpusReader = new CorpusZipReader<IList<char>>(wikiPath, charDataSerializer);

            // Factory handed to the blocked index; obtained up front so the
            // constructor call below stays on a single line.
            var createBlock = DictonaryBasedExternalBuildableIndex<int>.GetCreateMethodWithVarintPostingsLists();

            using var blockedIndex = new BlockedExternalBuildableIndex<int>(createBlock, externalIndexPath, BlockSize);

            var builder  = new IndexBuilder<int, IEnumerable<int>>(blockedIndex);
            var wikitext = new WikitextProcessor();

            // Read -> transform -> index, spelled out one step at a time.
            var rawCorpus         = corpusReader.Read();
            var transformedCorpus = wikitext.Transform(rawCorpus);
            builder.IndexCorpus(transformedCorpus);

            // The built index is disposed before blockedIndex, which is
            // released when the method exits.
            using (var builtIndex = blockedIndex.Build())
            {
                var indexSerializer = new ExternalIndexSerializer<int>();
                indexSerializer.Serialize(externalIndexPath, builtIndex);
            }
        }