/// <summary>
/// Builds a <c>DictionaryIndex&lt;int&gt;</c> from the corpus stored at
/// <c>hashedPath</c> and writes the serialized index to <c>indexPath</c>.
/// </summary>
/// <remarks>
/// The corpus is read as <c>int[]</c> documents via <c>hashedDataSerializer</c>
/// and the index is configured with <c>RangeThreshold</c> as its rare-word
/// threshold. NOTE(review): presumably the input is the already-hashed form of
/// the Wikipedia corpus - confirm against whatever produces <c>hashedPath</c>.
/// </remarks>
static void IndexWikipedia()
{
    var corpusReader = new CorpusZipReader<int[]>(hashedPath, hashedDataSerializer);
    var wordIndex = new DictionaryIndex<int>(rareWordThreshold: RangeThreshold);
    var builder = new IndexBuilder<int, int[]>(wordIndex);

    // Feed every document from the reader into the index.
    var documents = corpusReader.Read();
    builder.IndexCorpus(documents);

    // File.Create replaces any existing file at indexPath; the stream is
    // disposed as soon as serialization finishes.
    using (var output = File.Create(indexPath))
    {
        wordIndex.Serialize(output);
    }
}
/// <summary>
/// Reads the corpus at <c>wikiPath</c>, runs it through a
/// <c>WikitextProcessor</c>, indexes the transformed documents into a
/// <c>DictionaryIndex&lt;int&gt;</c>, and writes the serialized index to
/// <c>indexPath</c>.
/// </summary>
/// <remarks>
/// Documents are read as <c>IList&lt;char&gt;</c> via <c>charDataSerializer</c>.
/// Prints "Serializing index..." to the console once indexing has returned and
/// before the output file is created.
/// </remarks>
static void ProcessAndIndexWikipedia()
{
    var corpusReader = new CorpusZipReader<IList<char>>(wikiPath, charDataSerializer);

    // NOTE(review): the rare-word threshold is a literal 5 here, whereas
    // IndexWikipedia passes RangeThreshold - confirm the difference is intended.
    var wordIndex = new DictionaryIndex<int>(rareWordThreshold: 5);
    var builder = new IndexBuilder<int, IEnumerable<int>>(wordIndex);
    var wikitext = new WikitextProcessor();

    // Transform the raw documents, then feed the result into the index.
    var rawDocuments = corpusReader.Read();
    var processedDocuments = wikitext.Transform(rawDocuments);
    builder.IndexCorpus(processedDocuments);

    Console.WriteLine("Serializing index...");

    // File.Create replaces any existing file at indexPath; the stream is
    // disposed as soon as serialization finishes.
    using (var output = File.Create(indexPath))
    {
        wordIndex.Serialize(output);
    }
}