/// <summary>
/// Writes <paramref name="corpus"/> through a <c>CorpusZipWriter&lt;string&gt;</c> into a newly
/// created <see cref="MockFileSystem"/> and hands that file system back to the caller.
/// </summary>
/// <param name="corpus">The blocks passed to the writer.</param>
/// <returns>The mock file system instance the writer was given.</returns>
private static MockFileSystem SerializeCorpus(IList<Block<string>> corpus)
{
    var mockFs = new MockFileSystem();

    // The directory at `path` is created before the writer is constructed over it.
    mockFs.Directory.CreateDirectory(path);

    var zipWriter = new CorpusZipWriter<string>(
        path,
        new StringDocumentDataSerializer(),
        mockFs);
    zipWriter.Write(corpus);

    return mockFs;
}
/// <summary>
/// Reads the wiki dump at <c>wikiDumpFilePath</c> through a <c>WikipediaReader</c> and writes
/// the resulting corpus to <c>wikiPath</c> with a <c>CorpusZipWriter&lt;string&gt;</c>.
/// </summary>
static void TransformWikiDump()
{
    string outputDirectory = wikiPath;
    PrepareOutputDirectory(outputDirectory);

    // The XML reader is disposed once reading and writing have both finished.
    using (var dumpXmlReader = new WikiDumpXmlReader(wikiDumpFilePath))
    {
        ICorpusReader<string> corpusReader = new WikipediaReader(
            dumpXmlReader,
            WikipediaReader.DefaultFilter,
            (ushort)BlockSize,
            CorpusSize);
        ICorpusWriter<string> corpusWriter =
            new CorpusZipWriter<string>(outputDirectory, stringDataSerializer);

        var corpus = corpusReader.Read();
        corpusWriter.Write(corpus);
    }
}