/// <summary>
/// Walks the immediate subdirectories of <paramref name="corpusPath"/> and processes each one,
/// using the category derived from that subdirectory as the hint for everything inside it.
/// Files located directly in <paramref name="corpusPath"/> are not visited by this method.
/// </summary>
/// <param name="corpusPath">Root directory of the corpus.</param>
/// <param name="filePattern">Pattern forwarded to <c>ProcessDir</c> to select files by name.</param>
/// <param name="process">Callback forwarded to <c>ProcessDir</c>.</param>
public static void ProcessCorpus(string corpusPath, Regex filePattern, CorpusProcessDelegate process)
{
    foreach (string categoryDir in Directory.GetDirectories(corpusPath, "*", SearchOption.TopDirectoryOnly))
    {
        // The category is resolved per top-level directory, before that directory is scanned.
        ProcessDir(categoryDir, filePattern, GetCatagoryFromDir(categoryDir), process);
    }
}
/// <summary>
/// Processes the corpus and hands every data item to each processor in
/// <paramref name="processors"/>, in list order.
/// </summary>
/// <param name="corpusPath">Root directory of the corpus.</param>
/// <param name="filePattern">Pattern used to select files by name.</param>
/// <param name="processors">Processors whose <c>Process</c> method is called for every item.</param>
public static void ProcessCorpus(string corpusPath, Regex filePattern, IList<CorpusProcessor> processors)
{
    // Wrap the whole processor list in a single callback so the delegate-based overload can drive it.
    CorpusProcessDelegate fanOut = (TextData item) =>
    {
        foreach (CorpusProcessor current in processors)
        {
            current.Process(item);
        }
    };

    ProcessCorpus(corpusPath, filePattern, fanOut);
}
/// <summary>
/// Loads all data items from <paramref name="stream"/> with the parser chosen for
/// <paramref name="path"/> and invokes <paramref name="process"/> once per item.
/// </summary>
/// <param name="stream">Source of the file content. This method does not itself dispose it.</param>
/// <param name="path">Path used to pick the parser; also passed through to the parser.</param>
/// <param name="catagoryHint">Category hint passed through to the parser.</param>
/// <param name="process">Callback invoked for each loaded item, in the order the parser returned them.</param>
public static void ProcessFile(Stream stream, string path, TextDataCatagory catagoryHint, CorpusProcessDelegate process)
{
    IList<TextData> loaded = GetCorpusParserFromPath(path).LoadData(stream, path, catagoryHint);

    foreach (TextData entry in loaded)
    {
        process(entry);
    }
}
/// <summary>
/// Recursively processes the files under <paramref name="path"/> whose file name matches
/// <paramref name="filePattern"/>. Files with a <c>.zip</c> extension (any casing) are handed to
/// <c>ProcessZip</c>; all other files are opened and handed to <c>ProcessFile</c>.
/// Does nothing when <paramref name="path"/> is not an existing directory.
/// </summary>
/// <param name="path">Directory to scan, including all of its subdirectories.</param>
/// <param name="filePattern">Pattern tested against the file name only (not the full path).</param>
/// <param name="catagoryHint">Category hint forwarded for every file found.</param>
/// <param name="process">Callback forwarded to <c>ProcessZip</c> / <c>ProcessFile</c>.</param>
public static void ProcessDir(string path, Regex filePattern, TextDataCatagory catagoryHint, CorpusProcessDelegate process)
{
    if (!Directory.Exists(path))
    {
        return;
    }

    foreach (string file in Directory.GetFiles(path, "*", SearchOption.AllDirectories))
    {
        // The marker is looked for in the full path, so it excludes a file whether it appears
        // in the file name or in any directory name leading to it.
        if (file.Contains("[NOPARSE]"))
        {
            continue;
        }

        // IsMatch stops at the first hit instead of collecting every match just to count them.
        if (!filePattern.IsMatch(Path.GetFileName(file)))
        {
            continue;
        }

        // Ordinal, case-insensitive comparison: a culture-sensitive ToLower() turns ".ZIP" into
        // ".zıp" under Turkish/Azeri cultures, which would send archives down the plain-file path.
        if (string.Equals(Path.GetExtension(file), ".zip", System.StringComparison.OrdinalIgnoreCase))
        {
            ProcessZip(file, filePattern, catagoryHint, process);
        }
        else
        {
            using (Stream s = File.OpenRead(file))
            {
                ProcessFile(s, file, catagoryHint, process);
            }
        }
    }
}
/// <summary>
/// Opens the zip archive at <paramref name="path"/> and processes every file entry whose name
/// matches <paramref name="filePattern"/>. Each entry is passed to <c>ProcessFile</c> under the
/// synthetic path <c>archivePath@entryFullName</c>.
/// </summary>
/// <param name="path">Path of the zip archive on disk.</param>
/// <param name="filePattern">Pattern tested against the entry's name (without its directory part).</param>
/// <param name="catagoryHint">Category hint forwarded for every entry.</param>
/// <param name="process">Callback forwarded to <c>ProcessFile</c>.</param>
public static void ProcessZip(string path, Regex filePattern, TextDataCatagory catagoryHint, CorpusProcessDelegate process)
{
    using (ZipArchive archive = ZipFile.OpenRead(path))
    {
        foreach (ZipArchiveEntry entry in archive.Entries)
        {
            // Directory entries have an empty Name and no content; a permissive pattern such as
            // ".*" would otherwise match them and feed an empty stream to the parser.
            if (entry.Name.Length == 0)
            {
                continue;
            }

            // IsMatch stops at the first hit instead of collecting every match just to count them.
            if (!filePattern.IsMatch(entry.Name))
            {
                continue;
            }

            using (Stream s = entry.Open())
            {
                ProcessFile(s, path + "@" + entry.FullName, catagoryHint, process);
            }
        }
    }
}
/// <summary>
/// Processes the whole corpus without filtering by file name: delegates to the pattern-based
/// overload with the pattern <c>.*</c>, which matches any name.
/// </summary>
/// <param name="corpusPath">Root directory of the corpus.</param>
/// <param name="process">Callback forwarded to the pattern-based overload.</param>
public static void ProcessCorpus(string corpusPath, CorpusProcessDelegate process)
{
    Regex matchEverything = new Regex(".*");
    ProcessCorpus(corpusPath, matchEverything, process);
}