示例#1
0
 /// <summary>
 /// Processes every immediate subdirectory of <paramref name="corpusPath"/>.
 /// </summary>
 /// <param name="corpusPath">Root directory whose first-level subdirectories are enumerated.</param>
 /// <param name="filePattern">Pattern forwarded unchanged to <c>ProcessDir</c>.</param>
 /// <param name="process">Callback forwarded unchanged to <c>ProcessDir</c>.</param>
 /// <remarks>
 /// Only first-level subdirectories are listed here (<see cref="SearchOption.TopDirectoryOnly"/>);
 /// files placed directly in <paramref name="corpusPath"/> are not visited by this method.
 /// </remarks>
 public static void ProcessCorpus(string corpusPath, Regex filePattern, CorpusProcessDelegate process)
 {
     foreach (string subDir in Directory.GetDirectories(corpusPath, "*", SearchOption.TopDirectoryOnly))
     {
         // The category passed along as a hint is whatever GetCatagoryFromDir returns for this subdirectory.
         ProcessDir(subDir, filePattern, GetCatagoryFromDir(subDir), process);
     }
 }
示例#2
0
        /// <summary>
        /// Processes the corpus, handing each data item to every processor in <paramref name="processors"/>.
        /// </summary>
        /// <param name="corpusPath">Forwarded to the delegate-based <c>ProcessCorpus</c> overload.</param>
        /// <param name="filePattern">Forwarded to the delegate-based <c>ProcessCorpus</c> overload.</param>
        /// <param name="processors">Processors invoked, in list order, for each data item.</param>
        public static void ProcessCorpus(string corpusPath, Regex filePattern, IList<CorpusProcessor> processors)
        {
            // Wrap the processor list in a single callback so the delegate-based overload can do the traversal.
            CorpusProcessDelegate fanOut = item =>
            {
                foreach (CorpusProcessor current in processors)
                {
                    current.Process(item);
                }
            };

            ProcessCorpus(corpusPath, filePattern, fanOut);
        }
示例#3
0
        /// <summary>
        /// Loads the data items from <paramref name="stream"/> and invokes <paramref name="process"/> on each one.
        /// </summary>
        /// <param name="stream">Stream handed to the parser's <c>LoadData</c>.</param>
        /// <param name="path">Used to select the parser via <c>GetCorpusParserFromPath</c> and also passed to <c>LoadData</c>.</param>
        /// <param name="catagoryHint">Category hint passed to <c>LoadData</c>.</param>
        /// <param name="process">Callback invoked once per loaded item, in the order returned by the parser.</param>
        public static void ProcessFile(Stream stream, string path, TextDataCatagory catagoryHint, CorpusProcessDelegate process)
        {
            // The parser is chosen from the path alone; the stream is only read by LoadData.
            IList<TextData> loadedItems = GetCorpusParserFromPath(path).LoadData(stream, path, catagoryHint);

            foreach (TextData entry in loadedItems)
            {
                process(entry);
            }
        }
示例#4
0
        /// <summary>
        /// Recursively processes every file under <paramref name="path"/> whose file name matches
        /// <paramref name="filePattern"/>.
        /// </summary>
        /// <param name="path">Directory to scan; if it does not exist the method does nothing.</param>
        /// <param name="filePattern">Pattern tested against the file name only (not the full path).</param>
        /// <param name="catagoryHint">Category hint forwarded to <c>ProcessZip</c> / <c>ProcessFile</c>.</param>
        /// <param name="process">Callback forwarded to <c>ProcessZip</c> / <c>ProcessFile</c>.</param>
        /// <remarks>
        /// Files whose full path contains the literal text <c>[NOPARSE]</c> are skipped.
        /// Files with a <c>.zip</c> extension (any casing) are handed to <c>ProcessZip</c>; all other
        /// matching files are opened for reading and handed to <c>ProcessFile</c>.
        /// </remarks>
        public static void ProcessDir(string path, Regex filePattern, TextDataCatagory catagoryHint, CorpusProcessDelegate process)
        {
            if (!Directory.Exists(path))
            {
                return;
            }

            foreach (string file in Directory.GetFiles(path, "*", SearchOption.AllDirectories))
            {
                // The marker is looked up in the full path, so it excludes a file whether it
                // appears in the file name or in any containing directory name.
                if (file.Contains("[NOPARSE]"))
                {
                    continue;
                }

                // IsMatch stops at the first hit instead of collecting every match just to count them.
                if (!filePattern.IsMatch(Path.GetFileName(file)))
                {
                    continue;
                }

                // Ordinal, case-insensitive comparison: ToLower() depends on the current culture
                // (e.g. ".ZIP" lower-cases to ".zıp" under Turkish rules and would not be recognised).
                bool isZip = string.Equals(Path.GetExtension(file), ".zip", System.StringComparison.OrdinalIgnoreCase);
                if (isZip)
                {
                    ProcessZip(file, filePattern, catagoryHint, process);
                }
                else
                {
                    using (Stream s = File.OpenRead(file))
                    {
                        ProcessFile(s, file, catagoryHint, process);
                    }
                }
            }
        }
示例#5
0
 /// <summary>
 /// Processes every file entry of the zip archive at <paramref name="path"/> whose entry name matches
 /// <paramref name="filePattern"/>.
 /// </summary>
 /// <param name="path">Path of the zip archive to open for reading.</param>
 /// <param name="filePattern">Pattern tested against the entry's name (without its directory part).</param>
 /// <param name="catagoryHint">Category hint forwarded to <c>ProcessFile</c>.</param>
 /// <param name="process">Callback forwarded to <c>ProcessFile</c>.</param>
 /// <remarks>
 /// Each matching entry is reported to <c>ProcessFile</c> under the synthetic path
 /// <c>archivePath@entryFullName</c>.
 /// </remarks>
 public static void ProcessZip(string path, Regex filePattern, TextDataCatagory catagoryHint, CorpusProcessDelegate process)
 {
     using (ZipArchive archive = ZipFile.OpenRead(path))
     {
         foreach (ZipArchiveEntry entry in archive.Entries)
         {
             // Directory entries carry an empty Name; permissive patterns such as ".*" would
             // otherwise match them and feed an empty stream to the parser.
             if (string.IsNullOrEmpty(entry.Name))
             {
                 continue;
             }

             // IsMatch stops at the first hit instead of collecting every match just to count them.
             if (!filePattern.IsMatch(entry.Name))
             {
                 continue;
             }

             using (Stream s = entry.Open())
             {
                 ProcessFile(s, path + "@" + entry.FullName, catagoryHint, process);
             }
         }
     }
 }
示例#6
0
 /// <summary>
 /// Processes the corpus at <paramref name="corpusPath"/> using the pattern <c>.*</c> as the file filter.
 /// </summary>
 /// <param name="corpusPath">Forwarded to the pattern-based <c>ProcessCorpus</c> overload.</param>
 /// <param name="process">Forwarded to the pattern-based <c>ProcessCorpus</c> overload.</param>
 public static void ProcessCorpus(string corpusPath, CorpusProcessDelegate process)
 {
     // ".*" matches any input, so no name is rejected by the pattern.
     Regex matchAnything = new Regex(".*");
     ProcessCorpus(corpusPath, matchAnything, process);
 }