/// <summary>
/// Program entry point: parses the command line, loads the grammar dictionary,
/// then walks the requested parsing files and hands each fetched sentence to
/// <c>ProcessSentence</c>. Output is written to "facts.txt" and "skipped.txt"
/// in the result folder.
/// </summary>
/// <remarks>
/// Recognized options (each takes exactly one value):
///   -parsing     a file, a directory (scanned for "*.parsing.txt") or a wildcard mask; may be repeated
///   -dict        path of the dictionary passed to the grammar engine
///   -templates   path passed to <c>syntax_checker.LoadTemplates</c> (skipped when empty)
///   -output      result folder (default "f:\tmp")
///   -max_samples upper bound on the number of fetched sentences
///   -skip        sentences are processed only once the running counter reaches this value
///   -max_len     length limit forwarded to <c>ProcessSentence</c>
///   -filter      comma-separated filter tokens (default "3")
/// </remarks>
/// <param name="args">Command line arguments.</param>
/// <exception cref="ApplicationException">
/// Thrown for an unknown option or for an option that is missing its value.
/// </exception>
static void Main(string[] args)
{
    string result_folder = @"f:\tmp";
    List<string> parsed_sentences = new List<string>();
    int MAX_SAMPLE = int.MaxValue;
    int MAX_LEN = int.MaxValue;
    string dictionary_path = "";
    string syntax_templates = "";
    string[] filters = { "3" };

    #region Command_Line_Options
    for (int i = 0; i < args.Length; ++i)
    {
        string option = args[i];

        if (option == "-parsing")
        {
            parsed_sentences.Add(NextOptionValue(args, ref i));
        }
        else if (option == "-dict")
        {
            dictionary_path = NextOptionValue(args, ref i);
        }
        else if (option == "-templates")
        {
            syntax_templates = NextOptionValue(args, ref i);
        }
        else if (option == "-output")
        {
            result_folder = NextOptionValue(args, ref i);
        }
        else if (option == "-max_samples")
        {
            MAX_SAMPLE = int.Parse(NextOptionValue(args, ref i));
        }
        else if (option == "-skip")
        {
            nb_skip = int.Parse(NextOptionValue(args, ref i));
        }
        else if (option == "-max_len")
        {
            MAX_LEN = int.Parse(NextOptionValue(args, ref i));
        }
        else if (option == "-filter")
        {
            filters = NextOptionValue(args, ref i).Split(',');
        }
        else
        {
            throw new ApplicationException(string.Format("Unknown option {0}", option));
        }
    }
    #endregion Command_Line_Options

    preprocessor = new Preprocessor();

    if (!string.IsNullOrEmpty(syntax_templates))
    {
        syntax_checker.LoadTemplates(syntax_templates);
    }

    Console.WriteLine("Loading dictionary {0}", dictionary_path);
    SolarixGrammarEngineNET.GrammarEngine2 gren = new SolarixGrammarEngineNET.GrammarEngine2();
    gren.Load(dictionary_path, true);

    // Predicate filter; the recognized values are "3", "2s" and "1s".
    // The first recognized token wins; null when none of the tokens is recognized.
    string[] verb_filter_values = { "3", "1s", "2s" };
    string filter_verb = filters.FirstOrDefault(z => verb_filter_values.Contains(z));

    // Sentence type filter. Allowed values: empty or "q".
    string filter_sent = filters.FirstOrDefault(z => z == "q") ?? "";

    // File for saving the selected fact sentences.
    wrt_samples = new System.IO.StreamWriter(System.IO.Path.Combine(result_folder, "facts.txt"));
    try
    {
        // Sentences that failed the detailed check of the syntactic structure.
        wrt_skipped = new System.IO.StreamWriter(System.IO.Path.Combine(result_folder, "skipped.txt"));
        try
        {
            #region Processing_All_Files
            foreach (string mask in parsed_sentences)
            {
                string[] files = null;
                if (System.IO.Directory.Exists(mask))
                {
                    files = System.IO.Directory.GetFiles(mask, "*.parsing.txt");
                }
                else if (mask.IndexOfAny("*?".ToCharArray()) != -1)
                {
                    // A bare mask such as "*.parsing.txt" has no directory part;
                    // GetFiles rejects an empty path, so search the current directory.
                    string folder = System.IO.Path.GetDirectoryName(mask);
                    if (string.IsNullOrEmpty(folder))
                    {
                        folder = ".";
                    }

                    files = System.IO.Directory.GetFiles(folder, System.IO.Path.GetFileName(mask));
                }
                else
                {
                    files = new string[1] { mask };
                }

                Console.WriteLine("Number of parsing files={0}", files.Length);

                foreach (string file in files)
                {
                    if (sample_count >= MAX_SAMPLE)
                    {
                        break;
                    }

                    Console.WriteLine("Processing {0}...", file);

                    using (Sentences src = new Sentences(file))
                    {
                        while (src.Next() && sample_count < MAX_SAMPLE)
                        {
                            Sentence sent = src.GetFetched();

                            // The counter advances for every fetched sentence,
                            // including the ones without a root that are skipped below.
                            sample_count++;

                            if (sent.root == null)
                            {
                                continue;
                            }

                            if (sample_count > 0 && (sample_count % 10000) == 0)
                            {
                                Console.Write("{0} samples extracted\r", sample_count);
                            }

                            if (sample_count >= nb_skip)
                            {
                                //Console.WriteLine("DEBUG [{0}] {1}", sample_count, sent.GetText());
                                ProcessSentence(gren, sent, MAX_LEN, filter_verb, filter_sent);
                            }
                        }
                    }
                }
            }
            #endregion Processing_All_Files
        }
        finally
        {
            // Close (and flush) even when processing fails, so collected output is not lost.
            wrt_skipped.Close();
        }
    }
    finally
    {
        wrt_samples.Close();
    }

    return;
}

/// <summary>
/// Returns the value that follows the option at <paramref name="i"/> and advances
/// <paramref name="i"/> onto that value.
/// </summary>
/// <param name="args">Command line arguments.</param>
/// <param name="i">Index of the option; on return, index of its value.</param>
/// <exception cref="ApplicationException">The option is the last argument and has no value.</exception>
private static string NextOptionValue(string[] args, ref int i)
{
    if (i + 1 >= args.Length)
    {
        throw new ApplicationException(string.Format("Missing value for option {0}", args[i]));
    }

    i++;
    return args[i];
}