/// <summary>
/// Runs the console annotation pipeline: loads the annotation set and the
/// <c>.txt</c> files found in the corpus directory, runs the ICTCLAS
/// annotator and then the CRF++ annotator over the corpus, and finally
/// saves every document (and, when reports are enabled, a report per
/// document). Progress and errors are written to the console.
/// </summary>
/// <remarks>
/// Both annotators are started through their <c>ProcessCorpusAsync</c>
/// methods; this method blocks until the pipeline signals that it has
/// finished or failed. Failures are reported on the console instead of
/// being thrown to the caller.
/// </remarks>
public void Start()
{
    if (!startCheck())
    {
        return;
    }

    Console.BackgroundColor = ConsoleColor.DarkBlue;
    Console.Clear();
    writeTitle();
    ConsoleColor color = Console.ForegroundColor;
    Console.ForegroundColor = ConsoleColor.Green;
    try
    {
        Console.WriteLine(MessageInitializing);
        AnnotationSet wemas = new AnnotationSet();
        wemas.Load(_annotationSet);
        ICTCLASAnnotator ictclasAno = new ICTCLASAnnotator(wemas, null);
        CRFPPAnnotator crfppAno = new CRFPPAnnotator(wemas, new Model(_crfppModel));
        crfppAno.SetCRFPPRootPath(_crfppDir);
        Corpus corpus = new Corpus(Encoding.GetEncoding(_xmlEncoding));

        Console.Write(MessageLoadingCorpus);
        savePosition();

        // Materialize the file list once: it is counted, enumerated and
        // indexed several times below.
        string[] documents = Directory.GetFiles(_corpusDir)
            .Where(file => file.EndsWith(".txt", StringComparison.OrdinalIgnoreCase))
            .ToArray();
        if (documents.Length == 0)
        {
            Console.WriteLine();
            Console.WriteLine(MessageEmptyCorpus);
            Console.WriteLine(MessageDone);
            // Restore the colour before pausing so the pause prompt is
            // printed in the caller's original colour.
            Console.ForegroundColor = color;
            pause();
            return;
        }

        _sentenceFactory = new SentenceFactory(wemas);
        _sentenceFactory.InputLanguage = Language.SimplifiedChinese;
        _sentenceFactory.OutputEncoding = corpus.Encoding;

        int count = 0;
        foreach (string path in documents)
        {
            Sentence[] sens;
            // The stream is only read; dispose it as soon as the sentences
            // have been extracted so the file handle is not leaked.
            using (FileStream input = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                sens = _sentenceFactory.GetSentences(input);
            }

            WEMDocument wDoc = new WEMDocument(corpus.Encoding);
            wDoc.Name = path;
            foreach (var sen in sens)
            {
                wDoc.AddSentence(sen);
            }
            corpus.AddDocument(wDoc);
            rewrite(String.Format("{0}%",
                (int)((double)++count / documents.Length * 100)));
        }
        Console.WriteLine("共加载{0}篇文档。", corpus.Documents.Count);

        Console.Write(MessageAnnotatingSeg);
        savePosition();

        // Completion signal shared between this thread and the annotators'
        // completion handlers. Guarded by 'gate' so the write is visible to
        // the waiting thread and signalling twice is harmless.
        object gate = new object();
        bool done = false;
        Action signalDone = () =>
        {
            lock (gate)
            {
                done = true;
                Monitor.PulseAll(gate);
            }
        };

        crfppAno.AnnotationProgressChanged += (s, ea) =>
        {
            Console.WriteLine(ea.Message);
        };
        crfppAno.AnnotationCompleted += (s, ea) =>
        {
            try
            {
                if (ea.Error != null)
                {
                    reportFailure(ea.Error, true);
                    return;
                }

                Console.WriteLine(MessageAnnotationFinished);
                saveAnnotatedDocuments(corpus, documents, wemas);
                Console.WriteLine(MessageDone);
            }
            catch (Exception failure)
            {
                // A failure while saving must not leave Start() waiting
                // forever; report it and fall through to the signal.
                reportFailure(failure, true);
            }
            finally
            {
                signalDone();
            }
        };

        ictclasAno.AnnotationProgressChanged += (s, ea) =>
        {
            rewrite(String.Format("{0}%",
                Math.Round(ea.ProgressInDouble, 2) * 100));
        };
        ictclasAno.AnnotationCompleted += (s, ea) =>
        {
            // True once the CRF++ stage has been started; from then on its
            // completion handler is responsible for signalling.
            bool entityStageStarted = false;
            try
            {
                if (ea.Error != null)
                {
                    reportFailure(ea.Error, true);
                    return;
                }

                rewrite("100%");
                Console.WriteLine(MessageAnnotatingEnt);
                if (crfppAno.Initialize())
                {
                    crfppAno.ProcessCorpusAsync(corpus);
                    entityStageStarted = true;
                }
                else
                {
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine(MessageEntInitFailed);
                }
            }
            catch (Exception failure)
            {
                reportFailure(failure, true);
            }
            finally
            {
                if (!entityStageStarted)
                {
                    signalDone();
                }
            }
        };

        if (ictclasAno.Initialize())
        {
            ictclasAno.ProcessCorpusAsync(corpus);
            lock (gate)
            {
                while (!done)
                {
                    Monitor.Wait(gate);
                }
            }
        }
        else
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine(MessageSegInitFailed);
        }
    }
    catch (Exception ex)
    {
        reportFailure(ex, false);
    }
    finally
    {
        Console.ForegroundColor = color;
    }
}

/// <summary>
/// Saves every document of <paramref name="corpus"/> as an XML file in the
/// output location and, when reports are enabled, writes a report for it,
/// updating the on-screen percentage after each document.
/// </summary>
/// <param name="corpus">Corpus whose documents are saved.</param>
/// <param name="sourceFiles">
/// Source file paths, in the same order in which the documents were added
/// to the corpus.
/// </param>
/// <param name="wemas">Annotation set passed on to the report writer.</param>
private void saveAnnotatedDocuments(Corpus corpus, string[] sourceFiles, AnnotationSet wemas)
{
    if (_reportsEnabled)
    {
        Console.Write(MessageSavingDocsAndReps);
    }
    else
    {
        Console.Write(MessageSavingDocuments);
    }
    savePosition();

    int saved = 0;
    foreach (var doc in corpus.Documents)
    {
        // Documents were added in source-file order, so the running index
        // identifies the file each document was loaded from.
        string sourceName = Path.GetFileName(sourceFiles[saved]);
        doc.Save(_outputDir + Path.ChangeExtension(sourceName, "xml"));
        if (_reportsEnabled)
        {
            saveReport(doc, _reportsDir + sourceName, wemas);
        }
        rewrite(String.Format("{0}%",
            (int)((double)++saved / corpus.Documents.Count * 100)));
    }
}

/// <summary>
/// Switches the console to red and prints the unhandled-exception banner
/// followed by the error message and, optionally, its stack trace.
/// </summary>
/// <param name="error">The exception to report.</param>
/// <param name="includeStackTrace">Whether to print the stack trace too.</param>
private void reportFailure(Exception error, bool includeStackTrace)
{
    Console.ForegroundColor = ConsoleColor.Red;
    Console.WriteLine();
    Console.WriteLine(MessageUnhandledException);
    Console.WriteLine(error.Message);
    if (includeStackTrace)
    {
        Console.WriteLine(error.StackTrace);
    }
}
/// <summary>
/// Parses one console command line and executes it. The line is split on
/// white space and the first token, compared case-insensitively, selects
/// the command: an empty token does nothing, <c>exit</c> sets the exit
/// flag, <c>clear</c> clears the console, and <c>genas</c>, <c>testas</c>
/// and <c>testws</c> run the corresponding maintenance/test routines.
/// Any other token prints an "Unknown Command" message.
/// </summary>
/// <param name="command">The raw command line.</param>
static void ProcessCommand(string command)
{
    string[] blocks = command.Split();
    // Invariant lower-casing keeps keyword matching independent of the
    // current culture (e.g. Turkish dotless i would break "EXIT").
    switch (blocks[0].ToLowerInvariant())
    {
        case "":
            return;
        case "exit":
            if (blocks.Length == 1)
            {
                _exitFlag = true;
            }
            else
            {
                Console.WriteLine("Unknown Parameters.");
            }
            return;
        case "clear":
            if (blocks.Length == 1)
            {
                Console.Clear();
            }
            else
            {
                Console.WriteLine("Unknown Parameters.");
            }
            return;
        case "genas":
            RunGenAsCommand();
            return;
        case "testas":
            RunTestAsCommand();
            return;
        case "testws":
            RunTestWsCommand();
            return;
        default:
            Console.WriteLine("Unknown Command: '{0}'.", blocks[0]);
            return;
    }
}

/// <summary>
/// Builds the "WEMAS" annotation set from the dictionary in
/// <c>WEMAS.xml</c> and saves it as <c>WEMAS.wemas</c>.
/// </summary>
static void RunGenAsCommand()
{
    AnnotationDictionary dict = new AnnotationDictionary();
    AnnotationSet ans = new AnnotationSet("WEMAS");
    dict.Load("WEMAS.xml");
    foreach (var entry in dict._Dictionary)
    {
        ans.SetAnnotationDescription(entry.Key, entry.Value);
    }
    foreach (var sep in dict.SentenceSeparators)
    {
        ans.AddSentenceSeparator(sep);
    }
    ans.Save("WEMAS.wemas");
}

/// <summary>
/// Loads <c>WEMAS.wemas</c>, sets its description and saves the result as
/// <c>WEMAS2.wemas</c>.
/// </summary>
static void RunTestAsCommand()
{
    AnnotationSet ans = new AnnotationSet("WEMAS");
    ans.Load("WEMAS.wemas");
    ans.Description = "WEB ENTITY MINER Default Annotation Set.";
    ans.Save("WEMAS2.wemas");
}

/// <summary>
/// Runs both annotators synchronously over the sentences read from
/// <c>utf8.txt</c>, saves the document as <c>RESULT.xml</c> and prints
/// every word, tagging entities with their annotation.
/// </summary>
static void RunTestWsCommand()
{
    AnnotationSet wemas = new AnnotationSet();
    wemas.Load("WEMAS.wemas");
    ICTCLASAnnotator ano = new ICTCLASAnnotator(wemas, null);
    CRFPPAnnotator crfAno = new CRFPPAnnotator(wemas, new Model(
        AppDomain.CurrentDomain.BaseDirectory + "model.crfppmodel"));
    Corpus c = new Corpus(Encoding.UTF8);
    SentenceFactory.AnnotationSet = wemas;
    SentenceFactory.InputLanguage = Language.SimplifiedChinese;
    SentenceFactory.OutputEncoding = Encoding.UTF8;

    WEMDocument doc = new WEMDocument(Encoding.UTF8);
    // Consume the sentences while the stream is still open, then release
    // the file handle instead of leaking it.
    using (FileStream input = new FileStream("utf8.txt", FileMode.Open, FileAccess.Read))
    {
        foreach (var sen in SentenceFactory.GetSentences(input))
        {
            doc.AddSentence(sen);
        }
    }
    c.AddDocument(doc);

    try
    {
        ano.ProcessCorpus(c);
        crfAno.ProcessCorpus(c);
    }
    catch (Exception ex)
    {
        Console.WriteLine("Unhandled Exception:\n{0}", ex.Message);
    }

    // The document is saved and printed even when annotation failed, so
    // whatever was produced up to that point can still be inspected.
    doc.Save("RESULT.xml");
    foreach (var sentence in doc.Sentences)
    {
        foreach (var word in sentence.Words)
        {
            Entity entity = word as Entity;
            if (entity != null)
            {
                Console.WriteLine("{0}/ENTITY:{1}", word.Content,
                    wemas[entity.EntityId]);
            }
            else
            {
                Console.WriteLine("{0}", word.Content);
            }
        }
    }
}