/// <summary>
/// Main entry point of the coreference system: runs every loaded sieve over the
/// document, optionally post-processes it, and builds the resulting coref chains.
/// </summary>
/// <param name="document">Input document for coref format (Annotation and optional information)</param>
/// <param name="output">
/// Receives the coref system output; the array must have room for 4 entries.
/// Slot 0 (gold CoNLL), slot 1 (CoNLL before coref) and slot 2 (CoNLL after coref)
/// are written only when scoring is enabled; slot 3 (log from sieves) is always written.
/// </param>
/// <returns>Map of coref chain ID and corresponding chain</returns>
/// <exception cref="System.Exception"/>
public virtual IDictionary<int, CorefChain> Coref(Document document, StringBuilder[] output)
{
    if (HybridCorefProperties.PrintMDLog(props))
    {
        Redwood.Log(HybridCorefPrinter.PrintMentionDetectionLog(document));
    }

    if (HybridCorefProperties.DoScore(props))
    {
        // gold
        StringBuilder goldConll = new StringBuilder();
        goldConll.Append(CorefPrinter.PrintConllOutput(document, true));
        output[0] = goldConll;

        // before coref
        StringBuilder beforeCorefConll = new StringBuilder();
        beforeCorefConll.Append(CorefPrinter.PrintConllOutput(document, false));
        output[1] = beforeCorefConll;
    }

    // log from sieves
    StringBuilder sieveLog = new StringBuilder();
    output[3] = sieveLog;
    foreach (Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve currentSieve in sieves)
    {
        // Checked before every sieve, ahead of that sieve's ResolveMention call.
        CorefUtils.CheckForInterrupt();
        sieveLog.Append(currentSieve.ResolveMention(document, dictionaries, props));
    }

    // post processing
    if (HybridCorefProperties.DoPostProcessing(props))
    {
        PostProcessing(document);
    }

    if (HybridCorefProperties.DoScore(props))
    {
        // after coref
        StringBuilder afterCorefConll = new StringBuilder();
        afterCorefConll.Append(CorefPrinter.PrintConllOutput(document, false, true));
        output[2] = afterCorefConll;
    }

    return MakeCorefOutput(document);
}
/// <summary>
/// Creates a coref system from the given properties and dictionaries, loads the
/// sieves named by the properties, and turns on word-embedding loading when any
/// non-rule sieve is configured to use word embeddings.
/// </summary>
/// <param name="props">Configuration; may be modified to set the load-word-embedding property to "true".</param>
/// <param name="dictionaries">Dictionaries stored on the instance and later passed to the sieves.</param>
/// <exception cref="System.Exception"/>
public HybridCorefSystem(Properties props, Edu.Stanford.Nlp.Coref.Data.Dictionaries dictionaries)
{
    this.props = props;
    this.dictionaries = dictionaries;
    sieves = Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.LoadSieves(props);

    // set semantics loading
    foreach (Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve loadedSieve in sieves)
    {
        // Rule sieves are skipped without consulting the word-embedding setting
        // (the && short-circuits before UseWordEmbedding is called).
        bool isRuleSieve = loadedSieve.classifierType == Sieve.ClassifierType.Rule;
        if (!isRuleSieve && HybridCorefProperties.UseWordEmbedding(props, loadedSieve.sievename))
        {
            props.SetProperty(HybridCorefProperties.LoadWordEmbeddingProp, "true");
        }
    }
}
/// <summary>
/// Runs end-to-end coreference over every document returned by the system's
/// document maker, using a multicore wrapper with the configured thread count.
/// When scoring is enabled, three CoNLL output files (gold, predicted,
/// coref-predicted) are opened under the configured output path and the scorer
/// summaries are printed at the end.
/// </summary>
/// <param name="props">Configuration controlling logging, threading, timing, memory checks and scoring.</param>
/// <exception cref="System.Exception"/>
public static void RunCoref(Properties props)
{
    /*
     * property, environment setting
     */
    Redwood.HideChannelsEverywhere("debug-cluster", "debug-mention", "debug-preprocessor", "debug-docreader", "debug-mergethres", "debug-featureselection", "debug-md");
    int nThreads = HybridCorefProperties.GetThreadCounts(props);
    string timeStamp = Calendar.GetInstance().GetTime().ToString().ReplaceAll("\\s", "-").ReplaceAll(":", "-");
    Logger logger = Logger.GetLogger(typeof(Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem).FullName);

    // set log file path
    if (props.Contains(HybridCorefProperties.LogProp))
    {
        File logFile = new File(props.GetProperty(HybridCorefProperties.LogProp));
        RedwoodConfiguration.Current().Handlers(RedwoodConfiguration.Handlers.File(logFile)).Apply();
        Redwood.Log("Starting coref log");
    }
    log.Info(props.ToString());
    if (HybridCorefProperties.CheckMemory(props))
    {
        CheckMemoryUsage();
    }
    Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem cs = new Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem(props);

    /*
     * output setting
     */
    // prepare conll output
    string goldOutput = null;
    string beforeCorefOutput = null;
    string afterCorefOutput = null;
    PrintWriter writerGold = null;
    PrintWriter writerBeforeCoref = null;
    PrintWriter writerAfterCoref = null;
    DateTime startTime = null;

    // Everything that can fail while the writers are (or are being) opened lives
    // inside the try block, so the finally clause releases the file handles even
    // when a document fails to load or a worker throws.
    try
    {
        if (HybridCorefProperties.DoScore(props))
        {
            string pathOutput = CorefProperties.ConllOutputPath(props);
            (new File(pathOutput)).Mkdir();
            goldOutput = pathOutput + "output-" + timeStamp + ".gold.txt";
            beforeCorefOutput = pathOutput + "output-" + timeStamp + ".predicted.txt";
            afterCorefOutput = pathOutput + "output-" + timeStamp + ".coref.predicted.txt";
            writerGold = new PrintWriter(new FileOutputStream(goldOutput));
            writerBeforeCoref = new PrintWriter(new FileOutputStream(beforeCorefOutput));
            writerAfterCoref = new PrintWriter(new FileOutputStream(afterCorefOutput));
        }

        // run coref
        // conll output and logs
        MulticoreWrapper<Pair<Document, Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem>, StringBuilder[]> wrapper =
            new MulticoreWrapper<Pair<Document, Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem>, StringBuilder[]>(nThreads, new _IThreadsafeProcessor_134());

        if (HybridCorefProperties.CheckTime(props))
        {
            startTime = new DateTime();
            System.Console.Error.Printf("END-TO-END COREF Start time: %s\n", startTime);
        }

        // run processes
        int docCnt = 0;
        while (true)
        {
            Document document = cs.docMaker.NextDoc();
            if (document == null)
            {
                // A null document marks the end of the input.
                break;
            }
            wrapper.Put(Pair.MakePair(document, cs));
            // NOTE(review): LogOutput presumably drains finished results into the
            // writers and returns the updated document count - confirm against its definition.
            docCnt = LogOutput(wrapper, writerGold, writerBeforeCoref, writerAfterCoref, docCnt);
        }

        // Finished reading the input. Wait for jobs to finish
        wrapper.Join();
        docCnt = LogOutput(wrapper, writerGold, writerBeforeCoref, writerAfterCoref, docCnt);
    }
    finally
    {
        // The writers stay null when scoring is disabled; the close helper is
        // invoked unconditionally in that case as well.
        IOUtils.CloseIgnoringExceptions(writerGold);
        IOUtils.CloseIgnoringExceptions(writerBeforeCoref);
        IOUtils.CloseIgnoringExceptions(writerAfterCoref);
    }

    if (HybridCorefProperties.CheckTime(props))
    {
        System.Console.Error.Printf("END-TO-END COREF Elapsed time: %.3f seconds\n", (((new DateTime()).GetTime() - startTime.GetTime()) / 1000F));
    }
    if (HybridCorefProperties.CheckMemory(props))
    {
        CheckMemoryUsage();
    }

    // scoring
    if (HybridCorefProperties.DoScore(props))
    {
        // First summary: gold vs. output before coref.
        string summary = CorefScorer.GetEvalSummary(CorefProperties.GetScorerPath(props), goldOutput, beforeCorefOutput);
        CorefScorer.PrintScoreSummary(summary, logger, false);

        // Second summary: gold vs. output after coref; also used for the final CoNLL score.
        summary = CorefScorer.GetEvalSummary(CorefProperties.GetScorerPath(props), goldOutput, afterCorefOutput);
        CorefScorer.PrintScoreSummary(summary, logger, true);
        CorefScorer.PrintFinalConllScore(summary);
    }
}