/// <summary>
/// Runs the whole training sequence: preprocesses the "train" and "dev" data,
/// builds and trains the ranking, classification and anaphoricity pairwise models,
/// tests each of them against the "dev" data, and finally trains the clusterer.
/// </summary>
/// <param name="props">Properties used to set the training path, build the dictionaries and select the input.</param>
/// <exception cref="System.Exception"/>
public static void DoTraining(Properties props)
{
    SetTrainingPath(props);
    Dictionaries dicts = new Dictionaries(props);

    // Preprocess the training split.
    SetDataPath("train");
    wordCountsFile = trainingPath + "train/word_counts.ser";
    CorefProperties.SetInput(props, CorefProperties.Dataset.Train);
    Preprocess(props, dicts, true);

    // Preprocess the development split.
    SetDataPath("dev");
    CorefProperties.SetInput(props, CorefProperties.Dataset.Dev);
    Preprocess(props, dicts, false);

    // Switch back to the training data; the dictionaries are not referenced past this point.
    SetDataPath("train");
    dicts = null;

    // Build the three pairwise models.
    PairwiseModel classifier = PairwiseModel
        .NewBuilder(ClassificationModel, MetaFeatureExtractor.NewBuilder().Build())
        .Build();
    PairwiseModel ranker = PairwiseModel
        .NewBuilder(RankingModel, MetaFeatureExtractor.NewBuilder().Build())
        .Build();
    PairwiseModel anaphoricity = PairwiseModel
        .NewBuilder(AnaphoricityModel, MetaFeatureExtractor.AnaphoricityMFE())
        .TrainingExamples(5000000)
        .Build();

    // Train: ranking first, then the two classification-style models.
    PairwiseModelTrainer.TrainRanking(ranker);
    PairwiseModelTrainer.TrainClassification(classifier, false);
    PairwiseModelTrainer.TrainClassification(anaphoricity, true);

    // Evaluate every model on the development data.
    SetDataPath("dev");
    PairwiseModelTrainer.Test(classifier, predictionsName, false);
    PairwiseModelTrainer.Test(ranker, predictionsName, false);
    PairwiseModelTrainer.Test(anaphoricity, predictionsName, true);

    new Clusterer().DoTraining(ClusteringModelName);
}
/// <summary>Main method of mention detection.</summary>
/// <remarks>
/// For every sentence, gathers premarked entity mentions, named entity mentions and
/// NP/PRP mentions taken from the dependency parse, then assigns heads, removes spurious
/// mentions using document-wide information and, unless the properties request
/// mention-detection training, runs the mention classifier over the result.
/// </remarks>
/// <param name="doc">Annotation holding the sentences to scan.</param>
/// <param name="dict">Dictionaries passed on to mention filtering and classification.</param>
/// <param name="props">Properties consulted for nested-mention removal and training mode.</param>
/// <returns>One list of mentions per sentence, in sentence order.</returns>
public override IList<IList<Mention>> FindMentions(Annotation doc, Dictionaries dict, Properties props)
{
    IList<IList<Mention>> result = new List<IList<Mention>>();
    ICollection<string> neStrings = Generics.NewHashSet();
    IList<ICollection<IntPair>> spanSetsPerSentence = Generics.NewArrayList();
    IList<ICoreMap> sentences = doc.Get(typeof(CoreAnnotations.SentencesAnnotation));

    foreach (ICoreMap sentence in sentences)
    {
        IList<Mention> sentenceMentions = new List<Mention>();
        result.Add(sentenceMentions);

        ICollection<IntPair> mentionSpans = Generics.NewHashSet();
        ICollection<IntPair> namedEntitySpans = Generics.NewHashSet();

        ExtractPremarkedEntityMentions(sentence, sentenceMentions, mentionSpans, namedEntitySpans);
        HybridCorefMentionFinder.ExtractNamedEntityMentions(sentence, sentenceMentions, mentionSpans, namedEntitySpans);
        ExtractNPorPRPFromDependency(sentence, sentenceMentions, mentionSpans, namedEntitySpans);
        AddNamedEntityStrings(sentence, neStrings, namedEntitySpans);

        spanSetsPerSentence.Add(mentionSpans);
    }

    // extractNamedEntityModifiers(sentences, mentionSpanSetList, predictedMentions, neStrings);

    // Assign a head to every mention, sentence by sentence.
    for (int sentenceIndex = 0; sentenceIndex < sentences.Count; sentenceIndex++)
    {
        FindHead(sentences[sentenceIndex], result[sentenceIndex]);
    }

    // mention selection based on document-wise info
    bool removeNested = CorefProperties.RemoveNestedMentions(props);
    RemoveSpuriousMentions(doc, result, dict, removeNested, lang);

    // if this is for MD training, skip classification
    if (CorefProperties.IsMentionDetectionTraining(props))
    {
        return result;
    }

    mdClassifier.ClassifyMentions(result, dict, props);
    return result;
}
/// <summary>When mention boundaries are given</summary>
/// <remarks>
/// Copies the gold mentions of every sentence into a fresh list, assigns heads,
/// marks bare plurals and finally removes spurious mentions using document-wide
/// information.
/// </remarks>
/// <param name="allGoldMentions">Gold mentions, one list per sentence.</param>
/// <param name="doc">Annotation holding the sentences the gold mentions belong to.</param>
/// <param name="dict">Dictionaries passed on to spurious-mention removal.</param>
/// <param name="props">Properties consulted for nested-mention removal.</param>
/// <returns>One list of retained mentions per sentence, in sentence order.</returns>
public virtual IList<IList<Mention>> FilterPredictedMentions(IList<IList<Mention>> allGoldMentions, Annotation doc, Dictionaries dict, Properties props)
{
    IList<IList<Mention>> predictedMentions = new List<IList<Mention>>();

    // The sentence list does not change while iterating, so look it up once.
    IList<ICoreMap> sentences = doc.Get(typeof(CoreAnnotations.SentencesAnnotation));

    for (int i = 0; i < allGoldMentions.Count; i++)
    {
        ICoreMap s = sentences[i];
        IList<Mention> goldMentions = allGoldMentions[i];

        IList<Mention> mentions = new List<Mention>();
        predictedMentions.Add(mentions);
        Sharpen.Collections.AddAll(mentions, goldMentions);

        FindHead(s, mentions);

        // The original built two local span sets here that were never read
        // (see the former "todo [cdm 2013]" note). They are removed: besides being
        // dead code, building them dereferenced each head word's named-entity tag
        // and could fail when that tag was missing.

        SetBarePlural(mentions);
    }

    RemoveSpuriousMentions(doc, predictedMentions, dict, CorefProperties.RemoveNestedMentions(props), lang);
    return predictedMentions;
}
/// <summary>
/// Creates the annotator: validates the algorithm/language combination, builds the
/// coref system and, unless "coref.useCustomMentionDetection" is set, the default
/// mention annotator.
/// </summary>
/// <param name="props">
/// Pipeline properties. "coref.printConLLLoadingMessage" is temporarily set to "false"
/// while the coref system is built and removed again afterwards.
/// </param>
/// <exception cref="System.Exception">
/// Thrown when the hybrid algorithm is requested for English or when the coref system
/// cannot be created; the underlying failure is attached as the inner exception.
/// </exception>
public CorefAnnotator(Properties props)
{
    this.props = props;
    try
    {
        // if user tries to run with coref.language = ENGLISH and coref.algorithm = hybrid, throw Exception
        // we do not support those settings at this time
        if (CorefProperties.Algorithm(props).Equals(CorefProperties.CorefAlgorithmType.Hybrid) && CorefProperties.GetLanguage(props).Equals(Locale.English))
        {
            log.Error("Error: coref.algorithm=hybrid is not supported for English, " + "please change coref.algorithm or coref.language");
            throw new Exception("coref.algorithm=hybrid is not supported for English");
        }

        // suppress the loading message only for the duration of the construction
        props.SetProperty("coref.printConLLLoadingMessage", "false");
        try
        {
            corefSystem = new CorefSystem(props);
        }
        finally
        {
            // Always undo the temporary setting so a failed construction does not
            // leave the caller's properties modified.
            props.Remove("coref.printConLLLoadingMessage");
        }
    }
    catch (Exception e)
    {
        log.Error("Error creating CorefAnnotator...terminating pipeline construction!");
        log.Error(e);
        throw new Exception("Error creating CorefAnnotator", e);
    }

    // unless custom mention detection is set, just use the default coref mention detector
    performMentionDetection = !PropertiesUtils.GetBool(props, "coref.useCustomMentionDetection", false);
    if (performMentionDetection)
    {
        mentionAnnotator = new CorefMentionAnnotator(props);
    }
}
/// <summary>
/// Creates the mention finder selected by the mention-detection type in
/// <paramref name="props"/>, records its name in <c>mdName</c> and, for the hybrid
/// and rule-based finders, registers the tree and begin/end index annotations as
/// requirements.
/// </summary>
/// <param name="props">Properties from which the mention-detection type is read.</param>
/// <param name="headFinder">Head finder handed to the hybrid and rule-based finders.</param>
/// <returns>The dependency, hybrid or (by default) rule-based mention finder.</returns>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.IO.IOException"/>
private CorefMentionFinder GetMentionFinder(Properties props, IHeadFinder headFinder)
{
    var detectionType = CorefProperties.MdType(props);

    if (detectionType == CorefProperties.MentionDetectionType.Dependency)
    {
        mdName = "dependency";
        return new DependencyCorefMentionFinder(props);
    }

    if (detectionType == CorefProperties.MentionDetectionType.Hybrid)
    {
        mdName = "hybrid";
        mentionAnnotatorRequirements.Add(typeof(TreeCoreAnnotations.TreeAnnotation));
        mentionAnnotatorRequirements.Add(typeof(CoreAnnotations.BeginIndexAnnotation));
        mentionAnnotatorRequirements.Add(typeof(CoreAnnotations.EndIndexAnnotation));
        return new HybridCorefMentionFinder(headFinder, props);
    }

    // Rule-based detection, also used for any other value.
    mentionAnnotatorRequirements.Add(typeof(TreeCoreAnnotations.TreeAnnotation));
    mentionAnnotatorRequirements.Add(typeof(CoreAnnotations.BeginIndexAnnotation));
    mentionAnnotatorRequirements.Add(typeof(CoreAnnotations.EndIndexAnnotation));
    mdName = "rule";
    return new RuleBasedCorefMentionFinder(headFinder, props);
}
/// <summary>
/// Creates a document maker: stores the properties and dictionaries, opens the
/// document reader and head finder configured by <paramref name="props"/>, and
/// creates a rule-based mention finder only when gold mentions are used.
/// </summary>
/// <param name="props">Properties that configure the reader, head finder and gold-mention mode.</param>
/// <param name="dictionaries">Dictionaries kept for later document preprocessing.</param>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.IO.IOException"/>
public DocumentMaker(Properties props, Dictionaries dictionaries)
{
    this.props = props;
    this.dict = dictionaries;
    this.reader = GetDocumentReader(props);
    this.headFinder = CorefProperties.GetHeadFinder(props);

    if (CorefProperties.UseGoldMentions(props))
    {
        this.md = new RuleBasedCorefMentionFinder(this.headFinder, props);
    }
    else
    {
        this.md = null;
    }
}
/// <summary>
/// Creates a feature extractor from the given collaborators and reads the
/// CoNLL and constituency-parse switches from <paramref name="props"/>.
/// </summary>
/// <param name="props">Properties providing the CoNLL and constituency-parse flags.</param>
/// <param name="dictionaries">Dictionaries stored for later use.</param>
/// <param name="compressor">String compressor stored for later use.</param>
/// <param name="vocabulary">Vocabulary stored for later use.</param>
public FeatureExtractor(Properties props, Dictionaries dictionaries, Compressor<string> compressor, ICollection<string> vocabulary)
{
    // Collaborators handed in by the caller.
    this.dictionaries = dictionaries;
    this.compressor = compressor;
    this.vocabulary = vocabulary;

    // Switches derived from the properties.
    bool conllInput = CorefProperties.Conll(props);
    bool constituencyParse = CorefProperties.UseConstituencyParse(props);
    this.useDocSource = conllInput;
    this.useConstituencyParse = constituencyParse;
}
/// <summary>
/// Command-line entry point: loads the properties file named by the first argument,
/// selects the training dataset as input and runs the mention detection evaluator.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is passed as the value of "-props".</param>
/// <exception cref="System.ArgumentException">Thrown when no argument is supplied.</exception>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    // Fail with a clear message instead of an index error when the argument is missing.
    if (args == null || args.Length == 0)
    {
        throw new System.ArgumentException("Expected the path to a properties file as the first argument.", "args");
    }

    Properties props = StringUtils.ArgsToProperties(new string[] { "-props", args[0] });
    Dictionaries dictionaries = new Dictionaries(props);
    CorefProperties.SetInput(props, CorefProperties.Dataset.Train);
    new MentionDetectionEvaluator().Run(props, dictionaries);
}
/// <summary>
/// Creates the neural coref algorithm: reads the numeric settings from
/// <paramref name="props"/>, loads the model and the pretrained embeddings from the
/// configured paths, and builds the embedding and categorical feature extractors.
/// </summary>
/// <param name="props">Properties providing thresholds and model/embedding paths.</param>
/// <param name="dictionaries">Dictionaries handed to the categorical feature extractor.</param>
public NeuralCorefAlgorithm(Properties props, Dictionaries dictionaries)
{
    // Numeric settings.
    greedyness = NeuralCorefProperties.Greedyness(props);
    maxMentionDistance = CorefProperties.MaxMentionDistance(props);
    maxMentionDistanceWithStringMatch = CorefProperties.MaxMentionDistanceWithStringMatch(props);

    // Model.
    string modelPath = NeuralCorefProperties.ModelPath(props);
    model = IOUtils.ReadObjectAnnouncingTimingFromURLOrClasspathOrFileSystem(log, "Loading coref model", modelPath);

    // Embeddings: pretrained ones read from disk plus the ones stored in the model.
    bool conll = CorefProperties.Conll(props);
    string embeddingsPath = NeuralCorefProperties.PretrainedEmbeddingsPath(props);
    embeddingExtractor = new EmbeddingExtractor(
        conll,
        IOUtils.ReadObjectAnnouncingTimingFromURLOrClasspathOrFileSystem(log, "Loading coref embeddings", embeddingsPath),
        model.GetWordEmbeddings());

    featureExtractor = new CategoricalFeatureExtractor(props, dictionaries);
}
/// <summary>
/// Exports one dataset: selects it as the input, makes sure the output directory and
/// its "data_raw" and "gold" sub-directories exist, and runs a data exporter writing
/// to files named after the lower-cased dataset name.
/// </summary>
/// <param name="outputPath">Root directory of the export.</param>
/// <param name="dataset">Dataset to export; its lower-cased name is used as the file name.</param>
/// <param name="props">Properties updated with the dataset and passed to the exporter.</param>
/// <param name="dictionaries">Dictionaries passed to the exporter.</param>
/// <exception cref="System.Exception"/>
public static void ExportData(string outputPath, CorefProperties.Dataset dataset, Properties props, Dictionaries dictionaries)
{
    CorefProperties.SetInput(props, dataset);

    string rawDataDir = outputPath + "/data_raw/";
    string goldDir = outputPath + "/gold/";

    // Create the directories in the same order as before: root first, then the two children.
    foreach (string directory in new string[] { outputPath, rawDataDir, goldDir })
    {
        IOUtils.EnsureDir(new File(directory));
    }

    string fileName = dataset.ToString().ToLower();
    Edu.Stanford.Nlp.Coref.Neural.NeuralCorefDataExporter exporter =
        new Edu.Stanford.Nlp.Coref.Neural.NeuralCorefDataExporter(props, dictionaries, rawDataDir + fileName, goldDir + fileName);
    exporter.Run(props, dictionaries);
}
/// <summary>Main method of mention detection.</summary>
/// <remarks>
/// Main method of mention detection.
/// Extract all NP, PRP or NE, and filter out by manually written patterns.
/// For every sentence this collects premarked mentions, named entity mentions, NP/PRP
/// mentions and enumerations; optionally adds named entity modifiers (liberal mode);
/// assigns heads and bare-plural flags; and finally applies the language-specific
/// spurious-mention filters.
/// </remarks>
/// <param name="doc">Annotation holding the sentences to scan.</param>
/// <param name="dict">Dictionaries passed on to the spurious-mention filters.</param>
/// <param name="props">Properties consulted for liberal mode and nested-mention removal.</param>
/// <returns>One list of mentions per sentence, in sentence order.</returns>
public override IList<IList<Mention>> FindMentions(Annotation doc, Dictionaries dict, Properties props)
{
    IList<IList<Mention>> predictedMentions = new List<IList<Mention>>();
    ICollection<string> neStrings = Generics.NewHashSet();
    IList<ICollection<IntPair>> mentionSpanSetList = Generics.NewArrayList();
    IList<ICoreMap> sentences = doc.Get(typeof(CoreAnnotations.SentencesAnnotation));
    bool liberal = CorefProperties.LiberalMD(props);

    // extract premarked mentions, NP/PRP, named entity, enumerations
    foreach (ICoreMap s in sentences)
    {
        IList<Mention> mentions = new List<Mention>();
        predictedMentions.Add(mentions);
        ICollection<IntPair> mentionSpanSet = Generics.NewHashSet();
        ICollection<IntPair> namedEntitySpanSet = Generics.NewHashSet();

        ExtractPremarkedEntityMentions(s, mentions, mentionSpanSet, namedEntitySpanSet);
        ExtractNamedEntityMentions(s, mentions, mentionSpanSet, namedEntitySpanSet);
        ExtractNPorPRP(s, mentions, mentionSpanSet, namedEntitySpanSet);
        ExtractEnumerations(s, mentions, mentionSpanSet, namedEntitySpanSet);
        AddNamedEntityStrings(s, neStrings, namedEntitySpanSet);

        mentionSpanSetList.Add(mentionSpanSet);
    }

    if (liberal)
    {
        ExtractNamedEntityModifiers(sentences, mentionSpanSetList, predictedMentions, neStrings);
    }

    // find head
    // The loop bound was an undeclared identifier ("sz"); iterate over the sentence count.
    for (int i = 0; i < sentences.Count; i++)
    {
        FindHead(sentences[i], predictedMentions[i]);
        SetBarePlural(predictedMentions[i]);
    }

    // mention selection based on document-wise info
    if (lang == Locale.English && !liberal)
    {
        RemoveSpuriousMentionsEn(doc, predictedMentions, dict);
    }
    else if (lang == Locale.Chinese)
    {
        if (liberal)
        {
            RemoveSpuriousMentionsZhSimple(doc, predictedMentions, dict);
        }
        else
        {
            RemoveSpuriousMentionsZh(doc, predictedMentions, dict, CorefProperties.RemoveNestedMentions(props));
        }
    }

    return predictedMentions;
}
/// <summary>
/// Creates a data exporter that writes to two files: one for the exported data and
/// one for the gold clusters.
/// </summary>
/// <param name="props">Properties from which the CoNLL flag is read.</param>
/// <param name="dictionaries">Dictionaries stored for later use.</param>
/// <param name="dataPath">Path of the data output file.</param>
/// <param name="goldClusterPath">Path of the gold cluster output file.</param>
/// <exception cref="System.Exception">
/// Thrown with the message "Error creating data exporter" when either writer cannot be
/// opened; the underlying failure is attached as the inner exception.
/// </exception>
public NeuralCorefDataExporter(Properties props, Dictionaries dictionaries, string dataPath, string goldClusterPath)
{
    conll = CorefProperties.Conll(props);
    this.dictionaries = dictionaries;
    try
    {
        dataWriter = IOUtils.GetPrintWriter(dataPath);
        goldClusterWriter = IOUtils.GetPrintWriter(goldClusterPath);
    }
    catch (Exception e)
    {
        // If the second writer failed to open, the first one is already open:
        // close it so the file handle does not leak with the half-built object.
        if (dataWriter != null)
        {
            IOUtils.CloseIgnoringExceptions(dataWriter);
        }
        throw new Exception("Error creating data exporter", e);
    }
}
/// <summary>
/// Lists the annotation types this annotator requires. The tree and category
/// annotations are added unless mention detection is dependency-based, and the coref
/// mentions annotation is added when this annotator does not perform mention
/// detection itself.
/// </summary>
/// <returns>An unmodifiable set of the required annotation types.</returns>
public virtual ICollection<Type> Requires()
{
    // Requirements that apply regardless of configuration.
    Type[] always =
    {
        typeof(CoreAnnotations.TextAnnotation),
        typeof(CoreAnnotations.TokensAnnotation),
        typeof(CoreAnnotations.CharacterOffsetBeginAnnotation),
        typeof(CoreAnnotations.CharacterOffsetEndAnnotation),
        typeof(CoreAnnotations.IndexAnnotation),
        typeof(CoreAnnotations.ValueAnnotation),
        typeof(CoreAnnotations.SentencesAnnotation),
        typeof(CoreAnnotations.SentenceIndexAnnotation),
        typeof(CoreAnnotations.PartOfSpeechAnnotation),
        typeof(CoreAnnotations.LemmaAnnotation),
        typeof(CoreAnnotations.NamedEntityTagAnnotation),
        typeof(CoreAnnotations.EntityTypeAnnotation),
        typeof(CoreAnnotations.MentionsAnnotation),
        typeof(CoreAnnotations.EntityMentionIndexAnnotation),
        typeof(CoreAnnotations.CoarseNamedEntityTagAnnotation),
        typeof(CoreAnnotations.FineGrainedNamedEntityTagAnnotation),
        typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation),
        typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation)
    };
    ICollection<Type> required = new HashSet<Type>(Arrays.AsList(always));

    bool dependencyBased = CorefProperties.MdType(this.props) == CorefProperties.MentionDetectionType.Dependency;
    if (!dependencyBased)
    {
        required.Add(typeof(TreeCoreAnnotations.TreeAnnotation));
        required.Add(typeof(CoreAnnotations.CategoryAnnotation));
    }

    if (!performMentionDetection)
    {
        // Mentions are produced elsewhere, so they must already be present.
        required.Add(typeof(CorefCoreAnnotations.CorefMentionsAnnotation));
    }

    return Java.Util.Collections.UnmodifiableSet(required);
}
/// <summary>
/// Builds a CoNLL document reader for the input path configured in
/// <paramref name="props"/>.
/// </summary>
/// <param name="props">
/// Properties providing the input path, the "coref.printConLLLoadingMessage" switch,
/// the "coref.conllFileFilter" pattern (default ".*_auto_conll$") and the language.
/// </param>
/// <returns>The configured reader, or <c>null</c> when no input path is set.</returns>
private static IDocReader GetDocumentReader(Properties props)
{
    string inputPath = CorefProperties.GetInputPath(props);
    if (inputPath == null)
    {
        return null;
    }

    CoNLLDocumentReader.Options readerOptions = new CoNLLDocumentReader.Options();

    // Only override the option when the message is explicitly disabled.
    bool showLoadingMessage = PropertiesUtils.GetBool(props, "coref.printConLLLoadingMessage", true);
    if (!showLoadingMessage)
    {
        readerOptions.printConLLLoadingMessage = false;
    }

    readerOptions.annotateTokenCoref = false;
    readerOptions.SetFilter(props.GetProperty("coref.conllFileFilter", ".*_auto_conll$"));
    readerOptions.lang = CorefProperties.GetLanguage(props);

    return new CoNLLDocumentReader(inputPath, readerOptions);
}
/// <summary>
/// Returns the cached pipeline, creating it on first use with an annotator list
/// derived from <paramref name="props"/>.
/// </summary>
/// <remarks>
/// For CoNLL input the list is "lemma" ("lemma, ner" for Chinese); otherwise it is
/// "pos, lemma, ner, " followed by "parse" or "depparse". ", coref.mention" is appended
/// unless gold mentions are used. "ner.applyFineGrained" is always set to "false".
/// </remarks>
/// <param name="props">Properties the pipeline properties are based on.</param>
/// <returns>The shared pipeline instance.</returns>
private StanfordCoreNLP GetStanfordCoreNLP(Properties props)
{
    if (coreNLP != null)
    {
        return coreNLP;
    }

    Properties pipelineProps = new Properties(props);

    string annotators;
    if (CorefProperties.Conll(props))
    {
        if (CorefProperties.GetLanguage(props) == Locale.Chinese)
        {
            annotators = "lemma, ner";
        }
        else
        {
            annotators = "lemma";
        }
    }
    else
    {
        string parser = CorefProperties.UseConstituencyParse(props) ? "parse" : "depparse";
        annotators = "pos, lemma, ner, " + parser;
    }

    // Mention detection is part of the pipeline unless gold mentions are supplied.
    if (!CorefProperties.UseGoldMentions(props))
    {
        annotators = annotators + ", coref.mention";
    }

    pipelineProps.SetProperty("annotators", annotators);
    pipelineProps.SetProperty("ner.applyFineGrained", "false");

    coreNLP = new StanfordCoreNLP(pipelineProps, false);
    return coreNLP;
}
/// <summary>
/// Reads the next input document, annotates it with the pipeline and converts it
/// into a <c>Document</c>.
/// </summary>
/// <remarks>
/// When the constituency parse is not used, tree annotations are removed from every
/// sentence before the pipeline runs. For CoNLL input the annotation is flagged with
/// <c>UseMarkedDiscourseAnnotation</c>.
/// </remarks>
/// <returns>The next document, or <c>null</c> when the reader has no more input.</returns>
/// <exception cref="System.Exception"/>
public virtual Document NextDoc()
{
    InputDoc next = reader.NextDoc();
    if (next == null)
    {
        return null;
    }

    bool keepTrees = CorefProperties.UseConstituencyParse(props);
    if (!keepTrees)
    {
        foreach (ICoreMap sent in next.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
        {
            sent.Remove(typeof(TreeCoreAnnotations.TreeAnnotation));
        }
    }

    StanfordCoreNLP pipeline = GetStanfordCoreNLP(props);
    pipeline.Annotate(next.annotation);

    if (CorefProperties.Conll(props))
    {
        next.annotation.Set(typeof(CoreAnnotations.UseMarkedDiscourseAnnotation), true);
    }

    return MakeDocument(next);
}
/// <summary>
/// Creates the mention annotator: builds the dictionaries, head finder and mention
/// finder from <paramref name="props"/> and registers the annotation types the
/// annotator requires.
/// </summary>
/// <remarks>
/// Any exception raised during set-up is logged and not rethrown.
/// NOTE(review): after such a failure the instance is left partially initialised —
/// confirm that callers expect this.
/// </remarks>
/// <param name="props">Properties kept as the annotator's coref properties.</param>
public CorefMentionAnnotator(Properties props)
{
    try
    {
        corefProperties = props;
        dictionaries = new Dictionaries(props);
        headFinder = CorefProperties.GetHeadFinder(props);
        md = GetMentionFinder(props, headFinder);
        log.Info("Using mention detector type: " + mdName);

        // Annotations needed whatever mention finder was selected.
        Type[] commonRequirements =
        {
            typeof(CoreAnnotations.TokensAnnotation),
            typeof(CoreAnnotations.SentencesAnnotation),
            typeof(CoreAnnotations.PartOfSpeechAnnotation),
            typeof(CoreAnnotations.NamedEntityTagAnnotation),
            typeof(CoreAnnotations.EntityTypeAnnotation),
            typeof(CoreAnnotations.IndexAnnotation),
            typeof(CoreAnnotations.TextAnnotation),
            typeof(CoreAnnotations.ValueAnnotation),
            typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation),
            typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation)
        };
        Sharpen.Collections.AddAll(mentionAnnotatorRequirements, Arrays.AsList(commonRequirements));
    }
    catch (Exception e)
    {
        log.Info("Error with building coref mention annotator!");
        log.Info(e);
    }
}
/// <summary>
/// Creates the categorical feature extractor. For CoNLL input a genre-to-index table
/// is built; otherwise the table is <c>null</c>.
/// </summary>
/// <remarks>
/// Genres are numbered consecutively from 0 in the order bc, bn, mz, nw, [pt], tc, wb,
/// where "pt" is only present when the language is English.
/// </remarks>
/// <param name="props">Properties providing the CoNLL flag and the language.</param>
/// <param name="dictionaries">Dictionaries stored for later use.</param>
public CategoricalFeatureExtractor(Properties props, Dictionaries dictionaries)
{
    this.dictionaries = dictionaries;
    conll = CorefProperties.Conll(props);

    if (!conll)
    {
        genres = null;
        return;
    }

    List<string> genreNames = new List<string> { "bc", "bn", "mz", "nw" };
    if (CorefProperties.GetLanguage(props) == Locale.English)
    {
        genreNames.Add("pt");
    }
    genreNames.Add("tc");
    genreNames.Add("wb");

    // Each genre's index is its position in the list.
    Dictionary<string, int> genreIndex = new Dictionary<string, int>();
    for (int position = 0; position < genreNames.Count; position++)
    {
        genreIndex[genreNames[position]] = position;
    }
    genres = genreIndex;
}
/// <summary>
/// Builds a <c>Document</c> from an annotated input.
/// </summary>
/// <remarks>
/// With gold mentions enabled, a fresh <c>Mention</c> is created for each gold mention
/// span and heads are assigned by the mention finder; otherwise the mentions already
/// stored on each sentence are used. Gold mention heads are resolved when the input
/// carries gold mentions, and the document is preprocessed before it is returned.
/// </remarks>
/// <param name="input">Input document with its annotation and optional gold mentions.</param>
/// <returns>The preprocessed document.</returns>
/// <exception cref="System.Exception"/>
public virtual Document MakeDocument(InputDoc input)
{
    IList<IList<Mention>> mentionsPerSentence = new List<IList<Mention>>();
    IList<ICoreMap> sentences = input.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));

    if (!CorefProperties.UseGoldMentions(props))
    {
        // Predicted mentions were already attached to the sentences.
        foreach (ICoreMap sentence in sentences)
        {
            mentionsPerSentence.Add(sentence.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation)));
        }
    }
    else
    {
        for (int sentenceIndex = 0; sentenceIndex < sentences.Count; sentenceIndex++)
        {
            ICoreMap sentence = sentences[sentenceIndex];
            IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

            IList<Mention> rebuilt = new List<Mention>();
            mentionsPerSentence.Add(rebuilt);

            foreach (Mention gold in input.goldMentions[sentenceIndex])
            {
                // Copy only the span of the gold mention into a new mention.
                List<CoreLabel> span = new List<CoreLabel>(tokens.SubList(gold.startIndex, gold.endIndex));
                rebuilt.Add(new Mention(-1, gold.startIndex, gold.endIndex, tokens, null, null, span));
            }

            md.FindHead(sentence, rebuilt);
        }
    }

    Document document = new Document(input, mentionsPerSentence);
    if (input.goldMentions != null)
    {
        FindGoldMentionHeads(document);
    }

    DocumentPreprocessor.Preprocess(document, dict, null, headFinder);
    return document;
}
/// <summary>
/// Creates the statistical coref algorithm from properties. Delegates to the
/// constructor overload that takes explicit settings, reading the word counts path,
/// ranking model path, maximum mention distances and pairwise score thresholds from
/// <paramref name="props"/>.
/// </summary>
/// <param name="props">Properties the individual settings are read from.</param>
/// <param name="dictionaries">Dictionaries passed through to the delegated constructor.</param>
public StatisticalCorefAlgorithm(Properties props, Dictionaries dictionaries) : this(props, dictionaries, StatisticalCorefProperties.WordCountsPath(props), StatisticalCorefProperties.RankingModelPath(props), CorefProperties.MaxMentionDistance(props), CorefProperties.MaxMentionDistanceWithStringMatch(props), StatisticalCorefProperties .PairwiseScoreThresholds(props)) { }
/// <summary>
/// Returns the path of the pretrained embeddings: the value of
/// "coref.neural.embeddingsPath" when set, otherwise a language-specific default
/// ("chinese" for Chinese, "english" for every other language).
/// </summary>
/// <param name="props">Properties providing the language and the optional override.</param>
/// <returns>The embeddings path.</returns>
public static string PretrainedEmbeddingsPath(Properties props)
{
    string languageName;
    if (CorefProperties.GetLanguage(props) == Locale.Chinese)
    {
        languageName = "chinese";
    }
    else
    {
        languageName = "english";
    }

    string fallback = "edu/stanford/nlp/models/coref/neural/" + languageName + "-embeddings.ser.gz";
    return PropertiesUtils.GetString(props, "coref.neural.embeddingsPath", fallback);
}
/// <summary>
/// Finds coref mentions in the annotation and records them on the document,
/// its sentences and its tokens.
/// </summary>
/// <remarks>
/// Steps, in order:
/// sets the "removeNestedMentions" property on the stored coref properties;
/// runs the mention finder;
/// stores the mentions per sentence and in one document-wide list, assigning
/// <c>sentNum</c> and a document-wide <c>mentionID</c> to each mention;
/// tags every token covered by a mention with that mention's index;
/// and finally stores the two-way mapping between coref mentions and entity mentions.
/// </remarks>
/// <param name="annotation">Document annotation to read from and write to.</param>
public virtual void Annotate(Annotation annotation)
{
    IList<ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));

    // TO DO: be careful, this could introduce a really hard to find bug
    // this is necessary for Chinese coreference
    // removeNested needs to be set to "false" for newswire text or big performance drop
    string docID = annotation.Get(typeof(CoreAnnotations.DocIDAnnotation)) ?? string.Empty;
    bool keepNestedMentions =
        docID.Contains("nw")
        && (CorefProperties.Conll(corefProperties) || corefProperties.GetProperty("coref.input.type", "raw").Equals("conll"))
        && CorefProperties.GetLanguage(corefProperties) == Locale.Chinese
        && PropertiesUtils.GetBool(corefProperties, "coref.specialCaseNewswire");
    corefProperties.SetProperty("removeNestedMentions", keepNestedMentions ? "false" : "true");

    IList<IList<Mention>> mentionsPerSentence = md.FindMentions(annotation, dictionaries, corefProperties);

    // build list of coref mentions in this document
    annotation.Set(typeof(CorefCoreAnnotations.CorefMentionsAnnotation), new List<Mention>());

    // initialize each token with an empty set of corresponding coref mention id's
    foreach (CoreLabel token in annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
    {
        token.Set(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation), new ArraySet<int>());
    }

    // running index into the document-wide mention list
    int nextMentionId = 0;
    for (int sentenceNumber = 0; sentenceNumber < sentences.Count; sentenceNumber++)
    {
        ICoreMap sentence = sentences[sentenceNumber];
        IList<Mention> sentenceMentions = mentionsPerSentence[sentenceNumber];

        sentence.Set(typeof(CorefCoreAnnotations.CorefMentionsAnnotation), sentenceMentions);
        Sharpen.Collections.AddAll(annotation.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation)), sentenceMentions);

        // set sentNum correctly for each coref mention
        foreach (Mention mention in sentenceMentions)
        {
            mention.sentNum = sentenceNumber;
        }

        // assign latest mentionID, annotate tokens with coref mention info
        foreach (Mention mention in sentenceMentions)
        {
            mention.mentionID = nextMentionId;

            // go through all the tokens corresponding to this coref mention
            // annotate them with the index into the document wide coref mention list
            for (int tokenIndex = mention.startIndex; tokenIndex < mention.endIndex; tokenIndex++)
            {
                CoreLabel covered = sentence.Get(typeof(CoreAnnotations.TokensAnnotation))[tokenIndex];
                covered.Get(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation)).Add(nextMentionId);
            }

            nextMentionId++;
        }
    }

    // synch coref mentions to entity mentions
    Dictionary<int, int> corefToEntity = new Dictionary<int, int>();
    Dictionary<int, int> entityToCoref = new Dictionary<int, int>();
    foreach (CoreLabel token in annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
    {
        if (token.Get(typeof(CoreAnnotations.EntityMentionIndexAnnotation)) == null)
        {
            // token is not part of any entity mention
            continue;
        }

        int entityMentionIndex = token.Get(typeof(CoreAnnotations.EntityMentionIndexAnnotation));
        ICoreMap entityMention = annotation.Get(typeof(CoreAnnotations.MentionsAnnotation))[entityMentionIndex];

        foreach (int corefMentionIndex in token.Get(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation)))
        {
            Mention candidate = annotation.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation))[corefMentionIndex];
            if (SynchCorefMentionEntityMention(annotation, candidate, entityMention))
            {
                entityToCoref[entityMentionIndex] = corefMentionIndex;
                corefToEntity[corefMentionIndex] = entityMentionIndex;
            }
        }
    }

    // store mappings between entity mentions and coref mentions in annotation
    annotation.Set(typeof(CoreAnnotations.CorefMentionToEntityMentionMappingAnnotation), corefToEntity);
    annotation.Set(typeof(CoreAnnotations.EntityMentionToCorefMentionMappingAnnotation), entityToCoref);
}
/// <summary>
/// Runs the hybrid coref system over all documents produced by its document maker,
/// optionally writing CoNLL output files and scoring them.
/// </summary>
/// <remarks>
/// Documents are processed through a multi-threaded wrapper. When scoring is enabled,
/// three output files (gold, predicted, coref-predicted) are written under the CoNLL
/// output path and then passed to the scorer. The output writers are closed in a
/// <c>finally</c> block so they are released even when processing fails.
/// </remarks>
/// <param name="props">Properties controlling threads, logging, timing, memory checks and scoring.</param>
/// <exception cref="System.Exception"/>
public static void RunCoref(Properties props)
{
    /*
     * property, environment setting
     */
    Redwood.HideChannelsEverywhere("debug-cluster", "debug-mention", "debug-preprocessor", "debug-docreader", "debug-mergethres", "debug-featureselection", "debug-md");
    int nThreads = HybridCorefProperties.GetThreadCounts(props);
    string timeStamp = Calendar.GetInstance().GetTime().ToString().ReplaceAll("\\s", "-").ReplaceAll(":", "-");
    Logger logger = Logger.GetLogger(typeof(Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem).FullName);

    // set log file path
    // NOTE(review): in the Java original Properties.contains tests values, not keys —
    // confirm that Contains checks for the key here.
    if (props.Contains(HybridCorefProperties.LogProp))
    {
        File logFile = new File(props.GetProperty(HybridCorefProperties.LogProp));
        RedwoodConfiguration.Current().Handlers(RedwoodConfiguration.Handlers.File(logFile)).Apply();
        Redwood.Log("Starting coref log");
    }
    log.Info(props.ToString());
    if (HybridCorefProperties.CheckMemory(props))
    {
        CheckMemoryUsage();
    }
    Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem cs = new Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem(props);

    /*
     * output setting
     */
    // prepare conll output
    string goldOutput = null;
    string beforeCorefOutput = null;
    string afterCorefOutput = null;
    PrintWriter writerGold = null;
    PrintWriter writerBeforeCoref = null;
    PrintWriter writerAfterCoref = null;
    DateTime startTime = null;

    try
    {
        if (HybridCorefProperties.DoScore(props))
        {
            string pathOutput = CorefProperties.ConllOutputPath(props);
            (new File(pathOutput)).Mkdir();
            goldOutput = pathOutput + "output-" + timeStamp + ".gold.txt";
            beforeCorefOutput = pathOutput + "output-" + timeStamp + ".predicted.txt";
            afterCorefOutput = pathOutput + "output-" + timeStamp + ".coref.predicted.txt";
            writerGold = new PrintWriter(new FileOutputStream(goldOutput));
            writerBeforeCoref = new PrintWriter(new FileOutputStream(beforeCorefOutput));
            writerAfterCoref = new PrintWriter(new FileOutputStream(afterCorefOutput));
        }

        // run coref
        MulticoreWrapper<Pair<Document, Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem>, StringBuilder[]> wrapper =
            new MulticoreWrapper<Pair<Document, Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem>, StringBuilder[]>(nThreads, new _IThreadsafeProcessor_134());

        // conll output and logs
        if (HybridCorefProperties.CheckTime(props))
        {
            startTime = new DateTime();
            System.Console.Error.Printf("END-TO-END COREF Start time: %s\n", startTime);
        }

        // run processes
        int docCnt = 0;
        while (true)
        {
            Document document = cs.docMaker.NextDoc();
            if (document == null)
            {
                break;
            }
            wrapper.Put(Pair.MakePair(document, cs));
            docCnt = LogOutput(wrapper, writerGold, writerBeforeCoref, writerAfterCoref, docCnt);
        }

        // Finished reading the input. Wait for jobs to finish
        wrapper.Join();
        docCnt = LogOutput(wrapper, writerGold, writerBeforeCoref, writerAfterCoref, docCnt);
    }
    finally
    {
        // Close the writers on every path; they must also be closed before scoring
        // reads the files back.
        IOUtils.CloseIgnoringExceptions(writerGold);
        IOUtils.CloseIgnoringExceptions(writerBeforeCoref);
        IOUtils.CloseIgnoringExceptions(writerAfterCoref);
    }

    if (HybridCorefProperties.CheckTime(props))
    {
        System.Console.Error.Printf("END-TO-END COREF Elapsed time: %.3f seconds\n", (((new DateTime()).GetTime() - startTime.GetTime()) / 1000F));
    }

    // System.err.printf("CORENLP PROCESS TIME TOTAL: %.3f seconds\n", cs.mentionExtractor.corenlpProcessTime);
    if (HybridCorefProperties.CheckMemory(props))
    {
        CheckMemoryUsage();
    }

    // scoring
    if (HybridCorefProperties.DoScore(props))
    {
        string summary = CorefScorer.GetEvalSummary(CorefProperties.GetScorerPath(props), goldOutput, beforeCorefOutput);
        CorefScorer.PrintScoreSummary(summary, logger, false);
        summary = CorefScorer.GetEvalSummary(CorefProperties.GetScorerPath(props), goldOutput, afterCorefOutput);
        CorefScorer.PrintScoreSummary(summary, logger, true);
        CorefScorer.PrintFinalConllScore(summary);
    }
}
/// <summary>
/// Returns the path of the neural coref model: the value of "coref.neural.modelPath"
/// when set, otherwise a default built from the language ("chinese" or "english") and
/// the input kind ("-model-conll" or "-model-default").
/// </summary>
/// <param name="props">Properties providing the language, the CoNLL flag and the optional override.</param>
/// <returns>The model path.</returns>
public static string ModelPath(Properties props)
{
    string languageName = "english";
    if (CorefProperties.GetLanguage(props) == Locale.Chinese)
    {
        languageName = "chinese";
    }

    string variant = "-model-default";
    if (CorefProperties.Conll(props))
    {
        variant = "-model-conll";
    }

    string fallback = "edu/stanford/nlp/models/coref/neural/" + languageName + variant + ".ser.gz";
    return PropertiesUtils.GetString(props, "coref.neural.modelPath", fallback);
}
/// <summary>
/// Creates a rule-based mention finder from properties. Delegates to the
/// three-argument constructor, passing <c>true</c> as its first argument, the given
/// head finder, and the language read from <paramref name="props"/>.
/// </summary>
/// <param name="headFinder">Head finder passed through to the delegated constructor.</param>
/// <param name="props">Properties the language is read from.</param>
public RuleBasedCorefMentionFinder(IHeadFinder headFinder, Properties props) : this(true, headFinder, CorefProperties.GetLanguage(props)) { }
/// <summary>
/// Creates a hybrid mention finder. The mention-detection classifier is loaded from
/// the configured model location, except in mention-detection training mode, where
/// it is left <c>null</c>.
/// </summary>
/// <param name="headFinder">Head finder stored for later use.</param>
/// <param name="props">Properties providing the language, training flag and model location.</param>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.IO.IOException"/>
public HybridCorefMentionFinder(IHeadFinder headFinder, Properties props)
{
    this.headFinder = headFinder;
    this.lang = CorefProperties.GetLanguage(props);

    if (CorefProperties.IsMentionDetectionTraining(props))
    {
        // No classifier is loaded while training mention detection.
        mdClassifier = null;
    }
    else
    {
        mdClassifier = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(CorefProperties.GetMentionDetectionModel(props));
    }
}
/// <summary>
/// Builds the default location of a statistical coref model file:
/// the models directory, the model name, a "_conll" suffix for CoNLL input, and the
/// ".ser.gz" extension.
/// </summary>
/// <param name="props">Properties providing the CoNLL flag.</param>
/// <param name="modelName">Base name of the model file.</param>
/// <returns>The default model path.</returns>
private static string GetDefaultModelPath(Properties props, string modelName)
{
    string suffix = string.Empty;
    if (CorefProperties.Conll(props))
    {
        suffix = "_conll";
    }

    return "edu/stanford/nlp/models/coref/statistical/" + modelName + suffix + ".ser.gz";
}