/// <summary>
/// Trains <paramref name="model"/> with a ranking objective. The compressor and the
/// extracted training documents are read from the files named by
/// <c>StatisticalCorefTrainer.compressorFile</c> and
/// <c>StatisticalCorefTrainer.extractedFeaturesFile</c>. For every epoch the documents are
/// shuffled, each document's examples are grouped by <c>mentionId2</c>, and for every group the
/// highest-scoring positive and highest-scoring negative example are handed to the model's
/// <c>Learn</c> method. The model is written out once all epochs are done.
/// </summary>
/// <param name="model">
/// The model to train. When it is a <c>MaxMarginMentionRanker</c>, an extra example built from
/// the group's first example is appended to each group and negative scores are adjusted by the
/// ranker's per-error-type costs before the top negative is chosen.
/// </param>
/// <exception cref="System.Exception"/>
public static void TrainRanking(PairwiseModel model)
{
    Redwood.Log("scoref-train", "Reading compression...");
    Compressor<string> compressor = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.compressorFile);
    Redwood.Log("scoref-train", "Reading train data...");
    IList<DocumentExamples> trainDocuments = IOUtils.ReadObjectFromFile(StatisticalCorefTrainer.extractedFeaturesFile);
    Redwood.Log("scoref-train", "Training...");

    // The model's runtime type does not change while training, so test it once up front.
    MaxMarginMentionRanker ranker = model as MaxMarginMentionRanker;

    for (int i = 0; i < model.GetNumEpochs(); i++)
    {
        Java.Util.Collections.Shuffle(trainDocuments);
        int j = 0;
        foreach (DocumentExamples doc in trainDocuments)
        {
            j++;
            Redwood.Log("scoref-train", "On epoch: " + i + " / " + model.GetNumEpochs() + ", document: " + j + " / " + trainDocuments.Count);

            // Group the document's examples by the id of their second mention.
            IDictionary<int, IList<Example>> mentionToPotentialAntecedents = new Dictionary<int, IList<Example>>();
            foreach (Example e in doc.examples)
            {
                int mention = e.mentionId2;
                // The Dictionary indexer throws KeyNotFoundException for a missing key (it does
                // not return null), so the lookup has to go through TryGetValue.
                IList<Example> potentialAntecedents;
                if (!mentionToPotentialAntecedents.TryGetValue(mention, out potentialAntecedents))
                {
                    potentialAntecedents = new List<Example>();
                    mentionToPotentialAntecedents[mention] = potentialAntecedents;
                }
                potentialAntecedents.Add(e);
            }

            IList<IList<Example>> examples = new List<IList<Example>>(mentionToPotentialAntecedents.Values);
            Java.Util.Collections.Shuffle(examples);
            foreach (IList<Example> es in examples)
            {
                if (es.Count == 0)
                {
                    continue;
                }

                if (ranker != null)
                {
                    // True when no example in the group is labelled positive.
                    // NOTE(review): the predicate was missing in this code (a null was passed);
                    // "no example has label == 1" is reconstructed from the assertions below,
                    // which require a positive example to exist after the extra example is
                    // appended. Confirm against the intended behaviour.
                    bool noAntecedent = true;
                    foreach (Example candidate in es)
                    {
                        if (candidate.label == 1)
                        {
                            noAntecedent = false;
                            break;
                        }
                    }
                    es.Add(new Example(es[0], noAntecedent));

                    // Pass 1: find the highest-scoring positive example.
                    double maxPositiveScore = -double.MaxValue;
                    Example maxScoringPositive = null;
                    foreach (Example e_1 in es)
                    {
                        double score = model.Predict(e_1, doc.mentionFeatures, compressor);
                        if (e_1.label == 1)
                        {
                            System.Diagnostics.Debug.Assert((!noAntecedent ^ e_1.IsNewLink()));
                            if (score > maxPositiveScore)
                            {
                                maxPositiveScore = score;
                                maxScoringPositive = e_1;
                            }
                        }
                    }
                    System.Diagnostics.Debug.Assert((maxScoringPositive != null));

                    // Pass 2: find the negative example with the highest cost-adjusted score.
                    double maxNegativeScore = -double.MaxValue;
                    Example maxScoringNegative = null;
                    MaxMarginMentionRanker.ErrorType maxScoringEt = null;
                    foreach (Example e_2 in es)
                    {
                        double score = model.Predict(e_2, doc.mentionFeatures, compressor);
                        if (e_2.label != 1)
                        {
                            System.Diagnostics.Debug.Assert((!(noAntecedent && e_2.IsNewLink())));

                            // Classify the mistake this negative would represent; Wl is the
                            // default when neither special case below applies.
                            MaxMarginMentionRanker.ErrorType et = MaxMarginMentionRanker.ErrorType.Wl;
                            if (noAntecedent && !e_2.IsNewLink())
                            {
                                et = MaxMarginMentionRanker.ErrorType.Fl;
                            }
                            else if (!noAntecedent && e_2.IsNewLink())
                            {
                                if (e_2.mentionType2 == Dictionaries.MentionType.Pronominal)
                                {
                                    et = MaxMarginMentionRanker.ErrorType.FnPron;
                                }
                                else
                                {
                                    et = MaxMarginMentionRanker.ErrorType.Fn;
                                }
                            }

                            // Fold the error-type cost into the score: scaled margin violation
                            // in multiplicative mode, plain additive offset otherwise.
                            if (ranker.multiplicativeCost)
                            {
                                score = ranker.costs[et.id] * (1 - maxPositiveScore + score);
                            }
                            else
                            {
                                score += ranker.costs[et.id];
                            }

                            if (score > maxNegativeScore)
                            {
                                maxNegativeScore = score;
                                maxScoringNegative = e_2;
                                maxScoringEt = et;
                            }
                        }
                    }
                    System.Diagnostics.Debug.Assert((maxScoringNegative != null));
                    ranker.Learn(maxScoringPositive, maxScoringNegative, doc.mentionFeatures, compressor, maxScoringEt);
                }
                else
                {
                    // Generic pairwise model: a single pass picks the top positive and the top
                    // negative by raw score.
                    double maxPositiveScore = -double.MaxValue;
                    double maxNegativeScore = -double.MaxValue;
                    Example maxScoringPositive = null;
                    Example maxScoringNegative = null;
                    foreach (Example e_1 in es)
                    {
                        double score = model.Predict(e_1, doc.mentionFeatures, compressor);
                        if (e_1.label == 1)
                        {
                            if (score > maxPositiveScore)
                            {
                                maxPositiveScore = score;
                                maxScoringPositive = e_1;
                            }
                        }
                        else
                        {
                            if (score > maxNegativeScore)
                            {
                                maxNegativeScore = score;
                                maxScoringNegative = e_1;
                            }
                        }
                    }
                    // NOTE(review): when a group has no positive (or no negative) example the
                    // corresponding argument is null here; confirm PairwiseModel.Learn accepts that.
                    model.Learn(maxScoringPositive, maxScoringNegative, doc.mentionFeatures, compressor, 1);
                }
            }
        }
    }

    Redwood.Log("scoref-train", "Writing models...");
    model.WriteModel();
}
/// <summary>
/// Performs one learning step from a (correct, incorrect) pair of examples. The feature
/// counts of <paramref name="correct"/> are subtracted from those of
/// <paramref name="incorrect"/>, and the resulting counter is passed to the classifier.
/// </summary>
/// <param name="correct">Example whose feature counts are subtracted.</param>
/// <param name="incorrect">Example whose feature counts form the starting point.</param>
/// <param name="mentionFeatures">Per-mention feature vectors forwarded to the feature extractor.</param>
/// <param name="compressor">Compressor forwarded to the feature extractor.</param>
/// <param name="errorType">
/// Error type whose <c>id</c> selects the cost (multiplicative mode) or the loss
/// (otherwise) used for this update.
/// </param>
public virtual void Learn(Example correct, Example incorrect, IDictionary<int, CompressedFeatureVector> mentionFeatures, Compressor<string> compressor, MaxMarginMentionRanker.ErrorType errorType)
{
    ICounter<string> correctFeatures = meta.GetFeatures(correct, mentionFeatures, compressor);
    ICounter<string> featureDifference = meta.GetFeatures(incorrect, mentionFeatures, compressor);

    // featureDifference becomes (incorrect - correct), feature by feature.
    foreach (KeyValuePair<string, double> entry in correctFeatures.EntrySet())
    {
        featureDifference.DecrementCount(entry.Key, entry.Value);
    }

    if (!multiplicativeCost)
    {
        // Non-multiplicative mode: third argument fixed at 1.0, loss chosen per error type.
        classifier.Learn(featureDifference, 1.0, 1.0, losses[errorType.id]);
        return;
    }

    // Multiplicative mode: per-error-type cost as third argument, the single shared loss.
    classifier.Learn(featureDifference, 1.0, costs[errorType.id], loss);
}