/// <summary>
/// Constructs a pairwise coreference model from its builder: copies the
/// scalar configuration fields and creates the underlying linear classifier,
/// resolving where the classifier's serialized weights live.
/// </summary>
/// <param name="builder">Builder carrying the model configuration.</param>
public PairwiseModel(PairwiseModel.Builder builder)
{
    name = builder.name;
    meta = builder.meta;
    trainingExamples = builder.trainingExamples;
    epochs = builder.epochs;
    singletonRatio = builder.singletonRatio;
    // Resolve the classifier's model file:
    //   * null stays null (no pre-trained weights);
    //   * an explicit serialized file (".ser" or ".gz") is used as-is;
    //   * otherwise the value is treated as a model directory name under
    //     pairwiseModelsPath, with the conventional "model.ser" file inside.
    // BUGFIX: use ordinal comparison for the extension checks — the previous
    // culture-sensitive EndsWith can misclassify paths under some cultures (CA1310).
    string modelFile = builder.modelFile;
    string resolvedModelPath;
    if (modelFile == null)
    {
        resolvedModelPath = null;
    }
    else if (modelFile.EndsWith(".ser", StringComparison.Ordinal) || modelFile.EndsWith(".gz", StringComparison.Ordinal))
    {
        resolvedModelPath = modelFile;
    }
    else
    {
        resolvedModelPath = StatisticalCorefTrainer.pairwiseModelsPath + modelFile + "/model.ser";
    }
    classifier = new SimpleLinearClassifier(builder.loss, builder.learningRateSchedule, builder.regularizationStrength, resolvedModelPath);
    // Human-readable dump of the builder's configuration, used elsewhere for logging.
    str = StatisticalCorefTrainer.FieldValues(builder);
}
/// <summary>
/// Constructs a meta-feature extractor from its builder, copying the
/// conjunction sets and filtering configuration.
/// </summary>
/// <param name="builder">Builder carrying the extractor configuration.</param>
public MetaFeatureExtractor(MetaFeatureExtractor.Builder builder)
{
    anaphoricityClassifier = builder.anaphoricityClassifier;
    // Anaphoricity models score single mentions, so pair conjunctions are
    // dropped entirely for them; otherwise copy the builder's set.
    pairConjunctions = anaphoricityClassifier
        ? new HashSet<MetaFeatureExtractor.PairConjunction>()
        : new HashSet<MetaFeatureExtractor.PairConjunction>(builder.pairConjunctions);
    singleConjunctions = new HashSet<MetaFeatureExtractor.SingleConjunction>(builder.singleConjunctions);
    disallowedPrefixes = builder.disallowedPrefixes;
    neTypeConjuntion = builder.useNEType;
    // Human-readable dump of the builder's configuration, used elsewhere for logging.
    str = StatisticalCorefTrainer.FieldValues(builder);
}
/// <summary>
/// Runs the full clustering-policy training loop: seeds the classifier with
/// hand-picked initial weights, loads the training documents, then for
/// <c>RetrainIterations</c> rounds trains on a bounded buffer of collected
/// examples, periodically evaluates on the training set, and checkpoints the
/// best / latest / every-10th-iteration models under
/// <c>clusteringModelsPath/modelName/</c>, logging progress to a "progress" file.
/// </summary>
/// <param name="modelName">Directory name under the clustering models path for all outputs.</param>
/// <exception cref="Exception">If setup (config/progress files, data loading) or weight writing fails.</exception>
public virtual void DoTraining(string modelName)
{
    // Hand-tuned initial weights; the "-single" variants cover singleton mentions.
    classifier.SetWeight("bias", -0.3);
    classifier.SetWeight("anaphorSeen", -1);
    classifier.SetWeight("max-ranking", 1);
    classifier.SetWeight("bias-single", -0.3);
    classifier.SetWeight("anaphorSeen-single", -1);
    classifier.SetWeight("max-ranking-single", 1);
    string outputPath = StatisticalCorefTrainer.clusteringModelsPath + modelName + "/";
    File outDir = new File(outputPath);
    if (!outDir.Exists())
    {
        outDir.Mkdir();
    }
    PrintWriter progressWriter;
    IList<ClustererDataLoader.ClustererDoc> trainDocs;
    try
    {
        // Record the configuration used for this run, then open the progress log.
        PrintWriter configWriter = new PrintWriter(outputPath + "config", "UTF-8");
        configWriter.Print(StatisticalCorefTrainer.FieldValues(this));
        configWriter.Close();
        progressWriter = new PrintWriter(outputPath + "progress", "UTF-8");
        Redwood.Log("scoref.train", "Loading training data");
        StatisticalCorefTrainer.SetDataPath("dev");
        trainDocs = ClustererDataLoader.LoadDocuments(MaxDocs);
    }
    catch (Exception e)
    {
        throw new Exception("Error setting up training", e);
    }
    double bestTrainScore = 0;
    IList<IList<Pair<Clusterer.CandidateAction, Clusterer.CandidateAction>>> examples = new List<IList<Pair<Clusterer.CandidateAction, Clusterer.CandidateAction>>>();
    for (int iteration = 0; iteration < RetrainIterations; iteration++)
    {
        Redwood.Log("scoref.train", "ITERATION " + iteration);
        classifier.PrintWeightVector(null);
        Redwood.Log("scoref.train", string.Empty);
        try
        {
            classifier.WriteWeights(outputPath + "model");
            classifier.PrintWeightVector(IOUtils.GetPrintWriter(outputPath + "weights"));
        }
        catch (Exception e)
        {
            // BUGFIX: previously `throw new Exception()` — the caught exception
            // was discarded, losing the failure's message and stack trace.
            // Chain it as the inner exception instead.
            throw new Exception("Error writing model weights", e);
        }
        long start = Runtime.CurrentTimeMillis();
        Java.Util.Collections.Shuffle(trainDocs, random);
        // Bound the replay buffer: keep only the most recent
        // BufferSizeMultiplier * |trainDocs| example lists.
        examples = examples.SubList(Math.Max(0, examples.Count - BufferSizeMultiplier * trainDocs.Count), examples.Count);
        TrainPolicy(examples);
        if (iteration % EvalFrequency == 0)
        {
            double trainScore = EvaluatePolicy(trainDocs, true);
            if (trainScore > bestTrainScore)
            {
                bestTrainScore = trainScore;
                WriteModel("best", outputPath);
            }
            if (iteration % 10 == 0)
            {
                WriteModel("iter_" + iteration, outputPath);
            }
            WriteModel("last", outputPath);
            double timeElapsed = (Runtime.CurrentTimeMillis() - start) / 1000.0;
            // Cache hit rates for the cost ("ff") and score ("s") caches, plus the feature cache.
            double ffhr = Clusterer.State.ffHits / (double)(Clusterer.State.ffHits + Clusterer.State.ffMisses);
            double shr = Clusterer.State.sHits / (double)(Clusterer.State.sHits + Clusterer.State.sMisses);
            double fhr = featuresCacheHits / (double)(featuresCacheHits + featuresCacheMisses);
            Redwood.Log("scoref.train", modelName);
            // NOTE(review): these "%.4f"-style specifiers are Java format strings,
            // presumably handled by a Sharpen string.Format shim — confirm before
            // converting to C# "{0:F4}" placeholders. Left byte-identical here.
            Redwood.Log("scoref.train", string.Format("Best train: %.4f", bestTrainScore));
            Redwood.Log("scoref.train", string.Format("Time elapsed: %.2f", timeElapsed));
            Redwood.Log("scoref.train", string.Format("Cost hit rate: %.4f", ffhr));
            Redwood.Log("scoref.train", string.Format("Score hit rate: %.4f", shr));
            Redwood.Log("scoref.train", string.Format("Features hit rate: %.4f", fhr));
            Redwood.Log("scoref.train", string.Empty);
            progressWriter.Write(iteration + " " + trainScore + " " + " " + timeElapsed + " " + ffhr + " " + shr + " " + fhr + "\n");
            progressWriter.Flush();
        }
        // Collect fresh examples from the current policy, decaying the expert's
        // influence geometrically with the iteration count (DAgger-style).
        foreach (ClustererDataLoader.ClustererDoc trainDoc in trainDocs)
        {
            examples.Add(RunPolicy(trainDoc, Math.Pow(ExpertDecay, (iteration + 1))));
        }
    }
    progressWriter.Close();
}