/// <summary>
/// Creates a pairwise model from the values held by <paramref name="builder"/>.
/// </summary>
/// <remarks>
/// The builder's <c>modelFile</c> is resolved to the path handed to the classifier as follows:
/// <c>null</c> is passed through unchanged; a value ending in ".ser" or ".gz" is used verbatim;
/// any other value is placed under <c>StatisticalCorefTrainer.pairwiseModelsPath</c> with
/// "/model.ser" appended.
/// </remarks>
/// <param name="builder">Builder supplying name, meta, training sizes and classifier settings.</param>
public PairwiseModel(PairwiseModel.Builder builder)
 {
     name             = builder.name;
     meta             = builder.meta;
     trainingExamples = builder.trainingExamples;
     epochs           = builder.epochs;
     singletonRatio   = builder.singletonRatio;

     string modelFile = builder.modelFile;
     string modelPath;
     if (modelFile == null)
     {
         modelPath = null;
     }
     // Ordinal comparison: this is a file-suffix check, so the result must not vary with the current culture.
     else if (modelFile.EndsWith(".ser", System.StringComparison.Ordinal) || modelFile.EndsWith(".gz", System.StringComparison.Ordinal))
     {
         modelPath = modelFile;
     }
     else
     {
         modelPath = StatisticalCorefTrainer.pairwiseModelsPath + modelFile + "/model.ser";
     }
     classifier = new SimpleLinearClassifier(builder.loss, builder.learningRateSchedule, builder.regularizationStrength, modelPath);
     str = StatisticalCorefTrainer.FieldValues(builder);
 }
 /// <summary>
 /// Creates a meta feature extractor from the values held by <paramref name="builder"/>.
 /// </summary>
 /// <remarks>
 /// When the builder's <c>anaphoricityClassifier</c> flag is set, the pair conjunction set starts
 /// empty and the builder's pair conjunctions are not copied. The single conjunctions are always copied.
 /// </remarks>
 /// <param name="builder">Builder supplying the conjunction sets, disallowed prefixes and flags.</param>
 public MetaFeatureExtractor(MetaFeatureExtractor.Builder builder)
 {
     anaphoricityClassifier = builder.anaphoricityClassifier;

     // Both sets are fresh HashSet instances, so later changes to the builder's sets do not affect this object.
     pairConjunctions = anaphoricityClassifier
         ? new HashSet <MetaFeatureExtractor.PairConjunction>()
         : new HashSet <MetaFeatureExtractor.PairConjunction>(builder.pairConjunctions);
     singleConjunctions = new HashSet <MetaFeatureExtractor.SingleConjunction>(builder.singleConjunctions);

     // Stored by reference, not copied.
     disallowedPrefixes = builder.disallowedPrefixes;
     neTypeConjuntion   = builder.useNEType;

     str = StatisticalCorefTrainer.FieldValues(builder);
 }
        /// <summary>
        /// Runs the training loop and writes its artifacts to
        /// <c>StatisticalCorefTrainer.clusteringModelsPath + modelName + "/"</c>.
        /// </summary>
        /// <remarks>
        /// The method seeds six classifier weights, writes a "config" file, loads the training documents,
        /// and then runs <c>RetrainIterations</c> iterations. Each iteration saves the current weights,
        /// shuffles the documents, trains on the buffered examples, evaluates every <c>EvalFrequency</c>
        /// iterations (appending a line to the "progress" file), and finally collects new examples by
        /// running the policy on every training document.
        /// </remarks>
        /// <param name="modelName">Name of the output sub-directory; also written to the training log.</param>
        /// <exception cref="Exception">
        /// Thrown when the output files or training data cannot be set up, or when the weights cannot be
        /// written; the underlying failure is available as the inner exception.
        /// </exception>
        public virtual void DoTraining(string modelName)
        {
            // Initial weights for the clustering features and their "-single" counterparts.
            classifier.SetWeight("bias", -0.3);
            classifier.SetWeight("anaphorSeen", -1);
            classifier.SetWeight("max-ranking", 1);
            classifier.SetWeight("bias-single", -0.3);
            classifier.SetWeight("anaphorSeen-single", -1);
            classifier.SetWeight("max-ranking-single", 1);
            string outputPath = StatisticalCorefTrainer.clusteringModelsPath + modelName + "/";
            File   outDir     = new File(outputPath);

            if (!outDir.Exists())
            {
                outDir.Mkdir();
            }
            PrintWriter progressWriter = null;
            IList <ClustererDataLoader.ClustererDoc> trainDocs;

            try
            {
                PrintWriter configWriter = new PrintWriter(outputPath + "config", "UTF-8");
                try
                {
                    configWriter.Print(StatisticalCorefTrainer.FieldValues(this));
                }
                finally
                {
                    // Close the config file even if writing it fails.
                    configWriter.Close();
                }
                progressWriter = new PrintWriter(outputPath + "progress", "UTF-8");
                Redwood.Log("scoref.train", "Loading training data");
                StatisticalCorefTrainer.SetDataPath("dev");
                trainDocs = ClustererDataLoader.LoadDocuments(MaxDocs);
            }
            catch (Exception e)
            {
                // The progress file may already be open if loading the documents failed.
                if (progressWriter != null)
                {
                    progressWriter.Close();
                }
                throw new Exception("Error setting up training", e);
            }

            try
            {
                double bestTrainScore = 0;
                IList <IList <Pair <Clusterer.CandidateAction, Clusterer.CandidateAction> > > examples = new List <IList <Pair <Clusterer.CandidateAction, Clusterer.CandidateAction> > >();

                for (int iteration = 0; iteration < RetrainIterations; iteration++)
                {
                    Redwood.Log("scoref.train", "ITERATION " + iteration);
                    classifier.PrintWeightVector(null);
                    Redwood.Log("scoref.train", string.Empty);
                    try
                    {
                        classifier.WriteWeights(outputPath + "model");
                        classifier.PrintWeightVector(IOUtils.GetPrintWriter(outputPath + "weights"));
                    }
                    catch (Exception e)
                    {
                        // Keep the cause so a failed write can be diagnosed.
                        throw new Exception("Error writing model weights to " + outputPath, e);
                    }
                    long start = Runtime.CurrentTimeMillis();
                    Java.Util.Collections.Shuffle(trainDocs, random);

                    // Keep only the trailing BufferSizeMultiplier * trainDocs.Count example lists.
                    examples = examples.SubList(Math.Max(0, examples.Count - BufferSizeMultiplier * trainDocs.Count), examples.Count);
                    TrainPolicy(examples);
                    if (iteration % EvalFrequency == 0)
                    {
                        double trainScore = EvaluatePolicy(trainDocs, true);
                        if (trainScore > bestTrainScore)
                        {
                            bestTrainScore = trainScore;
                            WriteModel("best", outputPath);
                        }
                        if (iteration % 10 == 0)
                        {
                            WriteModel("iter_" + iteration, outputPath);
                        }
                        WriteModel("last", outputPath);
                        double timeElapsed = (Runtime.CurrentTimeMillis() - start) / 1000.0;

                        // Hit rates are hits / (hits + misses); with no lookups at all this yields NaN.
                        double ffhr        = Clusterer.State.ffHits / (double)(Clusterer.State.ffHits + Clusterer.State.ffMisses);
                        double shr         = Clusterer.State.sHits / (double)(Clusterer.State.sHits + Clusterer.State.sMisses);
                        double fhr         = featuresCacheHits / (double)(featuresCacheHits + featuresCacheMisses);
                        Redwood.Log("scoref.train", modelName);

                        // .NET composite format items; printf-style "%.4f" would be emitted literally.
                        Redwood.Log("scoref.train", string.Format("Best train: {0:F4}", bestTrainScore));
                        Redwood.Log("scoref.train", string.Format("Time elapsed: {0:F2}", timeElapsed));
                        Redwood.Log("scoref.train", string.Format("Cost hit rate: {0:F4}", ffhr));
                        Redwood.Log("scoref.train", string.Format("Score hit rate: {0:F4}", shr));
                        Redwood.Log("scoref.train", string.Format("Features hit rate: {0:F4}", fhr));
                        Redwood.Log("scoref.train", string.Empty);

                        // The double space after trainScore is kept so the progress file layout is unchanged.
                        progressWriter.Write(iteration + " " + trainScore + " " + " " + timeElapsed + " " + ffhr + " " + shr + " " + fhr + "\n");
                        progressWriter.Flush();
                    }

                    // Collect new examples; the second argument is ExpertDecay raised to (iteration + 1).
                    foreach (ClustererDataLoader.ClustererDoc trainDoc in trainDocs)
                    {
                        examples.Add(RunPolicy(trainDoc, Math.Pow(ExpertDecay, (iteration + 1))));
                    }
                }
            }
            finally
            {
                // Release the progress file whether training completes or throws.
                progressWriter.Close();
            }
        }