/// <summary>
/// Builds the components used by the clustering coreference algorithm: a
/// <c>Clusterer</c>, three pairwise models named "classification", "ranking" and
/// "anaphoricity", and a <c>FeatureExtractor</c>.
/// </summary>
/// <param name="props">Properties forwarded to the feature extractor.</param>
/// <param name="dictionaries">Dictionaries forwarded to the feature extractor.</param>
/// <param name="clusteringPath">Path handed to the <c>Clusterer</c> constructor.</param>
/// <param name="classificationPath">Model path given to the "classification" model builder.</param>
/// <param name="rankingPath">Model path given to the "ranking" model builder.</param>
/// <param name="anaphoricityPath">Model path given to the "anaphoricity" model builder.</param>
/// <param name="wordCountsPath">Word-counts path forwarded to the feature extractor.</param>
public ClusteringCorefAlgorithm(Properties props, Dictionaries dictionaries, string clusteringPath, string classificationPath, string rankingPath, string anaphoricityPath, string wordCountsPath)
{
    clusterer = new Clusterer(clusteringPath);

    // The classification and ranking models each get their own default-built
    // meta-feature extractor (one instance per model, not shared).
    var classificationFeatures = MetaFeatureExtractor.NewBuilder().Build();
    classificationModel = PairwiseModel
        .NewBuilder("classification", classificationFeatures)
        .ModelPath(classificationPath)
        .Build();

    var rankingFeatures = MetaFeatureExtractor.NewBuilder().Build();
    rankingModel = PairwiseModel
        .NewBuilder("ranking", rankingFeatures)
        .ModelPath(rankingPath)
        .Build();

    // The anaphoricity model uses the dedicated AnaphoricityMFE configuration.
    var anaphoricityFeatures = MetaFeatureExtractor.AnaphoricityMFE();
    anaphoricityModel = PairwiseModel
        .NewBuilder("anaphoricity", anaphoricityFeatures)
        .ModelPath(anaphoricityPath)
        .Build();

    // The third constructor argument is intentionally null, as in the original call.
    extractor = new FeatureExtractor(props, dictionaries, null, wordCountsPath);
}
 /// <summary>
 /// Builds the feature extractor and the pairwise "classifier" model, then stores
 /// the mention-distance limits and the thresholds on the instance.
 /// </summary>
 /// <param name="props">Properties forwarded to the feature extractor.</param>
 /// <param name="dictionaries">Dictionaries forwarded to the feature extractor.</param>
 /// <param name="wordCountsFile">Word-counts file forwarded to the feature extractor.</param>
 /// <param name="modelPath">Model path given to the "classifier" model builder.</param>
 /// <param name="maxMentionDistance">Stored unchanged in the field of the same name.</param>
 /// <param name="maxMentionDistanceWithStringMatch">Stored unchanged in the field of the same name.</param>
 /// <param name="thresholds">Passed through <c>MakeThresholds</c> before being stored.</param>
 public StatisticalCorefAlgorithm(Properties props, Dictionaries dictionaries, string wordCountsFile, string modelPath, int maxMentionDistance, int maxMentionDistanceWithStringMatch, double[] thresholds)
 {
     // The third constructor argument is intentionally null, as in the original call.
     extractor = new FeatureExtractor(props, dictionaries, null, wordCountsFile);

     var classifierFeatures = MetaFeatureExtractor.NewBuilder().Build();
     classifier = PairwiseModel
         .NewBuilder("classifier", classifierFeatures)
         .ModelPath(modelPath)
         .Build();

     this.maxMentionDistance = maxMentionDistance;
     this.maxMentionDistanceWithStringMatch = maxMentionDistanceWithStringMatch;

     // The raw threshold array is converted by MakeThresholds rather than stored directly.
     this.thresholds = MakeThresholds(thresholds);
 }
Example #3
0
        /// <summary>
        /// Runs the training pipeline end to end: preprocesses the train and dev
        /// datasets, trains the ranking, classification and anaphoricity pairwise
        /// models, tests all three with the data path set to "dev", and finally
        /// trains the clusterer.
        /// </summary>
        /// <param name="props">
        /// Properties used to set the training path, build the dictionaries, select
        /// the input dataset and drive preprocessing.
        /// </param>
        /// <exception cref="System.Exception"/>
        public static void DoTraining(Properties props)
        {
            const int anaphoricityTrainingExamples = 5000000;

            SetTrainingPath(props);
            Dictionaries dicts = new Dictionaries(props);

            // --- Preprocess the training split ---
            SetDataPath("train");
            wordCountsFile = trainingPath + "train/word_counts.ser";
            CorefProperties.SetInput(props, CorefProperties.Dataset.Train);
            Preprocess(props, dicts, true);

            // --- Preprocess the dev split ---
            SetDataPath("dev");
            CorefProperties.SetInput(props, CorefProperties.Dataset.Dev);
            Preprocess(props, dicts, false);

            // --- Build and train the pairwise models with the data path back on "train" ---
            SetDataPath("train");
            // Dictionaries are not used past preprocessing; the reference is dropped here
            // (presumably so the object can be reclaimed before training starts).
            dicts = null;

            PairwiseModel pairClassifier = PairwiseModel
                .NewBuilder(ClassificationModel, MetaFeatureExtractor.NewBuilder().Build())
                .Build();
            PairwiseModel pairRanker = PairwiseModel
                .NewBuilder(RankingModel, MetaFeatureExtractor.NewBuilder().Build())
                .Build();
            PairwiseModel anaphoricityScorer = PairwiseModel
                .NewBuilder(AnaphoricityModel, MetaFeatureExtractor.AnaphoricityMFE())
                .TrainingExamples(anaphoricityTrainingExamples)
                .Build();

            // Ranking is trained first, then the two classification-style models.
            PairwiseModelTrainer.TrainRanking(pairRanker);
            PairwiseModelTrainer.TrainClassification(pairClassifier, false);
            PairwiseModelTrainer.TrainClassification(anaphoricityScorer, true);

            // --- Test every model with the data path set to "dev" ---
            SetDataPath("dev");
            PairwiseModelTrainer.Test(pairClassifier, predictionsName, false);
            PairwiseModelTrainer.Test(pairRanker, predictionsName, false);
            PairwiseModelTrainer.Test(anaphoricityScorer, predictionsName, true);

            // --- Train the clusterer last ---
            Clusterer clusterTrainer = new Clusterer();
            clusterTrainer.DoTraining(ClusteringModelName);
        }
 /// <summary>
 /// Creates the meta-feature extractor configuration used for anaphoricity:
 /// single conjunctions <c>Index</c> and <c>IndexLast</c>, the "parent-word"
 /// prefix disallowed, and the anaphoricity-classifier flag set to true.
 /// </summary>
 /// <returns>The <c>MetaFeatureExtractor</c> produced by the configured builder.</returns>
 public static MetaFeatureExtractor AnaphoricityMFE()
 {
     MetaFeatureExtractor.SingleConjunction[] conjunctions = new MetaFeatureExtractor.SingleConjunction[]
     {
         MetaFeatureExtractor.SingleConjunction.Index,
         MetaFeatureExtractor.SingleConjunction.IndexLast
     };
     string[] excludedPrefixes = new string[] { "parent-word" };

     return MetaFeatureExtractor.NewBuilder()
         .SingleConjunctions(conjunctions)
         .DisallowedPrefixes(excludedPrefixes)
         .AnaphoricityClassifier(true)
         .Build();
 }