コード例 #1
0
        /// <summary>
        /// Generate the training features from the CoNLL input file.
        /// Every document returned by <c>DocumentMaker.NextDoc()</c> is processed until it returns
        /// <c>null</c>. Mentions in the gold coreference clusters are added with class label "1";
        /// predicted mentions whose head word is not the head word of any gold mention are added
        /// with class label "0".
        /// </summary>
        /// <remarks>
        /// In both passes a mention is skipped when the POS tag of its head word starts with "V"
        /// (verbal mention) or when its head word cannot be found in the mention's enhanced
        /// dependency graph. <c>SummaryStatistics()</c> is called on the dataset before returning.
        /// </remarks>
        /// <param name="props">Properties passed to the <c>Dictionaries</c> and <c>DocumentMaker</c> constructors.</param>
        /// <returns>Dataset of feature vectors</returns>
        /// <exception cref="System.Exception"/>
        private static GeneralDataset<string, string> GenerateFeatureVectors(Properties props)
        {
            GeneralDataset<string, string> dataset = new Dataset<string, string>();
            Dictionaries dict = new Dictionaries(props);
            DocumentMaker docMaker = new DocumentMaker(props, dict);
            Document document;

            while ((document = docMaker.NextDoc()) != null)
            {
                SetTokenIndices(document);
                IDictionary<int, CorefCluster> entities = document.goldCorefClusters;

                // Generate features for coreferent mentions with class label 1
                foreach (CorefCluster entity in entities.Values)
                {
                    foreach (Mention mention in entity.GetCorefMentions())
                    {
                        // Ignore verbal mentions. POS tags are identifiers, not natural-language
                        // text, so compare ordinally instead of with the current culture.
                        if (mention.headWord.Tag().StartsWith("V", System.StringComparison.Ordinal))
                        {
                            continue;
                        }

                        // Skip mentions whose head word has no node in the dependency graph.
                        IndexedWord head = mention.enhancedDependency.GetNodeByIndexSafe(mention.headWord.Index());
                        if (head == null)
                        {
                            continue;
                        }

                        List<string> feats = mention.GetSingletonFeatures(dict);
                        dataset.Add(new BasicDatum<string, string>(feats, "1"));
                    }
                }

                // Generate features for singletons with class label 0.
                // First collect the head words of all gold mentions so predicted mentions
                // sharing one of those heads can be excluded below.
                List<CoreLabel> goldHeads = new List<CoreLabel>();
                foreach (Mention goldMention in document.goldMentionsByID.Values)
                {
                    goldHeads.Add(goldMention.headWord);
                }

                foreach (Mention predictedMention in document.predictedMentionsByID.Values)
                {
                    SemanticGraph dep = predictedMention.enhancedDependency;
                    IndexedWord head = dep.GetNodeByIndexSafe(predictedMention.headWord.Index());
                    if (head == null || !dep.VertexSet().Contains(head))
                    {
                        continue;
                    }

                    // Ignore verbal mentions (ordinal comparison, see above)
                    if (predictedMention.headWord.Tag().StartsWith("V", System.StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // If the mention is in the gold set, it is not a singleton and thus ignore
                    if (goldHeads.Contains(predictedMention.headWord))
                    {
                        continue;
                    }

                    dataset.Add(new BasicDatum<string, string>(predictedMention.GetSingletonFeatures(dict), "0"));
                }
            }

            dataset.SummaryStatistics();
            return dataset;
        }
コード例 #2
0
 /// <summary>Creates a coref system from components supplied by the caller.</summary>
 /// <param name="docMaker">Document maker stored on this instance.</param>
 /// <param name="corefAlgorithm">Coreference algorithm stored on this instance.</param>
 /// <param name="removeSingletonClusters">Value stored in the <c>removeSingletonClusters</c> member.</param>
 /// <param name="verbose">Value stored in the <c>verbose</c> member.</param>
 public CorefSystem(DocumentMaker docMaker, ICorefAlgorithm corefAlgorithm, bool removeSingletonClusters, bool verbose)
 {
     // Straight member initialization; arguments are stored as given, without validation.
     this.verbose = verbose;
     this.removeSingletonClusters = removeSingletonClusters;
     this.corefAlgorithm = corefAlgorithm;
     this.docMaker = docMaker;
 }
コード例 #3
0
 /// <summary>Creates a coref system whose components are all derived from <paramref name="props"/>.</summary>
 /// <param name="props">
 /// Properties used to build the dictionaries, the document maker and the coref algorithm,
 /// and to read the remove-singleton-clusters and verbose settings.
 /// </param>
 /// <exception cref="System.Exception">
 /// Thrown with the message "Error initializing coref system" when any initialization step
 /// fails; the original failure is attached as the inner exception.
 /// </exception>
 public CorefSystem(Properties props)
 {
     try
     {
         // The same Dictionaries instance is shared by the document maker and the algorithm.
         Dictionaries sharedDictionaries = new Dictionaries(props);
         this.docMaker = new DocumentMaker(props, sharedDictionaries);
         this.corefAlgorithm = ICorefAlgorithm.FromProps(props, sharedDictionaries);
         this.removeSingletonClusters = CorefProperties.RemoveSingletonClusters(props);
         this.verbose = CorefProperties.Verbose(props);
     }
     catch (Exception cause)
     {
         // Wrap so callers see a single initialization error with the cause preserved.
         throw new Exception("Error initializing coref system", cause);
     }
 }
コード例 #4
0
 /// <summary>
 /// Creates a hybrid coref system: stores <paramref name="props"/>, loads the sieves, and then
 /// builds the dictionaries and the document maker from the same properties.
 /// </summary>
 /// <remarks>
 /// Before the dictionaries are created, <paramref name="props"/> is modified in place: if any
 /// sieve whose classifier type is not <c>Rule</c> has word embeddings enabled, the
 /// <c>HybridCorefProperties.LoadWordEmbeddingProp</c> property is set to "true".
 /// </remarks>
 /// <param name="props">Configuration properties; may be mutated as described above.</param>
 /// <exception cref="System.Exception"/>
 public HybridCorefSystem(Properties props)
 {
     this.props = props;
     sieves = Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.LoadSieves(props);

     // Set semantics loading: rule-based sieves are never consulted for word embeddings.
     foreach (Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve candidate in sieves)
     {
         bool isRuleSieve = candidate.classifierType == Sieve.ClassifierType.Rule;
         if (!isRuleSieve && HybridCorefProperties.UseWordEmbedding(props, candidate.sievename))
         {
             props.SetProperty(HybridCorefProperties.LoadWordEmbeddingProp, "true");
         }
     }

     // Constructed only after the loop so they see the possibly updated properties.
     dictionaries = new Edu.Stanford.Nlp.Coref.Data.Dictionaries(props);
     docMaker = new DocumentMaker(props, dictionaries);
 }