/// <summary>Generate the training features from the CoNLL input file.</summary>
/// <returns>Dataset of feature vectors</returns>
/// <exception cref="System.Exception"/>
private static GeneralDataset<string, string> GenerateFeatureVectors(Properties props)
{
    GeneralDataset<string, string> dataset = new Dataset<string, string>();
    Dictionaries dict = new Dictionaries(props);
    DocumentMaker docMaker = new DocumentMaker(props, dict);
    Document document;
    while ((document = docMaker.NextDoc()) != null)
    {
        SetTokenIndices(document);
        IDictionary<int, CorefCluster> entities = document.goldCorefClusters;
        // Generate features for coreferent mentions with class label 1
        foreach (CorefCluster entity in entities.Values)
        {
            foreach (Mention mention in entity.GetCorefMentions())
            {
                // Ignore verbal mentions
                if (mention.headWord.Tag().StartsWith("V"))
                {
                    continue;
                }
                IndexedWord head = mention.enhancedDependency.GetNodeByIndexSafe(mention.headWord.Index());
                if (head == null)
                {
                    continue;
                }
                List<string> feats = mention.GetSingletonFeatures(dict);
                dataset.Add(new BasicDatum<string, string>(feats, "1"));
            }
        }
        // Generate features for singletons with class label 0
        List<CoreLabel> gold_heads = new List<CoreLabel>();
        foreach (Mention gold_men in document.goldMentionsByID.Values)
        {
            gold_heads.Add(gold_men.headWord);
        }
        foreach (Mention predicted_men in document.predictedMentionsByID.Values)
        {
            SemanticGraph dep = predicted_men.enhancedDependency;
            IndexedWord head = dep.GetNodeByIndexSafe(predicted_men.headWord.Index());
            if (head == null || !dep.VertexSet().Contains(head))
            {
                continue;
            }
            // Ignore verbal mentions
            if (predicted_men.headWord.Tag().StartsWith("V"))
            {
                continue;
            }
            // If the mention is in the gold set, it is not a singleton, so skip it
            if (gold_heads.Contains(predicted_men.headWord))
            {
                continue;
            }
            dataset.Add(new BasicDatum<string, string>(predicted_men.GetSingletonFeatures(dict), "0"));
        }
    }
    dataset.SummaryStatistics();
    return dataset;
}
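// Illustrative sketch (not in the original source): how the dataset produced above might be
// used to train the singleton classifier. LogisticClassifierFactory and TrainClassifier are
// assumed API names in this port and may differ.
private static LogisticClassifier<string, string> TrainSingletonClassifier(Properties props)
{
    // Build the labeled feature vectors (label "1" = coreferent mention, "0" = singleton)
    GeneralDataset<string, string> dataset = GenerateFeatureVectors(props);
    // Train a logistic-regression classifier over those features
    LogisticClassifierFactory<string, string> factory = new LogisticClassifierFactory<string, string>();
    return factory.TrainClassifier(dataset);
}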
public CorefSystem(DocumentMaker docMaker, ICorefAlgorithm corefAlgorithm, bool removeSingletonClusters, bool verbose)
{
    this.docMaker = docMaker;
    this.corefAlgorithm = corefAlgorithm;
    this.removeSingletonClusters = removeSingletonClusters;
    this.verbose = verbose;
}
public CorefSystem(Properties props)
{
    try
    {
        Dictionaries dictionaries = new Dictionaries(props);
        docMaker = new DocumentMaker(props, dictionaries);
        corefAlgorithm = ICorefAlgorithm.FromProps(props, dictionaries);
        removeSingletonClusters = CorefProperties.RemoveSingletonClusters(props);
        verbose = CorefProperties.Verbose(props);
    }
    catch (Exception e)
    {
        throw new Exception("Error initializing coref system", e);
    }
}
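// Usage sketch (not in the original source): a hypothetical helper constructing a CorefSystem
// from a Properties object via the constructor above. The property key/value shown is
// illustrative and may differ in this port.
public static CorefSystem BuildFromDefaults()
{
    Properties props = new Properties();
    props.SetProperty("coref.algorithm", "neural");  // illustrative setting
    return new CorefSystem(props);
}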
/// <exception cref="System.Exception"/>
public HybridCorefSystem(Properties props)
{
    this.props = props;
    sieves = Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve.LoadSieves(props);
    // Set semantics loading: if any non-rule-based sieve uses word embeddings, enable loading them
    foreach (Edu.Stanford.Nlp.Coref.Hybrid.Sieve.Sieve sieve in sieves)
    {
        if (sieve.classifierType == Sieve.ClassifierType.Rule)
        {
            continue;
        }
        if (HybridCorefProperties.UseWordEmbedding(props, sieve.sievename))
        {
            props.SetProperty(HybridCorefProperties.LoadWordEmbeddingProp, "true");
        }
    }
    dictionaries = new Edu.Stanford.Nlp.Coref.Data.Dictionaries(props);
    docMaker = new DocumentMaker(props, dictionaries);
}