コード例 #1
0
 public NeuralCorefAlgorithm(Properties props, Dictionaries dictionaries)
 {
     greedyness         = NeuralCorefProperties.Greedyness(props);
     maxMentionDistance = CorefProperties.MaxMentionDistance(props);
     maxMentionDistanceWithStringMatch = CorefProperties.MaxMentionDistanceWithStringMatch(props);
     model = IOUtils.ReadObjectAnnouncingTimingFromURLOrClasspathOrFileSystem(log, "Loading coref model", NeuralCorefProperties.ModelPath(props));
     embeddingExtractor = new EmbeddingExtractor(CorefProperties.Conll(props), IOUtils.ReadObjectAnnouncingTimingFromURLOrClasspathOrFileSystem(log, "Loading coref embeddings", NeuralCorefProperties.PretrainedEmbeddingsPath(props)), model.GetWordEmbeddings
                                                     ());
     featureExtractor = new CategoricalFeatureExtractor(props, dictionaries);
 }
コード例 #2
0
        public virtual void Process(int id, Document document)
        {
            IJsonArrayBuilder clusters = Javax.Json.Json.CreateArrayBuilder();

            foreach (CorefCluster gold in document.goldCorefClusters.Values)
            {
                IJsonArrayBuilder c = Javax.Json.Json.CreateArrayBuilder();
                foreach (Mention m in gold.corefMentions)
                {
                    c.Add(m.mentionID);
                }
                clusters.Add(c.Build());
            }
            goldClusterWriter.Println(Javax.Json.Json.CreateObjectBuilder().Add(id.ToString(), clusters.Build()).Build());
            IDictionary <Pair <int, int>, bool> mentionPairs = CorefUtils.GetLabeledMentionPairs(document);
            IList <Mention> mentionsList = CorefUtils.GetSortedMentions(document);
            IDictionary <int, IList <Mention> > mentionsByHeadIndex = new Dictionary <int, IList <Mention> >();

            foreach (Mention m_1 in mentionsList)
            {
                IList <Mention> withIndex = mentionsByHeadIndex.ComputeIfAbsent(m_1.headIndex, null);
                withIndex.Add(m_1);
            }
            IJsonObjectBuilder docFeatures = Javax.Json.Json.CreateObjectBuilder();

            docFeatures.Add("doc_id", id);
            docFeatures.Add("type", document.docType == Document.DocType.Article ? 1 : 0);
            docFeatures.Add("source", document.docInfo["DOC_ID"].Split("/")[0]);
            IJsonArrayBuilder sentences = Javax.Json.Json.CreateArrayBuilder();

            foreach (ICoreMap sentence in document.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                sentences.Add(GetSentenceArray(sentence.Get(typeof(CoreAnnotations.TokensAnnotation))));
            }
            IJsonObjectBuilder mentions = Javax.Json.Json.CreateObjectBuilder();

            foreach (Mention m_2 in document.predictedMentionsByID.Values)
            {
                IEnumerator <SemanticGraphEdge> iterator = m_2.enhancedDependency.IncomingEdgeIterator(m_2.headIndexedWord);
                SemanticGraphEdge relation    = iterator.MoveNext() ? iterator.Current : null;
                string            depRelation = relation == null ? "no-parent" : relation.GetRelation().ToString();
                string            depParent   = relation == null ? "<missing>" : relation.GetSource().Word();
                mentions.Add(m_2.mentionNum.ToString(), Javax.Json.Json.CreateObjectBuilder().Add("doc_id", id).Add("mention_id", m_2.mentionID).Add("mention_num", m_2.mentionNum).Add("sent_num", m_2.sentNum).Add("start_index", m_2.startIndex).Add("end_index"
                                                                                                                                                                                                                                                        , m_2.endIndex).Add("head_index", m_2.headIndex).Add("mention_type", m_2.mentionType.ToString()).Add("dep_relation", depRelation).Add("dep_parent", depParent).Add("sentence", GetSentenceArray(m_2.sentenceWords)).Add("contained-in-other-mention"
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                , mentionsByHeadIndex[m_2.headIndex].Stream().AnyMatch(null) ? 1 : 0).Build());
            }
            IJsonArrayBuilder featureNames = Javax.Json.Json.CreateArrayBuilder().Add("same-speaker").Add("antecedent-is-mention-speaker").Add("mention-is-antecedent-speaker").Add("relaxed-head-match").Add("exact-string-match").Add("relaxed-string-match"
                                                                                                                                                                                                                                        );
            IJsonObjectBuilder features = Javax.Json.Json.CreateObjectBuilder();
            IJsonObjectBuilder labels   = Javax.Json.Json.CreateObjectBuilder();

            foreach (KeyValuePair <Pair <int, int>, bool> e in mentionPairs)
            {
                Mention           m1      = document.predictedMentionsByID[e.Key.first];
                Mention           m2      = document.predictedMentionsByID[e.Key.second];
                string            key     = m1.mentionNum + " " + m2.mentionNum;
                IJsonArrayBuilder builder = Javax.Json.Json.CreateArrayBuilder();
                foreach (int val in CategoricalFeatureExtractor.PairwiseFeatures(document, m1, m2, dictionaries, conll))
                {
                    builder.Add(val);
                }
                features.Add(key, builder.Build());
                labels.Add(key, e.Value ? 1 : 0);
            }
            IJsonObject docData = Javax.Json.Json.CreateObjectBuilder().Add("sentences", sentences.Build()).Add("mentions", mentions.Build()).Add("labels", labels.Build()).Add("pair_feature_names", featureNames.Build()).Add("pair_features", features.Build
                                                                                                                                                                                                                                    ()).Add("document_features", docFeatures.Build()).Build();

            dataWriter.Println(docData);
        }