Esempio n. 1
0
        /// <summary>Parse a JSON formatted tree into a SemanticGraph.</summary>
        /// <param name="jsonString">
        /// The JSON string tree to parse, e.g:
        /// "[{\"\"dependent\"\": 7, \"\"dep\"\": \"\"root\"\", \"\"governorgloss\"\": \"\"root\"\", \"\"governor\"\": 0, \"\"dependentgloss\"\": \"\"sport\"\"}, {\"\"dependent\"\": 1, \"\"dep\"\": \"\"nsubj\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"chess\"\"}, {\"\"dependent\"\": 2, \"\"dep\"\": \"\"cop\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"is\"\"}, {\"\"dependent\"\": 3, \"\"dep\"\": \"\"neg\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"not\"\"}, {\"\"dependent\"\": 4, \"\"dep\"\": \"\"det\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"a\"\"}, {\"\"dependent\"\": 5, \"\"dep\"\": \"\"advmod\"\", \"\"governorgloss\"\": \"\"physical\"\", \"\"governor\"\": 6, \"\"dependentgloss\"\": \"\"predominantly\"\"}, {\"\"dependent\"\": 6, \"\"dep\"\": \"\"amod\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"physical\"\"}, {\"\"dependent\"\": 9, \"\"dep\"\": \"\"advmod\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"yet\"\"}, {\"\"dependent\"\": 10, \"\"dep\"\": \"\"nsubj\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"neither\"\"}, {\"\"dependent\"\": 11, \"\"dep\"\": \"\"cop\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"are\"\"}, {\"\"dependent\"\": 12, \"\"dep\"\": \"\"parataxis\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"shooting\"\"}, {\"\"dependent\"\": 13, \"\"dep\"\": \"\"cc\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"and\"\"}, {\"\"dependent\"\": 14, \"\"dep\"\": \"\"parataxis\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"curling\"\"}, 
{\"\"dependent\"\": 14, \"\"dep\"\": \"\"conj:and\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"curling\"\"}, {\"\"dependent\"\": 16, \"\"dep\"\": \"\"nsubjpass\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"which\"\"}, {\"\"dependent\"\": 18, \"\"dep\"\": \"\"case\"\", \"\"governorgloss\"\": \"\"fact\"\", \"\"governor\"\": 19, \"\"dependentgloss\"\": \"\"in\"\"}, {\"\"dependent\"\": 19, \"\"dep\"\": \"\"nmod:in\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"fact\"\"}, {\"\"dependent\"\": 21, \"\"dep\"\": \"\"aux\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"has\"\"}, {\"\"dependent\"\": 22, \"\"dep\"\": \"\"auxpass\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"been\"\"}, {\"\"dependent\"\": 23, \"\"dep\"\": \"\"dep\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"nicknamed\"\"}, {\"\"dependent\"\": 25, \"\"dep\"\": \"\"dobj\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"chess\"\"}, {\"\"dependent\"\": 26, \"\"dep\"\": \"\"case\"\", \"\"governorgloss\"\": \"\"ice\"\", \"\"governor\"\": 27, \"\"dependentgloss\"\": \"\"on\"\"}, {\"\"dependent\"\": 27, \"\"dep\"\": \"\"nmod:on\"\", \"\"governorgloss\"\": \"\"chess\"\", \"\"governor\"\": 25, \"\"dependentgloss\"\": \"\"ice\"\"}, {\"\"dependent\"\": 29, \"\"dep\"\": \"\"amod\"\", \"\"governorgloss\"\": \"\"chess\"\", \"\"governor\"\": 25, \"\"dependentgloss\"\": \"\"5\"\"}]");
        /// </param>
        /// <param name="tokens">The tokens of the sentence, to form the backing labels of the tree.</param>
        /// <returns>A semantic graph of the sentence, according to the given tree.</returns>
        /// <remarks>
        /// The "dependent" and "governor" values of each entry are used as 1-based positions into
        /// <paramref name="tokens"/>; a governor of 0 marks the dependent as a root. Entries whose
        /// "dep" is "ref" contribute their vertices but no edge. If any index falls outside the
        /// token list, the input is treated as a size mismatch and an empty graph is returned.
        /// </remarks>
        public static SemanticGraph ParseJsonTree(string jsonString, IList <CoreLabel> tokens)
        {
            IJsonReader   json  = Javax.Json.Json.CreateReader(new StringReader(jsonString));
            SemanticGraph tree  = new SemanticGraph();
            IJsonArray    array = json.ReadArray();

            if (array == null || array.IsEmpty())
            {
                return tree;
            }
            // vertices[i] caches the vertex built for token i (1-based), so every edge that
            // touches the same token reuses a single IndexedWord instance.
            IndexedWord[] vertices = new IndexedWord[tokens.Count + 2];
            for (int i = 0; i < array.Count; i++)
            {
                IJsonObject entry = array.GetJsonObject(i);

                // Validate the index BEFORE using it to address the cache; checking afterwards
                // lets an oversized index escape as an out-of-range exception instead of the
                // intended empty-graph result.
                int dependentIndex = entry.GetInt("dependent");
                if (dependentIndex < 1 || dependentIndex > tokens.Count)
                {
                    // Bizarre mismatch in sizes; the malt parser seems to do this often
                    return new SemanticGraph();
                }
                if (vertices[dependentIndex] == null)
                {
                    vertices[dependentIndex] = new IndexedWord(tokens[dependentIndex - 1]);
                }
                IndexedWord dependent = vertices[dependentIndex];

                int governorIndex = entry.GetInt("governor");
                if (governorIndex < 0 || governorIndex > tokens.Count)
                {
                    // Bizarre mismatch in sizes; the malt parser seems to do this often
                    return new SemanticGraph();
                }
                if (governorIndex > 0 && vertices[governorIndex] == null)
                {
                    vertices[governorIndex] = new IndexedWord(tokens[governorIndex - 1]);
                }
                // Stays null when governorIndex is 0; the root branch below never reads it.
                IndexedWord governor = vertices[governorIndex];
                string      relation = entry.GetString("dep");

                if (governorIndex == 0)
                {
                    tree.AddRoot(dependent);
                }
                else
                {
                    tree.AddVertex(dependent);
                    if (!tree.ContainsVertex(governor))
                    {
                        tree.AddVertex(governor);
                    }
                    // "ref" rows add their vertices only; no edge is created for them.
                    if (!"ref".Equals(relation))
                    {
                        tree.AddEdge(governor, dependent, GrammaticalRelation.ValueOf(Language.English, relation), double.NegativeInfinity, false);
                    }
                }
            }
            return tree;
        }
Esempio n. 2
0
        /// <summary>Parse a CoNLL formatted tree into a SemanticGraph.</summary>
        /// <param name="conll">The CoNLL tree to parse.</param>
        /// <param name="tokens">The tokens of the sentence, to form the backing labels of the tree.</param>
        /// <returns>A semantic graph of the sentence, according to the given tree.</returns>
        /// <remarks>
        /// The input is split into rows and fields with the <c>newline</c> and <c>tab</c> patterns
        /// declared elsewhere in this class. In each row, field 0 is the dependent index, field 1 the
        /// governor index and field 2 the relation name. Indices are 1-based positions into
        /// <paramref name="tokens"/>; a governor of 0 marks the dependent as a root. Rows whose
        /// relation is "ref" contribute their vertices but no edge. If any index falls outside the
        /// token list, the input is treated as a size mismatch and an empty graph is returned.
        /// </remarks>
        public static SemanticGraph ParseTree(string conll, IList <CoreLabel> tokens)
        {
            SemanticGraph tree = new SemanticGraph();

            if (string.IsNullOrEmpty(conll))
            {
                return tree;
            }
            string[] treeLines = newline.Split(conll);
            // vertices[i] caches the vertex built for token i (1-based), so every edge that
            // touches the same token reuses a single IndexedWord instance.
            IndexedWord[] vertices = new IndexedWord[tokens.Count + 2];
            foreach (string line in treeLines)
            {
                string[] fields = tab.Split(line);

                // Validate the index BEFORE using it to address the cache; checking afterwards
                // lets an oversized index escape as an out-of-range exception instead of the
                // intended empty-graph result.
                int dependentIndex = System.Convert.ToInt32(fields[0]);
                if (dependentIndex < 1 || dependentIndex > tokens.Count)
                {
                    // Bizarre mismatch in sizes; the malt parser seems to do this often
                    return new SemanticGraph();
                }
                if (vertices[dependentIndex] == null)
                {
                    vertices[dependentIndex] = new IndexedWord(tokens[dependentIndex - 1]);
                }
                IndexedWord dependent = vertices[dependentIndex];

                int governorIndex = System.Convert.ToInt32(fields[1]);
                if (governorIndex < 0 || governorIndex > tokens.Count)
                {
                    // Bizarre mismatch in sizes; the malt parser seems to do this often
                    return new SemanticGraph();
                }
                if (governorIndex > 0 && vertices[governorIndex] == null)
                {
                    vertices[governorIndex] = new IndexedWord(tokens[governorIndex - 1]);
                }
                // Stays null when governorIndex is 0; the root branch below never reads it.
                IndexedWord governor = vertices[governorIndex];
                string      relation = fields[2];

                if (governorIndex == 0)
                {
                    tree.AddRoot(dependent);
                }
                else
                {
                    tree.AddVertex(dependent);
                    if (!tree.ContainsVertex(governor))
                    {
                        tree.AddVertex(governor);
                    }
                    // "ref" rows add their vertices only; no edge is created for them.
                    if (!"ref".Equals(relation))
                    {
                        tree.AddEdge(governor, dependent, GrammaticalRelation.ValueOf(Language.English, relation), double.NegativeInfinity, false);
                    }
                }
            }
            return tree;
        }
Esempio n. 3
0
        /// <summary>
        /// Removes from <paramref name="sg"/> the edge that runs between the nodes the matcher bound
        /// to <c>govName</c> and <c>depName</c> and carries the relation bound to <c>edgeName</c>.
        /// Does nothing when the graph has no such edge.
        /// </summary>
        /// <param name="sg">The graph to edit in place.</param>
        /// <param name="sm">The matcher supplying the named node and relation bindings.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            string      boundRelationName = sm.GetRelnString(edgeName);
            IndexedWord governor          = GetNamedNode(govName, sm);
            IndexedWord dependent         = GetNamedNode(depName, sm);

            GrammaticalRelation boundRelation = GrammaticalRelation.ValueOf(boundRelationName);
            SemanticGraphEdge   target        = sg.GetEdge(governor, dependent, boundRelation);
            if (target == null)
            {
                // Nothing to remove.
                return;
            }
            sg.RemoveEdge(target);
        }
 /// <summary>
 /// Fills the static options of <c>DepPatternFactory</c> and <c>PatternFactory</c> from
 /// <paramref name="props"/>, then adds every relation listed in <c>ignoreRels</c> to
 /// <c>ignoreRelsSet</c> and every tag pattern listed in <c>allowedTagsForTrigger</c> to
 /// <c>allowedTagPatternForTrigger</c>. Both lists are separated by ',' or ';'.
 /// </summary>
 /// <param name="props">The properties to read the options from.</param>
 public static void SetUp(Properties props)
 {
     ArgumentParser.FillOptions(typeof(DepPatternFactory), props);
     ArgumentParser.FillOptions(typeof(PatternFactory), props);
     foreach (string relationName in SplitOnCommaOrSemicolon(ignoreRels))
     {
         ignoreRelsSet.Add(GrammaticalRelation.ValueOf(relationName));
     }
     foreach (string tagRegex in SplitOnCommaOrSemicolon(allowedTagsForTrigger))
     {
         allowedTagPatternForTrigger.Add(Pattern.Compile(tagRegex));
     }
 }

 /// <summary>
 /// Splits <paramref name="value"/> at every ',' or ';'. The separator is applied as a regular
 /// expression on purpose: string.Split(string) would treat "[,;]" as literal text and never
 /// split. Trailing empty entries are dropped when at least one separator was found, and a
 /// value without any separator is returned as a single entry.
 /// </summary>
 private static string[] SplitOnCommaOrSemicolon(string value)
 {
     string[] parts = System.Text.RegularExpressions.Regex.Split(value, "[,;]");
     if (parts.Length <= 1)
     {
         return parts;
     }
     int keep = parts.Length;
     while (keep > 0 && parts[keep - 1].Length == 0)
     {
         keep--;
     }
     if (keep == parts.Length)
     {
         return parts;
     }
     string[] trimmed = new string[keep];
     System.Array.Copy(parts, trimmed, keep);
     return trimmed;
 }
Esempio n. 5
0
            /// <summary>
            /// Resolves the governor word and the grammatical relation for one dependency of
            /// <paramref name="word"/>.
            /// </summary>
            /// <param name="govIdx">Index of the governor; 0 requests a synthetic "ROOT" word.</param>
            /// <param name="copyCount">Copy count passed through to the token lookup.</param>
            /// <param name="word">The dependent word; supplies the document id and sentence index for the synthetic root.</param>
            /// <param name="relationName">Relation name; "root" maps to <c>GrammaticalRelation.Root</c>.</param>
            /// <param name="sortedTokens">Tokens handed to the token lookup for non-zero governor indices.</param>
            /// <returns>The pair (governor, relation).</returns>
            private static Pair <IndexedWord, GrammaticalRelation> GetGovAndReln(int govIdx, int copyCount, IndexedWord word, string relationName, IList <IndexedWord> sortedTokens)
            {
                // The relation is resolved first, then the governor.
                GrammaticalRelation relation = relationName.Equals("root")
                    ? GrammaticalRelation.Root
                    : GrammaticalRelation.ValueOf(Language.UniversalEnglish, relationName);

                IndexedWord governor;
                if (govIdx != 0)
                {
                    governor = CoNLLUDocumentReader.SentenceProcessor.GetToken(sortedTokens, govIdx, copyCount);
                }
                else
                {
                    // Index 0 has no real token behind it: build a placeholder word at index 0.
                    governor = new IndexedWord(word.DocID(), word.SentIndex(), 0);
                    governor.SetValue("ROOT");
                }
                return Generics.NewPair(governor, relation);
            }
Esempio n. 6
0
        /// <summary>Given a string entry, converts it into a SsurgeonEdit object.</summary>
        /// <remarks>
        /// The entry is a command name, whitespace, and the command's argument text. For most
        /// commands the argument text is a sequence of flag/value pairs as tokenised by
        /// <c>ParseArgs</c>. The <c>SetRoots</c> command instead takes a whitespace-separated list
        /// of node names, and the <c>KillNonRootedNodes</c> command may be given with no arguments.
        /// </remarks>
        /// <param name="editLine">The textual description of the edit.</param>
        /// <returns>The edit object for the named command.</returns>
        /// <exception cref="System.ArgumentException">
        /// If the argument text is missing, a flag is unknown or has no value, or the command is
        /// not supported.
        /// </exception>
        public static SsurgeonEdit ParseEditLine(string editLine)
        {
            // Extract the operation name first. The separator is applied as a regular expression
            // explicitly: string.Split(string, int) would treat "\s+" as literal text.
            string[] tuples1 = new System.Text.RegularExpressions.Regex("\\s+").Split(editLine, 2);
            string   command = tuples1[0];

            if (tuples1.Length < 2)
            {
                // KillNonRootedNodes is built without any argument, so a bare command is valid.
                if (Sharpen.Runtime.EqualsIgnoreCase(command, KillNonRootedNodes.Label))
                {
                    return new KillNonRootedNodes();
                }
                throw new ArgumentException("Error in SsurgeonEdit.parseEditLine: invalid number of arguments");
            }
            string argsText = tuples1[1];

            // SetRoots takes bare node names rather than flag/value pairs, so it has to be handled
            // before the flag parser below, which rejects anything that is not a known flag.
            if (Sharpen.Runtime.EqualsIgnoreCase(command, SetRoots.Label))
            {
                IList <string> newRoots = new List <string>();
                foreach (string rootName in System.Text.RegularExpressions.Regex.Split(argsText, "\\s+"))
                {
                    if (rootName.Length > 0)
                    {
                        newRoots.Add(rootName);
                    }
                }
                return new SetRoots(newRoots);
            }

            string[] argsArray            = ParseArgs(argsText);
            Ssurgeon.SsurgeonArgs argsBox = new Ssurgeon.SsurgeonArgs();
            // Arguments come as flag/value pairs, so advance exactly two entries per iteration.
            for (int argIndex = 0; argIndex < argsArray.Length; argIndex += 2)
            {
                switch (argsArray[argIndex])
                {
                case GovNodenameArg:
                {
                    argsBox.govNodeName = RequireFlagValue(argsArray, argIndex);
                    break;
                }

                case DepNodenameArg:
                {
                    argsBox.dep = RequireFlagValue(argsArray, argIndex);
                    break;
                }

                case EdgeNameArg:
                {
                    argsBox.edge = RequireFlagValue(argsArray, argIndex);
                    break;
                }

                case RelnArg:
                {
                    argsBox.reln = RequireFlagValue(argsArray, argIndex);
                    break;
                }

                case NodenameArg:
                {
                    argsBox.node = RequireFlagValue(argsArray, argIndex);
                    break;
                }

                case NodeProtoArg:
                {
                    argsBox.nodeString = RequireFlagValue(argsArray, argIndex);
                    break;
                }

                case WeightArg:
                {
                    // Edit scripts are machine-readable, so parse independently of the UI culture.
                    argsBox.weight = double.Parse(RequireFlagValue(argsArray, argIndex), System.Globalization.CultureInfo.InvariantCulture);
                    break;
                }

                case NameArg:
                {
                    argsBox.name = RequireFlagValue(argsArray, argIndex);
                    break;
                }

                default:
                {
                    throw new ArgumentException("Parsing Ssurgeon args: unknown flag " + argsArray[argIndex]);
                }
                }
            }
            // Parse the arguments based upon the type of command to execute.
            // TODO: this logic really should be moved into the individual classes.  The string-->class
            // mappings should also be stored in more appropriate data structure.
            if (Sharpen.Runtime.EqualsIgnoreCase(command, AddDep.Label))
            {
                return AddDep.CreateEngAddDep(argsBox.govNodeName, argsBox.reln, argsBox.nodeString);
            }
            if (Sharpen.Runtime.EqualsIgnoreCase(command, AddNode.Label))
            {
                return AddNode.CreateAddNode(argsBox.nodeString, argsBox.name);
            }
            if (Sharpen.Runtime.EqualsIgnoreCase(command, AddEdge.Label))
            {
                return AddEdge.CreateEngAddEdge(argsBox.govNodeName, argsBox.dep, argsBox.reln);
            }
            if (Sharpen.Runtime.EqualsIgnoreCase(command, DeleteGraphFromNode.Label))
            {
                return new DeleteGraphFromNode(argsBox.node);
            }
            if (Sharpen.Runtime.EqualsIgnoreCase(command, RemoveEdge.Label))
            {
                return new RemoveEdge(GrammaticalRelation.ValueOf(argsBox.reln), argsBox.govNodeName, argsBox.dep);
            }
            if (Sharpen.Runtime.EqualsIgnoreCase(command, RemoveNamedEdge.Label))
            {
                return new RemoveNamedEdge(argsBox.edge, argsBox.govNodeName, argsBox.dep);
            }
            if (Sharpen.Runtime.EqualsIgnoreCase(command, KillNonRootedNodes.Label))
            {
                return new KillNonRootedNodes();
            }
            if (Sharpen.Runtime.EqualsIgnoreCase(command, KillAllIncomingEdges.Label))
            {
                return new KillAllIncomingEdges(argsBox.node);
            }
            throw new ArgumentException("Error in SsurgeonEdit.parseEditLine: command '" + command + "' is not supported");
        }

        /// <summary>
        /// Returns the entry that follows the flag at <paramref name="flagIndex"/>, or throws an
        /// ArgumentException when the flag is the last entry and therefore has no value.
        /// </summary>
        private static string RequireFlagValue(string[] args, int flagIndex)
        {
            if (flagIndex + 1 >= args.Length)
            {
                throw new ArgumentException("Parsing Ssurgeon args: missing value for flag " + args[flagIndex]);
            }
            return args[flagIndex + 1];
        }
Esempio n. 7
0
 /// <summary>
 /// Looks up the relation named <paramref name="s"/> by delegating to
 /// <c>GrammaticalRelation.ValueOf</c> with this class's <c>Values()</c> and <c>ValuesLock()</c>.
 /// </summary>
 /// <param name="s">The relation name to look up.</param>
 /// <returns>Whatever the delegated lookup returns for <paramref name="s"/>.</returns>
 public static GrammaticalRelation ValueOf(string s)
 {
     var candidates     = Values();
     var candidatesLock = ValuesLock();

     return GrammaticalRelation.ValueOf(s, candidates, candidatesLock);
 }
        //      collapseMultiwordPreps(list);
        /// <summary>
        /// Collapses two-step dependency chains that run through a word tagged "P" into a single
        /// dependency, rewriting <paramref name="list"/> in place.
        /// </summary>
        /// <remarks>
        /// For a pair dep(gov, p) and dep(p, x), where both relations are
        /// <c>GrammaticalRelation.Dependent</c> and p is tagged "P", a new dependency
        /// reln(gov, x) is created whose relation is looked up from p's value; both originals are
        /// marked <c>GrammaticalRelation.Kill</c>. Any remaining dependencies governed by p are
        /// re-attached to gov. Afterwards the list holds the new dependencies followed by the
        /// surviving originals.
        /// </remarks>
        /// <param name="list">The typed dependencies to collapse; cleared and refilled.</param>
        private static void CollapsePrepAndPoss(ICollection <TypedDependency> list)
        {
            ICollection <TypedDependency> newTypedDeps = new List <TypedDependency>();
            // Construct a map from words to the set of typed
            // dependencies in which the word appears as governor.
            IDictionary <IndexedWord, ICollection <TypedDependency> > map = Generics.NewHashMap();

            foreach (TypedDependency typedDep in list)
            {
                IndexedWord gov = typedDep.Gov();
                ICollection <TypedDependency> governed;
                if (!map.TryGetValue(gov, out governed))
                {
                    governed = Generics.NewHashSet <TypedDependency>();
                    map[gov] = governed;
                }
                governed.Add(typedDep);
            }
            foreach (TypedDependency td1 in list)
            {
                if (td1.Reln() != GrammaticalRelation.Kill)
                {
                    IndexedWord td1Dep    = td1.Dep();
                    string      td1DepPOS = td1Dep.Tag();
                    // find all other typedDeps having our dep as gov.
                    // Most words govern nothing, and the dictionary indexer throws for a missing
                    // key, so the lookup has to go through TryGetValue.
                    ICollection <TypedDependency> possibles;
                    if (map.TryGetValue(td1Dep, out possibles) && possibles != null)
                    {
                        // look for the "second half"
                        foreach (TypedDependency td2 in possibles)
                        {
                            // Constant-first comparison so an untagged word is simply not a match.
                            if (td1.Reln() == GrammaticalRelation.Dependent && td2.Reln() == GrammaticalRelation.Dependent && "P".Equals(td1DepPOS))
                            {
                                GrammaticalRelation td3reln = UniversalChineseGrammaticalRelations.ValueOf(td1Dep.Value());
                                if (td3reln == null)
                                {
                                    td3reln = GrammaticalRelation.ValueOf(Language.UniversalChinese, td1Dep.Value());
                                }
                                TypedDependency td3 = new TypedDependency(td3reln, td1.Gov(), td2.Dep());
                                newTypedDeps.Add(td3);
                                // remember these are "used up"
                                td1.SetReln(GrammaticalRelation.Kill);
                                td2.SetReln(GrammaticalRelation.Kill);
                            }
                        }
                        // Now we need to see if there any TDs that will be "orphaned"
                        // by this collapse.  Example: if we have:
                        //   dep(drew, on)
                        //   dep(on, book)
                        //   dep(on, right)
                        // the first two will be collapsed to on(drew, book), but then
                        // the third one will be orphaned, since its governor no
                        // longer appears.  So, change its governor to 'drew'.
                        if (td1.Reln().Equals(GrammaticalRelation.Kill))
                        {
                            foreach (TypedDependency orphan in possibles)
                            {
                                if (!orphan.Reln().Equals(GrammaticalRelation.Kill))
                                {
                                    orphan.SetGov(td1.Gov());
                                }
                            }
                        }
                    }
                }
            }
            // now copy remaining unkilled TDs from here to new
            foreach (TypedDependency td in list)
            {
                if (!td.Reln().Equals(GrammaticalRelation.Kill))
                {
                    newTypedDeps.Add(td);
                }
            }
            // forget all (esp. killed) TDs
            list.Clear();
            Sharpen.Collections.AddAll(list, newTypedDeps);
        }
Esempio n. 9
0
 /// <summary>Returns all of the entailed shortened clauses (as per natural logic) from the given clause.</summary>
 /// <remarks>
 /// Returns all of the entailed shortened clauses (as per natural logic) from the given clause.
 /// This runs the forward entailment component of the OpenIE system only.
 /// It is usually chained together with the clause splitting component:
 /// <see cref="ClausesInSentence(Edu.Stanford.Nlp.Util.ICoreMap)"/>
 /// .
 /// </remarks>
 /// <param name="clause">The premise clause, as a sentence fragment in itself.</param>
 /// <returns>A list of entailed clauses.</returns>
 public virtual IList <SentenceFragment> EntailmentsFromClause(SentenceFragment clause)
 {
     // An empty parse tree entails nothing.
     if (clause.parseTree.IsEmpty())
     {
         return Java.Util.Collections.EmptyList();
     }

     // Forward entailments first, followed by the premise clause itself.
     IList <SentenceFragment> entailments = new List <SentenceFragment>();
     if (entailmentsPerSentence > 0)
     {
         // NOTE(review): Map is called with a null mapper here; this looks like a lambda that was
         // lost in translation. Confirm against the upstream implementation.
         Sharpen.Collections.AddAll(entailments, forwardEntailer.Apply(clause.parseTree, true).Search().Stream().Map(null).Collect(Collectors.ToList()));
     }
     entailments.Add(clause);

     // A special case for adjective entailments: each match of adjectivePattern may yield a
     // small "subj be adj [prep pobj]" fragment.
     IList <SentenceFragment> adjectiveEntailments = new List <SentenceFragment>();
     SemgrexMatcher           adjMatcher           = adjectivePattern.Matcher(clause.parseTree);
     while (adjMatcher.Find())
     {
         // (get nodes)
         IndexedWord subj = adjMatcher.GetNode("subj");
         IndexedWord be   = adjMatcher.GetNode("be");
         IndexedWord adj  = adjMatcher.GetNode("adj");
         IndexedWord obj  = adjMatcher.GetNode("obj");
         IndexedWord pobj = adjMatcher.GetNode("pobj");
         string      prep = adjMatcher.GetRelnString("prep");

         // (if the adjective, or any earlier adjective, is privative, then all bets are off)
         bool blockedByPrivative = false;
         foreach (SemanticGraphEdge modifier in clause.parseTree.OutgoingEdgeIterable(obj))
         {
             if (!"amod".Equals(modifier.GetRelation().ToString()))
             {
                 continue;
             }
             if (modifier.GetDependent().Index() > adj.Index())
             {
                 continue;
             }
             if (Edu.Stanford.Nlp.Naturalli.Util.PrivativeAdjectives.Contains(modifier.GetDependent().Word().ToLower()))
             {
                 blockedByPrivative = true;
                 break;
             }
         }
         if (blockedByPrivative)
         {
             // Skip this match entirely and move on to the next one.
             continue;
         }

         // (create the core tree)
         SemanticGraph adjTree = new SemanticGraph();
         adjTree.AddRoot(adj);
         adjTree.AddVertex(subj);
         adjTree.AddVertex(be);
         adjTree.AddEdge(adj, be, GrammaticalRelation.ValueOf(Language.English, "cop"), double.NegativeInfinity, false);
         adjTree.AddEdge(adj, subj, GrammaticalRelation.ValueOf(Language.English, "nsubj"), double.NegativeInfinity, false);

         // (add pp attachment, if it existed)
         if (pobj != null)
         {
             System.Diagnostics.Debug.Assert(prep != null);
             adjTree.AddEdge(adj, pobj, GrammaticalRelation.ValueOf(Language.English, prep), double.NegativeInfinity, false);
         }

         // (check for monotonicity) only keep the fragment when both the adjective and the
         // copula carry an upward polarity annotation.
         if (adj.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation)).IsUpwards() && be.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation)).IsUpwards())
         {
             adjectiveEntailments.Add(new SentenceFragment(adjTree, clause.assumedTruth, false));
         }
     }

     Sharpen.Collections.AddAll(entailments, adjectiveEntailments);
     return entailments;
 }
Esempio n. 10
0
        /// <summary>Fix some bizarre peculiarities with certain trees.</summary>
        /// <remarks>
        /// Fix some bizarre peculiarities with certain trees.
        /// So far, these include:
        /// <ul>
        /// <li>Sometimes there's a node from a word to itself. This seems wrong.</li>
        /// </ul>
        /// </remarks>
        /// <param name="tree">The tree to clean (in place!).</param>
        /// <returns>A list of extra edges, which are valid but were removed.</returns>
        public static IList <SemanticGraphEdge> CleanTree(SemanticGraph tree)
        {
            //    assert !isCyclic(tree);
            // Clean nodes
            IList <IndexedWord> toDelete = new List <IndexedWord>();

            foreach (IndexedWord vertex in tree.VertexSet())
            {
                // Clean punctuation
                if (vertex.Tag() == null)
                {
                    continue;
                }
                char tag = vertex.BackingLabel().Tag()[0];
                if (tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':')
                {
                    if (!tree.OutgoingEdgeIterator(vertex).MoveNext())
                    {
                        // This should really never happen, but it does.
                        toDelete.Add(vertex);
                    }
                }
            }
            toDelete.ForEach(null);
            // Clean edges
            IEnumerator <SemanticGraphEdge> iter = tree.EdgeIterable().GetEnumerator();
            IList <Triple <IndexedWord, IndexedWord, SemanticGraphEdge> > toAdd = new List <Triple <IndexedWord, IndexedWord, SemanticGraphEdge> >();

            toDelete.Clear();
            while (iter.MoveNext())
            {
                SemanticGraphEdge edge = iter.Current;
                if (edge.GetDependent().Index() == edge.GetGovernor().Index())
                {
                    // Clean up copy-edges
                    if (edge.GetDependent().IsCopy(edge.GetGovernor()))
                    {
                        foreach (SemanticGraphEdge toCopy in tree.OutgoingEdgeIterable(edge.GetDependent()))
                        {
                            toAdd.Add(Triple.MakeTriple(edge.GetGovernor(), toCopy.GetDependent(), toCopy));
                        }
                        toDelete.Add(edge.GetDependent());
                    }
                    if (edge.GetGovernor().IsCopy(edge.GetDependent()))
                    {
                        foreach (SemanticGraphEdge toCopy in tree.OutgoingEdgeIterable(edge.GetGovernor()))
                        {
                            toAdd.Add(Triple.MakeTriple(edge.GetDependent(), toCopy.GetDependent(), toCopy));
                        }
                        toDelete.Add(edge.GetGovernor());
                    }
                    // Clean self-edges
                    iter.Remove();
                }
                else
                {
                    if (edge.GetRelation().ToString().Equals("punct"))
                    {
                        // Clean punctuation (again)
                        if (!tree.OutgoingEdgeIterator(edge.GetDependent()).MoveNext())
                        {
                            // This should really never happen, but it does.
                            iter.Remove();
                        }
                    }
                }
            }
            // (add edges we wanted to add)
            toDelete.ForEach(null);
            foreach (Triple <IndexedWord, IndexedWord, SemanticGraphEdge> edge_1 in toAdd)
            {
                tree.AddEdge(edge_1.first, edge_1.second, edge_1.third.GetRelation(), edge_1.third.GetWeight(), edge_1.third.IsExtra());
            }
            // Handle extra edges.
            // Two cases:
            // (1) the extra edge is a subj/obj edge and the main edge is a conj:.*
            //     in this case, keep the extra
            // (2) otherwise, delete the extra
            IList <SemanticGraphEdge> extraEdges = new List <SemanticGraphEdge>();

            foreach (SemanticGraphEdge edge_2 in tree.EdgeIterable())
            {
                if (edge_2.IsExtra())
                {
                    IList <SemanticGraphEdge> incomingEdges = tree.IncomingEdgeList(edge_2.GetDependent());
                    SemanticGraphEdge         toKeep        = null;
                    foreach (SemanticGraphEdge candidate in incomingEdges)
                    {
                        if (toKeep == null)
                        {
                            toKeep = candidate;
                        }
                        else
                        {
                            if (toKeep.GetRelation().ToString().StartsWith("conj") && candidate.GetRelation().ToString().Matches(".subj.*|.obj.*"))
                            {
                                toKeep = candidate;
                            }
                            else
                            {
                                if (!candidate.IsExtra() && !(candidate.GetRelation().ToString().StartsWith("conj") && toKeep.GetRelation().ToString().Matches(".subj.*|.obj.*")))
                                {
                                    toKeep = candidate;
                                }
                            }
                        }
                    }
                    foreach (SemanticGraphEdge candidate_1 in incomingEdges)
                    {
                        if (candidate_1 != toKeep)
                        {
                            extraEdges.Add(candidate_1);
                        }
                    }
                }
            }
            extraEdges.ForEach(null);
            // Add apposition edges (simple coref)
            foreach (SemanticGraphEdge extraEdge in new List <SemanticGraphEdge>(extraEdges))
            {
                // note[gabor] prevent concurrent modification exception
                foreach (SemanticGraphEdge candidateAppos in tree.IncomingEdgeIterable(extraEdge.GetDependent()))
                {
                    if (candidateAppos.GetRelation().ToString().Equals("appos"))
                    {
                        extraEdges.Add(new SemanticGraphEdge(extraEdge.GetGovernor(), candidateAppos.GetGovernor(), extraEdge.GetRelation(), extraEdge.GetWeight(), extraEdge.IsExtra()));
                    }
                }
                foreach (SemanticGraphEdge candidateAppos_1 in tree.OutgoingEdgeIterable(extraEdge.GetDependent()))
                {
                    if (candidateAppos_1.GetRelation().ToString().Equals("appos"))
                    {
                        extraEdges.Add(new SemanticGraphEdge(extraEdge.GetGovernor(), candidateAppos_1.GetDependent(), extraEdge.GetRelation(), extraEdge.GetWeight(), extraEdge.IsExtra()));
                    }
                }
            }
            // Brute force ensure tree
            // Remove incoming edges from roots
            IList <SemanticGraphEdge> rootIncomingEdges = new List <SemanticGraphEdge>();

            foreach (IndexedWord root in tree.GetRoots())
            {
                foreach (SemanticGraphEdge incomingEdge in tree.IncomingEdgeIterable(root))
                {
                    rootIncomingEdges.Add(incomingEdge);
                }
            }
            rootIncomingEdges.ForEach(null);
            // Loop until it becomes a tree.
            bool changed = true;

            while (changed)
            {
                // I just want trees to be trees; is that so much to ask!?
                changed = false;
                IList <IndexedWord>       danglingNodes = new List <IndexedWord>();
                IList <SemanticGraphEdge> invalidEdges  = new List <SemanticGraphEdge>();
                foreach (IndexedWord vertex_1 in tree.VertexSet())
                {
                    // Collect statistics
                    IEnumerator <SemanticGraphEdge> incomingIter = tree.IncomingEdgeIterator(vertex_1);
                    bool hasIncoming         = incomingIter.MoveNext();
                    bool hasMultipleIncoming = false;
                    if (hasIncoming)
                    {
                        incomingIter.Current;
                        hasMultipleIncoming = incomingIter.MoveNext();
                    }
                    // Register actions
                    if (!hasIncoming && !tree.GetRoots().Contains(vertex_1))
                    {
                        danglingNodes.Add(vertex_1);
                    }
                    else
                    {
                        if (hasMultipleIncoming)
                        {
                            foreach (SemanticGraphEdge edge in new IterableIterator <SemanticGraphEdge>(incomingIter))
                            {
                                invalidEdges.Add(edge_2);
                            }
                        }
                    }
                }
                // Perform actions
                foreach (IndexedWord vertex_2 in danglingNodes)
                {
                    tree.RemoveVertex(vertex_2);
                    changed = true;
                }
                foreach (SemanticGraphEdge edge_3 in invalidEdges)
                {
                    tree.RemoveEdge(edge_3);
                    changed = true;
                }
            }
            // Edge case: remove duplicate dobj to "that."
            //            This is a common parse error.
            foreach (IndexedWord vertex_3 in tree.VertexSet())
            {
                SemanticGraphEdge thatEdge = null;
                int dobjCount = 0;
                foreach (SemanticGraphEdge edge in tree.OutgoingEdgeIterable(vertex_3))
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase("that", edge_2.GetDependent().Word()))
                    {
                        thatEdge = edge_2;
                    }
                    if ("dobj".Equals(edge_2.GetRelation().ToString()))
                    {
                        dobjCount += 1;
                    }
                }
                if (dobjCount > 1 && thatEdge != null)
                {
                    // Case: there are two dobj edges, one of which goes to the word "that"
                    // Action: rewrite the dobj edge to "that" to be a "mark" edge.
                    tree.RemoveEdge(thatEdge);
                    tree.AddEdge(thatEdge.GetGovernor(), thatEdge.GetDependent(), GrammaticalRelation.ValueOf(thatEdge.GetRelation().GetLanguage(), "mark"), thatEdge.GetWeight(), thatEdge.IsExtra());
                }
            }
            // Return
            System.Diagnostics.Debug.Assert(IsTree(tree));
            return(extraEdges);
        }
Esempio n. 11
0
            /// <summary>
            /// Assembles a <see cref="SemanticGraph"/> from this object's intermediate <c>nodes</c> and
            /// <c>edges</c>, using the tokens of <paramref name="sentence"/> as the labels behind each vertex.
            /// </summary>
            /// <param name="sentence">
            /// Tokens of the sentence. A node whose index is <c>i</c> is backed by <c>sentence[i - 1]</c>.
            /// </param>
            /// <returns>
            /// The graph holding one vertex per intermediate node and one edge (weight 1.0) per intermediate edge.
            /// </returns>
            /// <exception cref="RuntimeIOException">
            /// Thrown when an edge names a (index, copy) pair for which no vertex was created.
            /// </exception>
            public virtual SemanticGraph ConvertIntermediateGraph(IList <CoreLabel> sentence)
            {
                SemanticGraph result = new SemanticGraph();
                // Vertices are registered under (token index, copy count) so edges can find them later.
                // Sentences such as "I went over the river and through the woods" have
                // two copies for "went" in the collapsed dependencies.
                TwoDimensionalMap <int, int, IndexedWord> vertexLookup = TwoDimensionalMap.HashMap();

                foreach (AnnotationSerializer.IntermediateNode node in nodes)
                {
                    // Node indices start at 1, while the token list is addressed from 0.
                    CoreLabel label  = sentence[node.index - 1];
                    bool      isCopy = node.copyAnnotation > 0;
                    // TODO: if we make a copy wrapper CoreLabel, use it here instead
                    IndexedWord vertex = isCopy ? new IndexedWord(new CoreLabel(label)) : new IndexedWord(label);
                    if (isCopy)
                    {
                        vertex.SetCopyCount(node.copyAnnotation);
                    }
                    // Backwards compatibility: newer annotations should already carry these
                    // fields, but annotations older than August 2014 might not, so fill in
                    // whatever the vertex lacks from the intermediate node.
                    if (vertex.DocID() == null)
                    {
                        if (node.docId != null)
                        {
                            vertex.SetDocID(node.docId);
                        }
                    }
                    if (vertex.SentIndex() < 0)
                    {
                        if (node.sentIndex >= 0)
                        {
                            vertex.SetSentIndex(node.sentIndex);
                        }
                    }
                    if (vertex.Index() < 0)
                    {
                        if (node.index >= 0)
                        {
                            vertex.SetIndex(node.index);
                        }
                    }
                    vertexLookup.Put(vertex.Index(), vertex.CopyCount(), vertex);
                    result.AddVertex(vertex);
                    if (@node.isRoot)
                    {
                        result.AddRoot(vertex);
                    }
                }

                // Wire up every stored edge between the vertices created above.
                foreach (AnnotationSerializer.IntermediateEdge link in edges)
                {
                    IndexedWord edgeStart = vertexLookup.Get(link.source, link.sourceCopy);
                    if (edgeStart == null)
                    {
                        throw new RuntimeIOException("Failed to find node " + link.source + "-" + link.sourceCopy);
                    }
                    IndexedWord edgeEnd = vertexLookup.Get(link.target, link.targetCopy);
                    if (edgeEnd == null)
                    {
                        throw new RuntimeIOException("Failed to find node " + link.target + "-" + link.targetCopy);
                    }
                    lock (Lock)
                    {
                        // The relation lookup is not thread-safe (static fields in GrammaticalRelation),
                        // so it and the edge insertion run under the shared lock.
                        result.AddEdge(edgeStart, edgeEnd, GrammaticalRelation.ValueOf(link.dep), 1.0, link.isExtra);
                    }
                }

                // An empty graph has no roots to compute.
                if (result.IsEmpty())
                {
                    return(result);
                }
                // No node was flagged as root in the stored data: let the graph work the roots out itself.
                if (result.GetRoots().Count == 0)
                {
                    result.ResetRoots();
                }
                return(result);
            }