/// <summary>
/// Parses the given XML file and returns the Ssurgeon patterns declared in it.
/// </summary>
/// <remarks>
/// Each element tagged <c>SsurgeonPattern.SsurgeonElemTag</c> is converted with
/// <c>SsurgeonPatternFromXML</c> and appended to the result. Each element tagged
/// <c>SsurgeonPattern.ResourceTag</c> is wrapped in a <c>SsurgeonWordlist</c> and
/// registered through <c>AddResource</c>.
/// </remarks>
/// <param name="file">The XML file to parse.</param>
/// <returns>The patterns found, in the order the node list yields them.</returns>
/// <exception cref="System.Exception"/>
public virtual IList <SsurgeonPattern> ReadFromFile(File file)
{
    IList <SsurgeonPattern> retList = new List <SsurgeonPattern>();
    IDocument doc = DocumentBuilderFactory.NewInstance().NewDocumentBuilder().Parse(file);

    INodeList patternNodes = doc.GetElementsByTagName(SsurgeonPattern.SsurgeonElemTag);
    for (int i = 0; i < patternNodes.GetLength(); i++)
    {
        INode node = patternNodes.Item(i);
        if (node.GetNodeType() == NodeConstants.ElementNode)
        {
            IElement elt = (IElement)node;
            SsurgeonPattern pattern = SsurgeonPatternFromXML(elt);
            retList.Add(pattern);
        }
    }

    INodeList resourceNodes = doc.GetElementsByTagName(SsurgeonPattern.ResourceTag);
    for (int i = 0; i < resourceNodes.GetLength(); i++)
    {
        // Index into resourceNodes, the list this loop is bounded by. Reading from
        // patternNodes here would hand pattern elements (or nothing at all) to
        // SsurgeonWordlist.
        INode node = resourceNodes.Item(i);
        if (node.GetNodeType() == NodeConstants.ElementNode)
        {
            IElement resourceElt = (IElement)node;
            SsurgeonWordlist wlRsrc = new SsurgeonWordlist(resourceElt);
            AddResource(wlRsrc);
        }
    }
    return(retList);
}
/// <summary>
/// Follows the element names in <paramref name="nodePath"/>, starting at index
/// <paramref name="cur"/>, down through the children of <paramref name="node"/>.
/// Children whose name equals the last path entry are appended to
/// <paramref name="res"/>; children matching an earlier entry are descended into.
/// </summary>
/// <param name="node">Node whose children are examined.</param>
/// <param name="nodePath">Sequence of node names to follow.</param>
/// <param name="cur">Index into <paramref name="nodePath"/> for this level; out-of-range values do nothing.</param>
/// <param name="res">Receives the matching nodes.</param>
private static void GetMatchingNodes(INode node, string[] nodePath, int cur, IList <INode> res)
{
    bool inRange = cur >= 0 && cur < nodePath.Length;
    if (!inRange)
    {
        return;
    }
    string wanted = nodePath[cur];
    bool isFinalStep = (cur + 1 == nodePath.Length);
    if (!node.HasChildNodes())
    {
        return;
    }
    INodeList kids = node.GetChildNodes();
    int idx = 0;
    while (idx < kids.GetLength())
    {
        INode kid = kids.Item(idx);
        idx++;
        if (!wanted.Equals(kid.GetNodeName()))
        {
            continue;
        }
        if (isFinalStep)
        {
            res.Add(kid);
        }
        else
        {
            GetMatchingNodes(kid, nodePath, cur + 1, res);
        }
    }
}
/// <summary>
/// Builds a <c>SsurgeonPattern</c> from its XML element.
/// </summary>
/// <remarks>
/// Reads the uid, notes and semgrex text via <c>GetTagText</c>, compiles the semgrex
/// string, then adds one edit per element tagged <c>SsurgeonPattern.EditListElemTag</c>.
/// If an element tagged <c>SsurgeonPattern.PredicateTag</c> is present, its first child
/// element is assembled into a predicate and attached to the pattern.
/// </remarks>
/// <param name="elt">The XML element describing one pattern.</param>
/// <returns>The assembled pattern.</returns>
/// <exception cref="System.Exception"/>
public static SsurgeonPattern SsurgeonPatternFromXML(IElement elt)
{
    string uid = GetTagText(elt, SsurgeonPattern.UidElemTag);
    string notes = GetTagText(elt, SsurgeonPattern.NotesElemTag);
    string semgrexString = GetTagText(elt, SsurgeonPattern.SemgrexElemTag);

    SsurgeonPattern result = new SsurgeonPattern(uid, SemgrexPattern.Compile(semgrexString));
    result.SetNotes(notes);

    INodeList editList = elt.GetElementsByTagName(SsurgeonPattern.EditListElemTag);
    int idx = 0;
    while (idx < editList.GetLength())
    {
        INode candidate = editList.Item(idx);
        idx++;
        if (candidate.GetNodeType() != NodeConstants.ElementNode)
        {
            continue;
        }
        string editLine = GetEltText((IElement)candidate);
        result.AddEdit(Edu.Stanford.Nlp.Semgraph.Semgrex.Ssurgeon.Ssurgeon.ParseEditLine(editLine));
    }

    // The predicate is optional: attach it only when the tag is present.
    IElement predicateElt = GetFirstTag(elt, SsurgeonPattern.PredicateTag);
    if (predicateElt == null)
    {
        return(result);
    }
    ISsurgPred predicate = AssemblePredFromXML(GetFirstChildElement(predicateElt));
    result.SetPredicate(predicate);
    return(result);
}
/// <summary>
/// Follows the patterns in <paramref name="nodePath"/>, starting at index
/// <paramref name="cur"/>, down through the children of <paramref name="node"/>.
/// Children whose name fully matches the last pattern are appended to
/// <paramref name="res"/>; children matching an earlier pattern are descended into.
/// </summary>
/// <param name="node">Node whose children are examined.</param>
/// <param name="nodePath">Sequence of name patterns to follow.</param>
/// <param name="cur">Index into <paramref name="nodePath"/> for this level; out-of-range values do nothing.</param>
/// <param name="res">Receives the matching nodes.</param>
private static void GetMatchingNodes(INode node, Pattern[] nodePath, int cur, IList <INode> res)
{
    bool inRange = cur >= 0 && cur < nodePath.Length;
    if (!inRange)
    {
        return;
    }
    Pattern wanted = nodePath[cur];
    bool isFinalStep = (cur + 1 == nodePath.Length);
    INodeList kids = node.GetChildNodes();
    int idx = 0;
    while (idx < kids.GetLength())
    {
        INode kid = kids.Item(idx);
        idx++;
        if (!wanted.Matcher(kid.GetNodeName()).Matches())
        {
            continue;
        }
        if (isFinalStep)
        {
            res.Add(kid);
        }
        else
        {
            GetMatchingNodes(kid, nodePath, cur + 1, res);
        }
    }
}
/// <summary>Removes every immediate child of the given node.</summary>
/// <param name="e">The node to empty.</param>
public static void RemoveChildren(INode e)
{
    INodeList list = e.GetChildNodes();
    // Iterate from the end. DOM child lists are live, so removing item i while
    // counting upwards shifts the remaining children down and every other child
    // would be skipped. Walking backwards is correct for live and snapshot lists.
    for (int i = list.GetLength() - 1; i >= 0; i--)
    {
        e.RemoveChild(list.Item(i));
    }
}
/// <summary>
/// Rebuilds a word list from its XML element: the <c>id</c> attribute becomes the
/// list id, and the text of every element tagged <c>WordElt</c> is added to the words.
/// </summary>
/// <param name="rootElt">The XML element describing the word list.</param>
public SsurgeonWordlist(IElement rootElt)
{
    id = rootElt.GetAttribute("id");
    INodeList wordNodes = rootElt.GetElementsByTagName(WordElt);
    int idx = 0;
    while (idx < wordNodes.GetLength())
    {
        INode candidate = wordNodes.Item(idx);
        idx++;
        if (candidate.GetNodeType() != NodeConstants.ElementNode)
        {
            continue;
        }
        IElement wordElement = (IElement)candidate;
        words.Add(Edu.Stanford.Nlp.Semgraph.Semgrex.Ssurgeon.Ssurgeon.GetEltText(wordElement));
    }
}
/// <summary>
/// Collects the immediate children of <paramref name="node"/> whose node name
/// equals <paramref name="name"/>.
/// </summary>
/// <param name="node">Parent node to inspect.</param>
/// <param name="name">Node name to look for.</param>
/// <returns>The matching children, in list order; empty when none match.</returns>
protected internal static IList <INode> GetChildrenByName(INode node, string name)
{
    IList <INode> hits = new List <INode>();
    INodeList kids = node.GetChildNodes();
    int idx = 0;
    while (idx < kids.GetLength())
    {
        INode kid = kids.Item(idx);
        idx++;
        if (!kid.GetNodeName().Equals(name))
        {
            continue;
        }
        hits.Add(kid);
    }
    return(hits);
}
/// <summary>
/// Concatenates the text content of every immediate child of <paramref name="text"/>,
/// each followed by a single space.
/// </summary>
/// <remarks>
/// Before appending, every newline that is not immediately followed by another
/// newline is replaced by a space, and all underscores are removed.
/// </remarks>
/// <param name="text">Node whose children supply the text.</param>
/// <returns>The cleaned, space-separated text.</returns>
public static string GetJustText(INode text)
{
    StringBuilder buffer = new StringBuilder();
    INodeList parts = text.GetChildNodes();
    int idx = 0;
    while (idx < parts.GetLength())
    {
        string cleaned = parts.Item(idx).GetTextContent()
            .ReplaceAll("\n(?!\n)", " ")
            // bug fix for sentence splitting
            .ReplaceAll("_", string.Empty);
        buffer.Append(cleaned).Append(" ");
        idx++;
    }
    return(buffer.ToString());
}
/// <summary>
/// Returns the node value of the first child of <paramref name="element"/>, treating
/// that child as a text node.
/// </summary>
/// <param name="element">Element whose first child is read.</param>
/// <returns>
/// The first child's node value; the empty string when there are no children or when
/// an exception is thrown (the exception is logged as a warning).
/// </returns>
public static string GetEltText(IElement element)
{
    string text = string.Empty;
    try
    {
        INodeList kids = element.GetChildNodes();
        if (kids.GetLength() != 0)
        {
            text = kids.Item(0).GetNodeValue();
        }
    }
    catch (Exception e)
    {
        log.Warning("Exception e=" + e.Message + " thrown calling getEltText on element=" + element);
    }
    return(text);
}
/// <summary>
/// Finds the first immediate child of <paramref name="element"/> whose node type is
/// <c>NodeConstants.ElementNode</c>.
/// </summary>
/// <param name="element">Parent element to inspect.</param>
/// <returns>
/// The first element-typed child, or <c>null</c> when there is none or an exception
/// was thrown (the exception is logged as a warning).
/// </returns>
private static IElement GetFirstChildElement(IElement element)
{
    IElement firstElement = null;
    try
    {
        INodeList kids = element.GetChildNodes();
        int idx = 0;
        while (firstElement == null && idx < kids.GetLength())
        {
            INode candidate = kids.Item(idx);
            if (candidate.GetNodeType() == NodeConstants.ElementNode)
            {
                firstElement = (IElement)candidate;
            }
            idx++;
        }
    }
    catch (Exception e)
    {
        log.Warning("Error getting first child Element for element=" + element + ", exception=" + e);
    }
    return(firstElement);
}
/// <summary>
/// Reads the next tree from the sentence list, skipping sentences for which
/// <c>GetTreeFromXML</c> yields <c>null</c>.
/// </summary>
/// <remarks>
/// The tree is normalised with <c>treeNormalizer</c>. When its label is a
/// <c>CoreLabel</c>, the sentence element's <c>AttrNumber</c> attribute is stored on
/// the label as the sentence id. <c>sentIdx</c> is advanced past every sentence
/// examined.
/// </remarks>
/// <returns>The next tree, or <c>null</c> when no sentences remain.</returns>
public virtual Tree ReadTree()
{
    Tree result = null;
    while (result == null)
    {
        if (sentences == null || sentIdx >= sentences.GetLength())
        {
            break;
        }
        int current = sentIdx++;
        INode sentRoot = sentences.Item(current);
        result = GetTreeFromXML(sentRoot);
        if (result == null)
        {
            continue;
        }
        result = treeNormalizer.NormalizeWholeTree(result, treeFactory);
        if (result.Label() is CoreLabel)
        {
            string ftbId = ((IElement)sentRoot).GetAttribute(AttrNumber);
            CoreLabel label = (CoreLabel)result.Label();
            label.Set(typeof(CoreAnnotations.SentenceIDAnnotation), ftbId);
        }
    }
    return(result);
}
/// <summary>
/// Reads the next tree from the sentence list, skipping sentences for which
/// <c>GetTreeFromXML</c> yields <c>null</c>.
/// </summary>
/// <remarks>
/// The tree is normalised with <c>treeNormalizer</c>. When its label is a
/// <c>CoreLabel</c>, the zero-based position of the sentence in the list is stored on
/// the label as the sentence id. <c>sentIdx</c> is advanced past every sentence
/// examined.
/// </remarks>
/// <returns>The next tree, or <c>null</c> when no sentences remain.</returns>
public virtual Tree ReadTree()
{
    Tree t = null;
    while (t == null && sentences != null && sentIdx < sentences.GetLength())
    {
        int thisSentenceId = sentIdx++;
        INode sentRoot = sentences.Item(thisSentenceId);
        t = GetTreeFromXML(sentRoot);
        if (t != null)
        {
            t = treeNormalizer.NormalizeWholeTree(t, treeFactory);
            if (t.Label() is CoreLabel)
            {
                // int has no static ToString(int); format the index with the invariant
                // culture so the stored id does not depend on the current locale.
                string sentenceId = thisSentenceId.ToString(System.Globalization.CultureInfo.InvariantCulture);
                ((CoreLabel)t.Label()).Set(typeof(CoreAnnotations.SentenceIDAnnotation), sentenceId);
            }
        }
    }
    return(t);
}
/// <summary>
/// Depth-first search for the first node named <paramref name="name"/>, starting with
/// <paramref name="node"/> itself and then descending into its children in list order.
/// </summary>
/// <param name="node">Root of the subtree to search.</param>
/// <param name="name">Node name to look for.</param>
/// <returns>The first matching node, or <c>null</c> when the subtree has none.</returns>
protected internal static INode GetChildByName(INode node, string name)
{
    // The starting node itself counts as a candidate.
    if (node.GetNodeName().Equals(name))
    {
        return(node);
    }
    INodeList kids = node.GetChildNodes();
    int idx = 0;
    while (idx < kids.GetLength())
    {
        INode hit = GetChildByName(kids.Item(idx), name);
        if (hit != null)
        {
            return(hit);
        }
        idx++;
    }
    return(null);
}
/// <summary>
/// Reads the <c>character</c> children of the first <c>characters</c> element in the
/// document and turns each into a <c>Person</c>.
/// </summary>
/// <remarks>
/// For each character the <c>name</c> attribute is passed through exactly as written,
/// the <c>aliases</c> attribute is split on <c>;</c>, and the optional <c>gender</c>
/// attribute defaults to the empty string.
/// </remarks>
/// <param name="doc">Parsed XML document containing a <c>characters</c> element.</param>
/// <returns>One <c>Person</c> per <c>character</c> child, in list order.</returns>
public static IList <Person> ReadXMLCharacterList(IDocument doc)
{
    IList <Person> personList = new List <Person>();
    INodeList characters = doc.GetDocumentElement().GetElementsByTagName("characters").Item(0).GetChildNodes();
    for (int i = 0; i < characters.GetLength(); i++)
    {
        INode child = characters.Item(i);
        if (!child.GetNodeName().Equals("character"))
        {
            continue;
        }
        // The name is used verbatim. A capitalised copy used to be built here but was
        // never passed to Person, and building it threw on an empty name attribute.
        string name = child.GetAttributes().GetNamedItem("name").GetNodeValue();
        IList <string> aliases = Arrays.AsList(child.GetAttributes().GetNamedItem("aliases").GetNodeValue().Split(";"));
        // gender is optional; look it up once and fall back to the empty string.
        INode genderAttr = child.GetAttributes().GetNamedItem("gender");
        string gender = (genderAttr == null) ? string.Empty : genderAttr.GetNodeValue();
        personList.Add(new Person(name, gender, aliases));
    }
    return(personList);
}
/// <summary>Returns all of the Element typed children from the given element.</summary>
/// <remarks>
/// Only immediate children whose node type is <c>NodeConstants.ElementNode</c> are
/// returned; other node types are disregarded. If an exception is thrown while
/// reading the children it is logged as a warning and the elements collected so far
/// are returned.
/// </remarks>
/// <param name="element">Parent element to inspect.</param>
/// <returns>The element-typed children, in list order; never <c>null</c>.</returns>
private static IList <IElement> GetChildElements(IElement element)
{
    // List<T> rather than LinkedList<T>: the latter has no Add method and does not
    // implement IList<T>, so it cannot back this return type.
    IList <IElement> childElements = new List <IElement>();
    try
    {
        INodeList nodeList = element.GetChildNodes();
        for (int i = 0; i < nodeList.GetLength(); i++)
        {
            INode node = nodeList.Item(i);
            if (node.GetNodeType() == NodeConstants.ElementNode)
            {
                childElements.Add((IElement)node);
            }
        }
    }
    catch (Exception e)
    {
        log.Warning("Exception thrown getting all children for element=" + element + ", e=" + e);
    }
    return(childElements);
}
/// <summary>
/// Depth-first search for the first node that has both the given name and the given
/// attribute value, starting with <paramref name="node"/> itself and then descending
/// into its children in list order.
/// </summary>
/// <param name="node">Root of the subtree to search.</param>
/// <param name="name">Required node name.</param>
/// <param name="attributeName">Name of the attribute that must be present.</param>
/// <param name="attributeValue">Value the attribute must equal.</param>
/// <returns>The first matching node, or <c>null</c> when the subtree has none.</returns>
protected internal static INode GetChildByNameAndAttribute(INode node, string name, string attributeName, string attributeValue)
{
    INodeList children = node.GetChildNodes();
    INamedNodeMap attribs = node.GetAttributes();
    INode attribute = null;
    // this node matches
    if (node.GetNodeName().Equals(name) && attribs != null && (attribute = attribs.GetNamedItem(attributeName)) != null && attribute.GetNodeValue().Equals(attributeValue))
    {
        return(node);
    }
    // search children, keeping the name constraint: recursing through an
    // attribute-only search would accept descendants with the wrong node name.
    for (int i = 0; i < children.GetLength(); i++)
    {
        INode found = GetChildByNameAndAttribute(children.Item(i), name, attributeName, attributeValue);
        if (found != null)
        {
            return(found);
        }
    }
    // failed
    return(null);
}
/// <summary>
/// Finds the first element-typed node among those returned by
/// <c>GetElementsByTagName</c> for <paramref name="tag"/> on <paramref name="element"/>.
/// </summary>
/// <param name="element">Element to search under.</param>
/// <param name="tag">Tag name to look for.</param>
/// <returns>
/// The first match, or <c>null</c> when there is none or an exception was thrown
/// (a warning is logged in that case).
/// </returns>
private static IElement GetFirstTag(IElement element, string tag)
{
    IElement firstMatch = null;
    try
    {
        INodeList tagged = element.GetElementsByTagName(tag);
        int idx = 0;
        while (firstMatch == null && idx < tagged.GetLength())
        {
            INode candidate = tagged.Item(idx);
            if (candidate.GetNodeType() == NodeConstants.ElementNode)
            {
                firstMatch = (IElement)candidate;
            }
            idx++;
        }
    }
    catch (Exception)
    {
        log.Warning("Error getting first tag " + tag + " under element=" + element);
    }
    return(firstMatch);
}
/// <summary>
/// Parses lattices from an XML stream and appends one <c>Lattice</c> per
/// <c>Sentence</c> element to <c>lattices</c>.
/// </summary>
/// <remarks>
/// For each sentence the node ids are collected in sorted order and renumbered
/// consecutively from zero. Every node after the first whose id is a multiple of
/// <c>NodeOffset</c> adds a <c>".*"</c> parser constraint spanning from the first
/// node's index to that node's index. Edges are then read with their segment,
/// weight, renumbered endpoints and any attributes.
/// </remarks>
/// <param name="stream">Stream containing the XML document.</param>
/// <returns>
/// <c>true</c> on success; <c>false</c> when no XML parser is available or when an
/// <c>IOException</c> or <c>SAXException</c> is caught while parsing.
/// </returns>
private bool Load(InputStream stream)
{
    DocumentBuilder parser = XMLUtils.GetXmlParser();
    if (parser == null)
    {
        return(false);
    }
    try
    {
        IElement root = parser.Parse(stream).GetDocumentElement();
        INodeList sentenceList = root.GetElementsByTagName(Sentence);
        for (int s = 0; s < sentenceList.GetLength(); s++)
        {
            IElement sentenceElt = (IElement)sentenceList.Item(s);
            Lattice lattice = new Lattice();

            // Gather the node ids in sorted order.
            ISortedSet <int> sortedIds = new TreeSet <int>();
            INodeList nodeElts = sentenceElt.GetElementsByTagName(Node);
            for (int n = 0; n < nodeElts.GetLength(); n++)
            {
                IElement nodeElt = (IElement)nodeElts.Item(n);
                sortedIds.Add(System.Convert.ToInt32(nodeElt.GetAttribute(NodeId)));
            }

            // Map each original id to a consecutive index; add boundary constraints.
            IDictionary <int, int> idToIndex = Generics.NewHashMap();
            int nextIndex = 0;
            int boundaryStart = -1;
            foreach (int id in sortedIds)
            {
                if (boundaryStart == -1)
                {
                    System.Diagnostics.Debug.Assert(id % NodeOffset == 0);
                    boundaryStart = nextIndex;
                }
                else if (id % NodeOffset == 0)
                {
                    lattice.AddConstraint(new ParserConstraint(boundaryStart, nextIndex, ".*"));
                }
                idToIndex[id] = nextIndex;
                nextIndex++;
            }

            // Read the edges.
            INodeList edgeElts = sentenceElt.GetElementsByTagName(Edge);
            for (int k = 0; k < edgeElts.GetLength(); k++)
            {
                IElement edgeElt = (IElement)edgeElts.Item(k);
                string segment = edgeElt.GetAttribute(Segment);
                // Input weights should be log scale
                double weight = double.Parse(edgeElt.GetAttribute(Weight));
                int fromId = System.Convert.ToInt32(edgeElt.GetAttribute(FromNode));
                int fromIndex = idToIndex[fromId];
                int toId = System.Convert.ToInt32(edgeElt.GetAttribute(ToNode));
                int toIndex = idToIndex[toId];
                LatticeEdge edge = new LatticeEdge(segment, weight, fromIndex, toIndex);

                // Copy the edge's attribute children onto the edge.
                INodeList attrElts = edgeElt.GetElementsByTagName(EAttrNode);
                for (int a = 0; a < attrElts.GetLength(); a++)
                {
                    IElement attrElt = (IElement)attrElts.Item(a);
                    string key = attrElt.GetAttribute(EAttr);
                    string value = attrElt.GetAttribute(EAttrVal);
                    edge.SetAttr(key, value);
                }
                lattice.AddEdge(edge);
            }

            // Configure for parsing in ExhaustivePCFG parser
            lattice.AddBoundary();
            lattices.Add(lattice);
        }
    }
    catch (IOException ioEx)
    {
        System.Console.Error.Printf("%s: Error reading XML from input stream.%n", this.GetType().FullName);
        Sharpen.Runtime.PrintStackTrace(ioEx);
        return(false);
    }
    catch (SAXException saxEx)
    {
        Sharpen.Runtime.PrintStackTrace(saxEx);
        return(false);
    }
    return(true);
}
/// <summary>
/// Converts the children of <paramref name="docElem"/> into one core map per
/// <c>TIMEX3</c> element, annotated with the timex, its text, its character offsets
/// and (when available) its token offsets.
/// </summary>
/// <remarks>
/// Character offsets are computed by accumulating the length of the text nodes and
/// TIMEX3 text contents seen so far. Token offsets are looked up from the tokens of
/// <paramref name="originalDocument"/> by character offset.
/// </remarks>
/// <param name="docElem">Element whose children are text nodes and TIMEX3 elements.</param>
/// <param name="originalDocument">Annotated document supplying sentences and tokens.</param>
/// <returns>The timex core maps, in the order their elements appear.</returns>
/// <exception cref="System.Exception">
/// Thrown when a child is neither text nor an element, when an element is not named
/// TIMEX3, or when a TIMEX3 element does not have exactly one child node.
/// </exception>
private static IList <ICoreMap> ToTimexCoreMaps(IElement docElem, ICoreMap originalDocument)
{
    //--Collect Token Offsets
    // beginMap: character begin offset -> token begin; endMap: character end offset -> token end.
    IDictionary <int, int> beginMap = Generics.NewHashMap();
    IDictionary <int, int> endMap = Generics.NewHashMap();
    bool haveTokenOffsets = true;
    foreach (ICoreMap sent in originalDocument.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        foreach (CoreLabel token in sent.Get(typeof(CoreAnnotations.TokensAnnotation)))
        {
            int tokBegin = token.Get(typeof(CoreAnnotations.TokenBeginAnnotation));
            int tokEnd = token.Get(typeof(CoreAnnotations.TokenEndAnnotation));
            // NOTE(review): these are declared as non-nullable int, so the null
            // comparison below looks like it can never be true — presumably the
            // annotations were meant to be nullable; confirm against Get's return type.
            if (tokBegin == null || tokEnd == null)
            {
                haveTokenOffsets = false;
            }
            int charBegin = token.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
            int charEnd = token.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
            beginMap[charBegin] = tokBegin;
            endMap[charEnd] = tokEnd;
        }
    }
    IList <ICoreMap> timexMaps = new List <ICoreMap>();
    // Running character offset into the document text.
    int offset = 0;
    INodeList docNodes = docElem.GetChildNodes();
    for (int i = 0; i < docNodes.GetLength(); i++)
    {
        INode content = docNodes.Item(i);
        if (content is IText)
        {
            // Plain text only advances the offset.
            IText text = (IText)content;
            offset += text.GetWholeText().Length;
        }
        else
        {
            if (content is IElement)
            {
                IElement child = (IElement)content;
                if (child.GetNodeName().Equals("TIMEX3"))
                {
                    Timex timex = new Timex(child);
                    if (child.GetChildNodes().GetLength() != 1)
                    {
                        throw new Exception("TIMEX3 should only contain text " + child);
                    }
                    string timexText = child.GetTextContent();
                    ICoreMap timexMap = new ArrayCoreMap();
                    timexMap.Set(typeof(TimeAnnotations.TimexAnnotation), timex);
                    timexMap.Set(typeof(CoreAnnotations.TextAnnotation), timexText);
                    // The timex spans [offset, offset + timexText.Length).
                    int charBegin = offset;
                    timexMap.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), offset);
                    offset += timexText.Length;
                    timexMap.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), offset);
                    int charEnd = offset;
                    //(tokens)
                    if (haveTokenOffsets)
                    {
                        // NOTE(review): the searches below compare an int with null and
                        // index the maps directly. If these are standard dictionaries, a
                        // missing key would throw rather than yield null, so the
                        // widening search would never run — verify the map type
                        // returned by Generics.NewHashMap.
                        int tokBegin = beginMap[charBegin];
                        int searchStep = 1;
                        //if no exact match, search around the character offset
                        while (tokBegin == null)
                        {
                            // Try one step to the left first, then one step to the right.
                            tokBegin = beginMap[charBegin - searchStep];
                            if (tokBegin == null)
                            {
                                tokBegin = beginMap[charBegin + searchStep];
                            }
                            searchStep += 1;
                        }
                        searchStep = 1;
                        int tokEnd = endMap[charEnd];
                        while (tokEnd == null)
                        {
                            tokEnd = endMap[charEnd - searchStep];
                            if (tokEnd == null)
                            {
                                tokEnd = endMap[charEnd + searchStep];
                            }
                            searchStep += 1;
                        }
                        timexMap.Set(typeof(CoreAnnotations.TokenBeginAnnotation), tokBegin);
                        timexMap.Set(typeof(CoreAnnotations.TokenEndAnnotation), tokEnd);
                    }
                    timexMaps.Add(timexMap);
                }
                else
                {
                    throw new Exception("unexpected element " + child);
                }
            }
            else
            {
                throw new Exception("unexpected content " + content);
            }
        }
    }
    return(timexMaps);
}
/// <summary>
/// Parses one ACE specification file into an <c>AceDocument</c>.
/// </summary>
/// <remarks>
/// The document id comes from the <c>DOCID</c> attribute of the first
/// <c>document</c> element. Every <c>entity</c>, <c>relation</c> and <c>event</c>
/// element is then read in turn, together with its <c>entity_mention</c>,
/// <c>relation_mention</c> or <c>event_mention</c> children, and registered on the
/// document.
/// </remarks>
/// <param name="f">The ACE XML file to read.</param>
/// <returns>The populated ACE document.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Xml.Sax.SAXException"/>
/// <exception cref="Javax.Xml.Parsers.ParserConfigurationException"/>
public static AceDocument ParseDocument(File f)
{
    // parse the DOM document
    IDocument document = ReadDocument(f);

    // create the ACE document object
    INode docElement = document.GetElementsByTagName("document").Item(0);
    AceDocument aceDoc = new AceDocument(GetAttributeValue(docElement, "DOCID"));

    // read all entities
    INodeList entityNodes = document.GetElementsByTagName("entity");
    for (int e = 0; e < entityNodes.GetLength(); e++)
    {
        INode entityNode = entityNodes.Item(e);
        // the entity id, type, subtype and class
        AceEntity entity = new AceEntity(
            GetAttributeValue(entityNode, "ID"),
            GetAttributeValue(entityNode, "TYPE"),
            GetAttributeValue(entityNode, "SUBTYPE"),
            GetAttributeValue(entityNode, "CLASS"));
        aceDoc.AddEntity(entity);

        // parse all mentions of this entity
        foreach (INode mentionNode in GetChildrenByName(entityNode, "entity_mention"))
        {
            AceEntityMention entityMention = ParseEntityMention(mentionNode);
            entity.AddMention(entityMention);
            aceDoc.AddEntityMention(entityMention);
        }
    }

    // read all relations
    INodeList relationNodes = document.GetElementsByTagName("relation");
    for (int r = 0; r < relationNodes.GetLength(); r++)
    {
        INode relationNode = relationNodes.Item(r);
        // the relation id, type, subtype, modality and tense
        AceRelation relation = new AceRelation(
            GetAttributeValue(relationNode, "ID"),
            GetAttributeValue(relationNode, "TYPE"),
            GetAttributeValue(relationNode, "SUBTYPE"),
            GetAttributeValue(relationNode, "MODALITY"),
            GetAttributeValue(relationNode, "TENSE"));
        aceDoc.AddRelation(relation);

        // XXX: fetch relation_arguments here!
        // parse all mentions of this relation
        foreach (INode mentionNode in GetChildrenByName(relationNode, "relation_mention"))
        {
            AceRelationMention relationMention = ParseRelationMention(mentionNode, aceDoc);
            relation.AddMention(relationMention);
            aceDoc.AddRelationMention(relationMention);
        }
    }

    // read all events
    INodeList eventNodes = document.GetElementsByTagName("event");
    for (int v = 0; v < eventNodes.GetLength(); v++)
    {
        INode eventNode = eventNodes.Item(v);
        // the event id, type, subtype, modality, polarity, genericity and tense
        AceEvent aceEvent = new AceEvent(
            GetAttributeValue(eventNode, "ID"),
            GetAttributeValue(eventNode, "TYPE"),
            GetAttributeValue(eventNode, "SUBTYPE"),
            GetAttributeValue(eventNode, "MODALITY"),
            GetAttributeValue(eventNode, "POLARITY"),
            GetAttributeValue(eventNode, "GENERICITY"),
            GetAttributeValue(eventNode, "TENSE"));
        aceDoc.AddEvent(aceEvent);

        // parse all mentions of this event
        foreach (INode mentionNode in GetChildrenByName(eventNode, "event_mention"))
        {
            AceEventMention eventMention = ParseEventMention(mentionNode, aceDoc);
            aceEvent.AddMention(eventMention);
            aceDoc.AddEventMention(eventMention);
        }
    }
    return(aceDoc);
}
/// <summary>
/// Reads an annotated XML file and returns the gold quote information, the character
/// list and the annotated document built from its text.
/// </summary>
/// <remarks>
/// The text of the first <c>text</c> element is cleaned with <c>GetJustText</c> and
/// annotated via <c>GetAnnotatedFile</c>. The method then walks the <c>chapter</c>
/// children, tracking a running token index so that each <c>mention</c> can be
/// recorded with its token span, and pairs every <c>quote</c> with the mention id
/// from its <c>connection</c> attribute and its <c>speaker</c> attribute.
/// </remarks>
/// <param name="fileName">Path of the XML file to read.</param>
/// <returns>The gold quote list, the person list and the annotated document.</returns>
/// <exception cref="System.Exception">
/// Thrown when the number of gold entries differs from the number of quotations in
/// the annotated document.
/// </exception>
public static XMLToAnnotation.Data ReadXMLFormat(string fileName)
{
    //Extract character list, gold quote speaker and mention information from the XML document.
    IDocument doc = XMLUtils.ReadDocumentFromFile(fileName);
    INode text = doc.GetDocumentElement().GetElementsByTagName("text").Item(0);
    string docText = GetJustText(text);
    Annotation document = GetAnnotatedFile(docText, fileName, GetProcessedCoreNLPProperties());
    IList <ICoreMap> quotes = document.Get(typeof(CoreAnnotations.QuotationsAnnotation));
    IList <CoreLabel> tokens = document.Get(typeof(CoreAnnotations.TokensAnnotation));
    IList <XMLToAnnotation.GoldQuoteInfo> goldList = new List <XMLToAnnotation.GoldQuoteInfo>();
    IDictionary <int, XMLToAnnotation.Mention> idToMention = new Dictionary <int, XMLToAnnotation.Mention>();
    IList <Person> personList = ReadXMLCharacterList(doc);
    // personMap is not read again in this method.
    IDictionary <string, IList <Person> > personMap = QuoteAttributionUtils.ReadPersonMap(personList);
    // One (mention id, speaker) entry per automatically extracted quote.
    IList <Pair <int, string> > mentionIdToSpeakerList = new List <Pair <int, string> >();
    //there is at least 1 case in which the XML quote does not match up with the automatically-extracted quote. (Ex: quote by Mr. Collins that begins, "Hunsford, near Westerham, Kent, ...")
    //as the dirty solution, we treat all quotes encapsulated within an XML quote as the same speaker (although this is not 100% accurate!)
    int quoteIndex = 0;
    INodeList textElems = text.GetChildNodes();
    // Index of the next token not yet consumed by a mention, quote or text node.
    int tokenIndex = 0;
    for (int i = 0; i < textElems.GetLength(); i++)
    {
        INode chapterNode = textElems.Item(i);
        if (chapterNode.GetNodeName().Equals("chapter"))
        {
            INodeList chapElems = chapterNode.GetChildNodes();
            for (int j = 0; j < chapElems.GetLength(); j++)
            {
                INode child = chapElems.Item(j);
                if (child.GetNodeName().Equals("quote"))
                {
                    //search for nested mentions
                    INodeList quoteChildren = child.GetChildNodes();
                    for (int k = 0; k < quoteChildren.GetLength(); k++)
                    {
                        INode quoteChild = quoteChildren.Item(k);
                        if (quoteChild.GetNodeName().Equals("mention"))
                        {
                            string mentionText = quoteChild.GetTextContent();
                            // The id attribute's first character is dropped before parsing.
                            int id = System.Convert.ToInt32(Sharpen.Runtime.Substring(quoteChild.GetAttributes().GetNamedItem("id").GetTextContent(), 1));
                            // connections is not read again in this branch.
                            IList <int> connections = ReadConnection(quoteChild.GetAttributes().GetNamedItem("connection").GetNodeValue());
                            int endIndex = GetEndIndex(tokenIndex, tokens, mentionText);
                            // mentions.put(id, new XMLMention(quoteChild.getTextContent(), tokenIndex, endIndex, id, connections));
                            idToMention[id] = new XMLToAnnotation.Mention(mentionText, tokenIndex, endIndex);
                            tokenIndex = endIndex + 1;
                        }
                        else
                        {
                            // Non-mention content inside the quote only advances tokenIndex.
                            string quoteText = quoteChild.GetTextContent();
                            quoteText = quoteText.ReplaceAll("\n(?!\n)", " "); //trim unnecessarily newlines
                            quoteText = quoteText.ReplaceAll("_", string.Empty);
                            tokenIndex = GetEndIndex(tokenIndex, tokens, quoteText) + 1;
                        }
                    }
                    string quoteText_1 = child.GetTextContent();
                    // tokenIndex = getEndIndex(tokenIndex, tokens, quoteText) + 1;
                    quoteText_1 = quoteText_1.ReplaceAll("\n(?!\n)", " "); //trim unnecessarily newlines
                    quoteText_1 = quoteText_1.ReplaceAll("_", string.Empty);
                    // quotationOffset is computed but not read again in this method.
                    int quotationOffset = 1;
                    if (quoteText_1.StartsWith("``"))
                    {
                        quotationOffset = 2;
                    }
                    IList <int> connections_1 = ReadConnection(child.GetAttributes().GetNamedItem("connection").GetTextContent());
                    int id_1 = System.Convert.ToInt32(Sharpen.Runtime.Substring(child.GetAttributes().GetNamedItem("id").GetTextContent(), 1));
                    // NOTE(review): null is assigned to a non-nullable int here —
                    // presumably "no mention" was meant to be representable; confirm the
                    // intended type before relying on the lookup at the end of the method.
                    int mention_id = null;
                    if (connections_1.Count > 0)
                    {
                        // Only the first connection is used.
                        mention_id = connections_1[0];
                    }
                    else
                    {
                        System.Console.Out.WriteLine("quote w/ no mention. ID: " + id_1);
                    }
                    // Pair<Integer, Integer> mentionPair = idToMentionPair.get(mention_id);
                    mentionIdToSpeakerList.Add(new Pair <int, string>(mention_id, child.GetAttributes().GetNamedItem("speaker").GetTextContent()));
                    string annotatedQuoteText = quotes[quoteIndex].Get(typeof(CoreAnnotations.TextAnnotation));
                    // Advance through the extracted quotes until one ends the XML quote's
                    // text, giving each skipped-over quote the same mention id and speaker.
                    while (!quoteText_1.EndsWith(annotatedQuoteText))
                    {
                        quoteIndex++;
                        annotatedQuoteText = quotes[quoteIndex].Get(typeof(CoreAnnotations.TextAnnotation));
                        mentionIdToSpeakerList.Add(new Pair <int, string>(mention_id, child.GetAttributes().GetNamedItem("speaker").GetTextContent()));
                    }
                    // idToMentionPair.put(id, new Pair<>(-1, -1));
                    // imention_id = connections.get(0);
                    // quotes.add(new XMLQuote(quoteText.substring(quotationOffset, quoteText.length() - quotationOffset), child.getAttributes().getNamedItem("speaker").getTextContent(), id, chapterIndex, mention_id));
                    quoteIndex++;
                }
                else
                {
                    if (child.GetNodeName().Equals("mention"))
                    {
                        // A mention outside any quote: record its token span.
                        string mentionText = child.GetTextContent();
                        int id = System.Convert.ToInt32(Sharpen.Runtime.Substring(child.GetAttributes().GetNamedItem("id").GetTextContent(), 1));
                        // connections is not read again in this branch.
                        IList <int> connections = ReadConnection(child.GetAttributes().GetNamedItem("connection").GetNodeValue());
                        int endIndex = GetEndIndex(tokenIndex, tokens, mentionText);
                        idToMention[id] = new XMLToAnnotation.Mention(mentionText, tokenIndex, endIndex);
                        // mentions.put(id, new XMLMention(child.getTextContent(), tokenIndex, endIndex, id, connections));
                        tokenIndex = endIndex + 1;
                    }
                    else
                    {
                        //#text
                        string nodeText = child.GetTextContent();
                        nodeText = nodeText.ReplaceAll("\n(?!\n)", " ");
                        nodeText = nodeText.ReplaceAll("_", string.Empty);
                        // All tokens are already consumed; nothing left to align.
                        if (tokenIndex >= tokens.Count)
                        {
                            continue;
                        }
                        tokenIndex = GetEndIndex(tokenIndex, tokens, nodeText) + 1;
                    }
                }
            }
        }
    }
    // Turn each (mention id, speaker) pair into a gold entry.
    foreach (Pair <int, string> item in mentionIdToSpeakerList)
    {
        // NOTE(review): if idToMention is a standard Dictionary, indexing a missing id
        // throws instead of returning null, so the null branch below may be
        // unreachable — verify.
        XMLToAnnotation.Mention mention = idToMention[item.first];
        if (mention == null)
        {
            goldList.Add(new XMLToAnnotation.GoldQuoteInfo(-1, -1, item.second, null));
        }
        else
        {
            goldList.Add(new XMLToAnnotation.GoldQuoteInfo(mention.begin, mention.end, item.second, mention.text));
        }
    }
    //verify
    if (document.Get(typeof(CoreAnnotations.QuotationsAnnotation)).Count != goldList.Count)
    {
        throw new Exception("Quotes size and gold size don't match!");
    }
    return(new XMLToAnnotation.Data(goldList, personList, document));
}