/// <summary>
/// Recursive worker that walks the children of <paramref name="curr"/> in
/// <paramref name="g"/> and collects the visited nodes into
/// <paramref name="descendantSet"/>.
/// </summary>
/// <remarks>
/// A node is recorded in <paramref name="seenNodes"/> before any filter is applied,
/// so it is processed at most once for a given <paramref name="seenNodes"/> list.
/// A node whose trimmed tag is in <c>ignoreTags</c> (when
/// <paramref name="ignoreCommonTags"/> is set) is left out of
/// <paramref name="descendantSet"/>, but its children are still traversed.
/// </remarks>
/// <param name="g">Graph whose edges are followed.</param>
/// <param name="curr">Node currently being expanded.</param>
/// <param name="descendantSet">Output collection of accepted nodes; also acts as a stop condition when it already holds <paramref name="curr"/>.</param>
/// <param name="allCutOffRels">Relation names handed to <c>CheckIfSatisfiesRelConstrains</c> to decide whether an edge is cut.</param>
/// <param name="doNotAddThese">Nodes that must not be visited; may be null.</param>
/// <param name="seenNodes">Nodes already visited; updated in place.</param>
/// <param name="ignoreCommonTags">When true, nodes whose tag is in <c>ignoreTags</c> are not added to the output.</param>
/// <param name="acceptWord">Predicate tested against the node's backing label; traversal stops at nodes that fail it.</param>
/// <param name="feat">Per-token feature map, keyed by the index of the parent node; updated in place.</param>
/// <exception cref="System.Exception"/>
private static void DescendantsHelper(SemanticGraph g, IndexedWord curr, ICollection<IndexedWord> descendantSet, IList<string> allCutOffRels, IList<IndexedWord> doNotAddThese, IList<IndexedWord> seenNodes, bool ignoreCommonTags, IPredicate<CoreLabel> acceptWord, CollectionValuedMap<int, string> feat)
{
    if (seenNodes.Contains(curr))
    {
        return;
    }
    seenNodes.Add(curr);

    // The three stop conditions are checked in the same order as a single
    // short-circuiting "or" would evaluate them.
    if (descendantSet.Contains(curr))
    {
        return;
    }
    if (doNotAddThese != null && doNotAddThese.Contains(curr))
    {
        return;
    }
    if (!acceptWord.Test(curr.BackingLabel()))
    {
        return;
    }

    bool hasIgnoredTag = ignoreCommonTags && ignoreTags.Contains(curr.Tag().Trim());
    if (!hasIgnoredTag)
    {
        descendantSet.Add(curr);
    }

    foreach (IndexedWord kid in g.GetChildren(curr))
    {
        if (doNotAddThese != null && doNotAddThese.Contains(kid))
        {
            continue;
        }

        GrammaticalRelation relation = g.Reln(curr, kid);
        if (CheckIfSatisfiesRelConstrains(g, curr, kid, allCutOffRels, relation))
        {
            continue;
        }

        // Skip the child as soon as its tag matches any cut-off pattern.
        bool tagIsCutOff = false;
        foreach (string cutOffTagRegex in cutoffTags)
        {
            if (kid.Tag().Matches(cutOffTagRegex))
            {
                if (Debug >= 5)
                {
                    System.Console.Out.WriteLine("ignored tag " + kid + " because it satisfied " + cutOffTagRegex);
                }
                tagIsCutOff = true;
                break;
            }
        }
        if (tagIsCutOff)
        {
            continue;
        }

        // Features are stored under the parent's index, using the relation
        // that links the parent to this child.
        int parentIndex = curr.Index();
        if (!feat.Contains(parentIndex))
        {
            feat[parentIndex] = new List<string>();
        }
        GetPatternsFromDataMultiClass.GetFeatures(g, curr, false, feat[parentIndex], relation);

        DescendantsHelper(g, kid, descendantSet, allCutOffRels, doNotAddThese, seenNodes, ignoreCommonTags, acceptWord, feat);
    }
}
/// <summary>
/// Extracts the phrase rooted at <paramref name="w"/> from <paramref name="g"/> and
/// records its text, token span and features in the supplied output collections.
/// </summary>
/// <remarks>
/// <para>
/// Nodes reachable from <paramref name="w"/> through <c>conj_and</c> (as collected by
/// <c>DescendantsWithReln</c>) are processed first as phrases of their own and are then
/// appended to <paramref name="doNotAddThese"/>, so they are left out of the phrase
/// built for <paramref name="w"/>.
/// </para>
/// <para>
/// The recorded span is <c>(min - 1, max - 1)</c>, where <c>min</c> and <c>max</c> are
/// the smallest and largest word indices of the collected descendants; the end index
/// therefore refers to the last token of the phrase (inclusive). The span is truncated
/// to <c>maxPhraseLength</c> tokens. The code subtracts 1 before indexing
/// <paramref name="textTokens"/>, so word indices are presumably 1-based; confirm
/// against <c>IndexedWord.Index()</c>.
/// </para>
/// <para>
/// Any exception raised while processing is caught and its stack trace printed; the
/// method never propagates an exception to the caller.
/// </para>
/// </remarks>
/// <param name="g">Graph from which the phrase is extracted.</param>
/// <param name="w">Root node of the phrase.</param>
/// <param name="additionalCutOffRels">Extra relation names to cut on, added to <c>cutoffRelations</c>; may be null.</param>
/// <param name="textTokens">Sentence tokens from which the phrase text is joined.</param>
/// <param name="listOfOutput">Phrase strings emitted so far; a phrase already present is not emitted again.</param>
/// <param name="listOfOutputIndices">Token spans emitted so far; updated in place.</param>
/// <param name="seenNodes">Nodes already used as phrase roots; updated in place.</param>
/// <param name="doNotAddThese">Nodes excluded from phrases; must not be null and is extended in place.</param>
/// <param name="findSubTrees">When true, every descendant of an emitted phrase is processed as a root as well.</param>
/// <param name="extractedPhrases">Receives an <c>ExtractedPhrase</c> for each newly seen span.</param>
/// <param name="pattern">Pattern stored on each <c>ExtractedPhrase</c>.</param>
/// <param name="acceptWord">Predicate forwarded to <c>Descendants</c> and to recursive calls.</param>
public virtual void PrintSubGraph(SemanticGraph g, IndexedWord w, IList<string> additionalCutOffRels, IList<string> textTokens, ICollection<string> listOfOutput, ICollection<IntPair> listOfOutputIndices, IList<IndexedWord> seenNodes, IList<IndexedWord> doNotAddThese, bool findSubTrees, ICollection<ExtractedPhrase> extractedPhrases, SemgrexPattern pattern, IPredicate<CoreLabel> acceptWord)
{
    try
    {
        if (seenNodes.Contains(w))
        {
            return;
        }
        seenNodes.Add(w);
        if (doNotAddThese.Contains(w))
        {
            return;
        }

        // Conjuncts become phrases of their own and are then excluded from
        // the phrase rooted at w.
        IList<IndexedWord> andNodes = new List<IndexedWord>();
        DescendantsWithReln(g, w, "conj_and", new List<IndexedWord>(), andNodes);
        foreach (IndexedWord w1 in andNodes)
        {
            PrintSubGraph(g, w1, additionalCutOffRels, textTokens, listOfOutput, listOfOutputIndices, seenNodes, doNotAddThese, findSubTrees, extractedPhrases, pattern, acceptWord);
        }
        Sharpen.Collections.AddAll(doNotAddThese, andNodes);

        IList<string> allCutOffRels = new List<string>();
        if (additionalCutOffRels != null)
        {
            Sharpen.Collections.AddAll(allCutOffRels, additionalCutOffRels);
        }
        Sharpen.Collections.AddAll(allCutOffRels, cutoffRelations);

        CollectionValuedMap<int, string> featPerToken = new CollectionValuedMap<int, string>();
        ICollection<string> feat = new List<string>();
        GetPatternsFromDataMultiClass.GetFeatures(g, w, true, feat, null);
        ICollection<IndexedWord> words = Descendants(g, w, allCutOffRels, doNotAddThese, ignoreCommonTags, acceptWord, featPerToken);
        if (words.Count == 0)
        {
            return;
        }

        // Smallest and largest word index among the collected descendants.
        int min = int.MaxValue;
        int max = -1;
        foreach (IndexedWord word in words)
        {
            if (word.Index() < min)
            {
                min = word.Index();
            }
            if (word.Index() > max)
            {
                max = word.Index();
            }
        }

        // Clamp the span so it never exceeds the configured phrase length.
        if ((max - min + 1) > maxPhraseLength)
        {
            max = min + maxPhraseLength - 1;
        }
        IntPair indices = new IntPair(min - 1, max - 1);
        string phrase = StringUtils.Join(textTokens.SubList(min - 1, max), " ");
        phrase = phrase.Trim();

        feat.Add("LENGTH-" + (max - min + 1));
        for (int i = min; i <= max; i++)
        {
            Sharpen.Collections.AddAll(feat, featPerToken[i]);
        }
        ExtractedPhrase extractedPh = new ExtractedPhrase(min - 1, max - 1, pattern, phrase, Counters.AsCounter(feat));

        // The original also tested doNotAddThese.Contains(phrase). That list holds
        // IndexedWord instances, so a string lookup could never match (and is not a
        // valid argument for the generic Contains); the check has been dropped.
        if (listOfOutput.Contains(phrase))
        {
            return;
        }
        listOfOutput.Add(phrase);
        if (!listOfOutputIndices.Contains(indices))
        {
            listOfOutputIndices.Add(indices);
            extractedPhrases.Add(extractedPh);
        }
        if (findSubTrees)
        {
            foreach (IndexedWord word_1 in words)
            {
                if (!seenNodes.Contains(word_1))
                {
                    PrintSubGraph(g, word_1, additionalCutOffRels, textTokens, listOfOutput, listOfOutputIndices, seenNodes, doNotAddThese, findSubTrees, extractedPhrases, pattern, acceptWord);
                }
            }
        }
    }
    catch (Exception e)
    {
        // Deliberate best-effort: a failure on one subgraph is reported and skipped.
        Sharpen.Runtime.PrintStackTrace(e);
    }
}