/// <summary>Adds a sentence-final punctuation mark to sentences that lack one.</summary>
/// <remarks>
/// Inspects at most the last 3 words of the sentence (to allow for close
/// parentheses, etc.). For each word, a non-empty tag is tested with
/// <c>IsSentenceFinalPunctuationTag</c>; when no usable tag is present the
/// word itself is tested with <c>IsPunctuationWord</c>. If no match is
/// found, the first entry of the language pack's
/// <c>SentenceFinalPunctuationWords()</c> is appended to the sentence.
/// </remarks>
/// <param name="sentence">The sentence to check; it may be extended by one word.</param>
/// <param name="length">The length of the sentence (just to avoid recomputation)</param>
/// <returns>
/// <c>true</c> only if a punctuation word was actually appended;
/// <c>false</c> if punctuation was already present or the language pack
/// supplies no sentence-final punctuation word to add.
/// </returns>
private bool AddSentenceFinalPunctIfNeeded(IList<IHasWord> sentence, int length)
{
    int start = length - 3;
    if (start < 0)
    {
        start = 0;
    }
    ITreebankLanguagePack tlp = op.tlpParams.TreebankLanguagePack();
    for (int i = length - 1; i >= start; i--)
    {
        IHasWord item = sentence[i];
        // An object (e.g., CoreLabel) can implement IHasTag but not actually store
        // a tag, so we need to check that there is something there for this case.
        // If there is, use only it, since word tokens can be ambiguous.
        string tag = null;
        if (item is IHasTag)
        {
            tag = ((IHasTag)item).Tag();
        }
        if (!string.IsNullOrEmpty(tag))
        {
            if (tlp.IsSentenceFinalPunctuationTag(tag))
            {
                return false;
            }
        }
        else
        {
            if (tlp.IsPunctuationWord(item.Word()))
            {
                return false;
            }
        }
    }
    // None found, so add one -- but only if the language pack has one to offer.
    string[] sfpWords = tlp.SentenceFinalPunctuationWords();
    if (sfpWords == null || sfpWords.Length == 0)
    {
        // Nothing was appended, so do not report (or log) an addition.
        return false;
    }
    if (op.testOptions.verbose)
    {
        log.Info("Adding missing final punctuation to sentence.");
    }
    sentence.Add(new Word(sfpWords[0]));
    return true;
}
/// <summary>
/// Recursively builds a normalized copy of <paramref name="tree"/> with
/// rewritten category labels and (optionally) punctuation removed.
/// </summary>
/// <remarks>
/// Steps, in order:
/// a start-symbol node is replaced by the transform of its first child;
/// a leaf is copied unchanged;
/// the label is reduced to its basic category;
/// <c>whOption</c> bits 1 and 4 each strip a leading "WH", while bit 2
/// rewrites leading WP/WDT/WRB to PRP/DT/RB;
/// if <c>deletePunct</c> is set, preterminals whose tag or word is
/// punctuation are dropped;
/// if <c>fixCollinsBaseNP</c> is set, an NP whose only child is also an NP
/// is collapsed into that child;
/// "PRT" is relabelled "ADVP";
/// finally the children are transformed and a new node is built.
/// </remarks>
/// <param name="tree">The tree to transform; may be null.</param>
/// <returns>
/// The transformed tree, or <c>null</c> if <paramref name="tree"/> is null,
/// the node was deleted as punctuation, or every child was deleted.
/// </returns>
public virtual Tree TransformTree(Tree tree)
{
    if (tree == null)
    {
        return null;
    }
    ITreeFactory tf = tree.TreeFactory();
    string s = tree.Value();
    if (tlp.IsStartSymbol(s))
    {
        return TransformTree(tree.FirstChild());
    }
    if (tree.IsLeaf())
    {
        return tf.NewLeaf(tree.Label());
    }
    s = tlp.BasicCategory(s);
    if (((whOption & 1) != 0) && s.StartsWith("WH"))
    {
        s = Sharpen.Runtime.Substring(s, 2);
    }
    if ((whOption & 2) != 0)
    {
        s = s.ReplaceAll("^WP", "PRP"); // does both WP and WP$ !!
        s = s.ReplaceAll("^WDT", "DT");
        s = s.ReplaceAll("^WRB", "RB");
    }
    // Bit 4 is applied after the bit-2 rewrites, so it only strips "WH"
    // prefixes that survived them.
    if (((whOption & 4) != 0) && s.StartsWith("WH"))
    {
        s = Sharpen.Runtime.Substring(s, 2);
    }
    // wsg2010: Might need a better way to deal with tag ambiguity. This still doesn't handle the
    // case where the GOLD tree does not label a punctuation mark as such (common in French), and
    // the guess tree does.
    if (deletePunct && tree.IsPreTerminal()
        && (tlp.IsEvalBIgnoredPunctuationTag(s) || tlp.IsPunctuationWord(tree.FirstChild().Value())))
    {
        return null;
    }
    // remove the extra NPs inserted in the collinsBaseNP option
    if (fixCollinsBaseNP && s.Equals("NP"))
    {
        Tree[] kids = tree.Children();
        if (kids.Length == 1 && tlp.BasicCategory(kids[0].Value()).Equals("NP"))
        {
            return TransformTree(kids[0]);
        }
    }
    // Magerman erased this distinction, and everyone else has followed like sheep...
    if (s.Equals("PRT"))
    {
        s = "ADVP";
    }
    // Iterate the child array directly: the previous loop bound referred to a
    // name that is not declared in this method. This also fetches the child
    // array once rather than on every iteration.
    IList<Tree> children = new List<Tree>();
    foreach (Tree child in tree.Children())
    {
        Tree newChild = TransformTree(child);
        if (newChild != null)
        {
            children.Add(newChild);
        }
    }
    if (children.Count == 0)
    {
        // Every child was deleted, so this node disappears as well.
        return null;
    }
    Tree node = tf.NewTreeNode(tree.Label(), children);
    node.SetValue(s);
    return node;
}