/// <summary>
/// Replaces every bare double-quote token in <paramref name="input"/> with an
/// opening (<c>``</c>) or closing (<c>''</c>) quote token, alternating between
/// the two, and returns the tokens in their original order.
/// </summary>
/// <remarks>
/// The tokens are rewritten in place through <c>SetWord</c>; the returned list
/// holds the same token objects as <paramref name="input"/>. If the last token
/// is a quote, alternation is anchored at the end of the list (that last quote
/// becomes a closing quote). Otherwise it is anchored at the start (the first
/// quote encountered becomes an opening quote).
/// </remarks>
/// <param name="input">Tokens to scan; an empty list yields an empty result.</param>
/// <returns>A newly created list containing the tokens of <paramref name="input"/>.</returns>
private static IList<IHasWord> FixQuotes(IList<IHasWord> input)
{
    LinkedList<IHasWord> fixedTokens = new LinkedList<IHasWord>();
    int tokenCount = input.Count;
    if (tokenCount == 0)
    {
        return fixedTokens;
    }

    if (input[tokenCount - 1].Word().Equals("\""))
    {
        // The input ends with a quote: walk backwards so that this final
        // quote is treated as a closing one, and prepend each token to keep
        // the original order.
        bool openingNext = false;
        int position = tokenCount;
        while (position > 0)
        {
            position--;
            IHasWord current = input[position];
            if (current.Word().Equals("\""))
            {
                current.SetWord(openingNext ? "``" : "\'\'");
                openingNext = !openingNext;
            }
            // Non-quote tokens are passed through untouched.
            fixedTokens.AddFirst(current);
        }
    }
    else
    {
        // No trailing quote: walk forwards, the first quote seen is an opening one.
        bool openingNext = true;
        foreach (IHasWord current in input)
        {
            if (current.Word().Equals("\""))
            {
                current.SetWord(openingNext ? "``" : "\'\'");
                openingNext = !openingNext;
            }
            // Non-quote tokens are passed through untouched.
            fixedTokens.AddLast(current);
        }
    }

    return fixedTokens;
}
/// <summary>
/// Consumes tokens from <c>tokenizer</c> until one complete parenthesised tree
/// has been assembled, and returns that tree.
/// </summary>
/// <remarks>
/// The method is a small state machine driven by three kinds of token:
/// a left parenthesis opens a new node (optionally followed by its label),
/// a right parenthesis closes the current node, and anything else becomes a
/// leaf of the current node. Open ancestors are kept on <c>stack</c> and the
/// node being filled is <c>currentTree</c>.
/// Leaves are numbered from 1 in the order they are read (when the leaf label
/// is an <c>IHasIndex</c>); a leaf label that is an <c>IHasWord</c> gets its own
/// value as word, and one that is an <c>IHasTag</c> gets the parent node's label
/// value as tag.
/// </remarks>
/// <returns>
/// The tree whose closing parenthesis empties <c>stack</c>, or <c>null</c> when
/// an unmatched right parenthesis is met, when a non-parenthesis token appears
/// outside any tree, or when the tokens run out before a tree is completed.
/// </returns>
/// <exception cref="Java.Util.NoSuchElementException">
/// Declared by the original signature; presumably propagated from the tokenizer
/// when a token is requested and none is left (TODO confirm against the tokenizer).
/// </exception>
private Tree GetTreeFromInputStream()
{
    int wordIndex = 1;

    // FSA
    while (tokenizer.MoveNext())
    {
        string token = tokenizer.Current;

        if (token == leftParen)
        {
            // cdm 20100225: This next line used to have "" instead of null, but the traditional and current tree
            // normalizers depend on the label being null not "" when there is no label on a tree (like the
            // outermost English PTB level)
            string label = null;
            if (!tokenizer.Peek().Equals(leftParen))
            {
                // The token after '(' is the node label and must be consumed here.
                // Reading Current without advancing would yield the '(' that was
                // just matched and leave the real label to be read as a leaf on
                // the next iteration (assumes standard enumerator semantics for
                // the tokenizer, i.e. Current does not advance).
                if (!tokenizer.MoveNext())
                {
                    // Input ended right after '(': fall through to the reject path.
                    break;
                }
                label = tokenizer.Current;
            }

            if (rightParen.Equals(label))
            {
                // Skip past empty trees
                continue;
            }

            if (treeNormalizer != null)
            {
                label = treeNormalizer.NormalizeNonterminal(label);
            }

            if (label != null)
            {
                label = StarPattern.Matcher(label).ReplaceAll("*");
                label = SlashPattern.Matcher(label).ReplaceAll("/");
            }

            Tree newTree = treeFactory.NewTreeNode(label, null); // dtrs are added below

            if (currentTree == null)
            {
                stack.Add(newTree);
            }
            else
            {
                currentTree.AddChild(newTree);
                stack.Add(currentTree);
            }

            currentTree = newTree;
        }
        else if (token == rightParen)
        {
            if (stack.IsEmpty())
            {
                // Warn that file has too many right parentheses
                log.Info("PennTreeReader: warning: file has extra non-matching right parenthesis [ignored]");
                break;
            }

            // Accept
            currentTree = stack.Remove(stack.Count - 1); // i.e., stack.pop()

            if (stack.IsEmpty())
            {
                return currentTree;
            }
        }
        else
        {
            if (currentTree == null)
            {
                // A careful Reader should warn here, but it's kind of useful to
                // suppress this because then the TreeReader doesn't print a ton of
                // messages if there is a README file in a directory of Trees.
                // log.info("PennTreeReader: warning: file has extra token not in a s-expression tree: " + token + " [ignored]");
                break;
            }

            string terminal = (treeNormalizer == null) ? token : treeNormalizer.NormalizeTerminal(token);
            terminal = StarPattern.Matcher(terminal).ReplaceAll("*");
            terminal = SlashPattern.Matcher(terminal).ReplaceAll("/");

            Tree leaf = treeFactory.NewLeaf(terminal);
            if (leaf.Label() is IHasIndex)
            {
                IHasIndex hi = (IHasIndex)leaf.Label();
                hi.SetIndex(wordIndex);
            }
            if (leaf.Label() is IHasWord)
            {
                IHasWord hw = (IHasWord)leaf.Label();
                hw.SetWord(leaf.Label().Value());
            }
            if (leaf.Label() is IHasTag)
            {
                // The tag of a leaf is the label of the node it hangs from.
                IHasTag ht = (IHasTag)leaf.Label();
                ht.SetTag(currentTree.Label().Value());
            }
            wordIndex++;

            currentTree.AddChild(leaf);
            // cdm: Note: this implementation just isn't as efficient as the old recursive descent parser
            // (see 2008 code), where all the daughters are gathered before the tree is made....
        }
    }

    // Reject
    if (currentTree != null)
    {
        log.Info("PennTreeReader: warning: incomplete tree (extra left parentheses in input): " + currentTree);
    }
    return null;
}