/// <summary>
/// Wraps the child at <paramref name="parseIndex"/> in a new node that also contains
/// <paramref name="node"/>, and puts the new node in that child's place.
/// </summary>
/// <param name="node">The node appended as the last child of the new node.</param>
/// <param name="rules">The head rules used to select the head of the new node.</param>
/// <param name="parseIndex">The index, in this parse's child list, of the child to wrap.</param>
/// <returns>The newly created node, which now occupies <paramref name="parseIndex"/>.</returns>
public Parse AdJoinRoot(Parse node, AbstractHeadRules rules, int parseIndex) {
    var wrapped = parts[parseIndex];

    // The new node covers everything from the start of the wrapped child to the end of the
    // incoming node, and carries the wrapped child's type.
    var coverage = new Span(wrapped.Span.Start, node.Span.End);
    var label = wrapped.Type;
    var head = rules.GetHead(new[] { wrapped, node }, label);
    var joined = new Parse(Text, coverage, label, 1, head);

    // Child order: the wrapped child, then any punctuation recorded before the node, then the node.
    joined.parts.Add(wrapped);

    var punctuation = node.PreviousPunctuationSet;
    if (punctuation != null) {
        joined.parts.AddRange(punctuation);
    }

    joined.parts.Add(node);

    parts[parseIndex] = joined;
    return joined;
}
/// <summary>
/// Returns the top chunk sequences for the specified parse.
/// </summary>
/// <param name="p">A pos-tag assigned parse.</param>
/// <param name="minChunkScore">A minimum score below which chunks should not be advanced.</param>
/// <returns>
/// The top chunk assignments to the specified parse: one clone of <paramref name="p"/> per sequence
/// returned by the chunker, each with the chunks of that sequence inserted.
/// </returns>
protected virtual Parse[] AdvanceChunks(Parse p, double minChunkScore) {
    // NOTE(review): children is read once and reused below; this assumes the child list of p is
    // not modified while the clones are populated (Clone is noted as copying the top level) - confirm.
    var children = p.Children;
    var words = new string[children.Length];
    var pTags = new string[children.Length];

    for (var i = 0; i < children.Length; i++) {
        words[i] = children[i].Head.CoveredText;
        pTags[i] = children[i].Type;
    }

    // The threshold handed to the chunker is the requested minimum reduced by p's own probability.
    var cs = chunker.TopKSequences(words, pTags, minChunkScore - p.Probability);
    var newParses = new Parse[cs.Length];

    for (var si = 0; si < cs.Length; si++) {
        var sequence = cs[si];
        var parse = (Parse)p.Clone(); // copies top level
        newParses[si] = parse;

        if (createDerivationString) {
            parse.Derivation.Append(si).Append(".");
        }

        var tags = sequence.Outcomes.ToArray();
        var start = -1;
        var end = 0;
        string type = null; // type of the chunk currently open, or null when none is open

        // Runs one step past the last tag so that a chunk still open at the end gets emitted.
        for (var j = 0; j <= tags.Length; j++) {
            var pastLastTag = j == tags.Length;

            if (!pastLastTag) {
                parse.AddProbability(Math.Log(sequence.Probabilities[j]));

                // Chunk tags are machine-generated identifiers, so compare ordinally rather than
                // with the current culture.
                if (tags[j].StartsWith(CONT, StringComparison.Ordinal)) {
                    // Continuation tag: just extend the open chunk.
                    end = j;
                    continue;
                }
            }

            // Any non-continuation tag (or the end of the sequence) closes the open chunk.
            if (type != null) {
                var cons = new Parse[end - start + 1];
                for (var ci = 0; ci < cons.Length; ci++) {
                    cons[ci] = children[start + ci];
                }

                var first = cons[0];
                var last = cons[cons.Length - 1];
                var chunk = new Parse(
                    first.Text,
                    new Span(first.Span.Start, last.Span.End),
                    type,
                    1,
                    headRules.GetHead(cons, type)) { IsChunk = true };

                parse.Insert(chunk);
            }

            if (!pastLastTag) {
                if (tags[j].StartsWith(START, StringComparison.Ordinal)) {
                    // Start tag: open a new chunk whose type is the remainder of the tag.
                    type = tags[j].Substring(START.Length);
                    start = j;
                    end = j;
                } else {
                    // Any other tag: no chunk is open.
                    type = null;
                }
            }
        }
    }

    return newParses;
}