Пример #1
0
        /// <summary>
        /// Replaces the child at <paramref name="parseIndex"/> with a new node that contains
        /// that child followed by <paramref name="node"/>.
        /// </summary>
        /// <param name="node">The node to place after the existing child inside the new node.</param>
        /// <param name="rules">The head rules used to select the head of the new node.</param>
        /// <param name="parseIndex">The index, within this parse's parts, of the child being wrapped.</param>
        /// <returns>The newly created node, which now occupies <paramref name="parseIndex"/>.</returns>
        public Parse AdJoinRoot(Parse node, AbstractHeadRules rules, int parseIndex)
        {
            var existing = parts[parseIndex];

            // The new node spans from the start of the existing child to the end of the adjoined
            // node, and takes over the existing child's type.
            var coverage = new Span(existing.Span.Start, node.Span.End);
            var head     = rules.GetHead(new[] { existing, node }, existing.Type);
            var wrapper  = new Parse(Text, coverage, existing.Type, 1, head);

            wrapper.parts.Add(existing);

            // Any punctuation recorded as preceding the adjoined node goes between the two children.
            var punctuation = node.PreviousPunctuationSet;
            if (punctuation != null)
            {
                wrapper.parts.AddRange(punctuation);
            }

            wrapper.parts.Add(node);

            parts[parseIndex] = wrapper;
            return wrapper;
        }
Пример #2
0
        /// <summary>
        /// Returns the top chunk sequences for the specified parse.
        /// </summary>
        /// <param name="p">A pos-tag assigned parse.</param>
        /// <param name="minChunkScore">A minimum score below which chunks should not be advanced.</param>
        /// <returns>
        /// One clone of <paramref name="p"/> per sequence returned by the chunker, each with the
        /// chunk constituents described by that sequence inserted.
        /// </returns>
        /// <remarks>
        /// Chunk tags are compared against the <c>START</c> and <c>CONT</c> prefixes with
        /// <see cref="StringComparison.Ordinal"/>, because they are machine-generated labels and
        /// must not be subject to culture-specific comparison rules.
        /// </remarks>
        protected virtual Parse[] AdvanceChunks(Parse p, double minChunkScore)
        {
            // Snapshot the children once; the chunk tags produced below are indexed against it.
            // NOTE(review): assumes Clone()/Insert() on the clones never alter p's own child list — confirm.
            var children = p.Children;
            var words    = new string[children.Length];
            var pTags    = new string[children.Length];

            for (var i = 0; i < children.Length; i++)
            {
                words[i] = children[i].Head.CoveredText;
                pTags[i] = children[i].Type;
            }

            // The score threshold handed to the chunker is reduced by the parse's own probability.
            var cs        = chunker.TopKSequences(words, pTags, minChunkScore - p.Probability);
            var newParses = new Parse[cs.Length];

            for (var si = 0; si < cs.Length; si++)
            {
                var sequence  = cs[si];
                var candidate = (Parse)p.Clone();  // copies top level
                newParses[si] = candidate;

                if (createDerivationString)
                {
                    candidate.Derivation.Append(si).Append(".");
                }

                var tags = sequence.Outcomes.ToArray();

                var    start = -1;
                var    end   = 0;
                string type  = null;

                // Runs one step past the last tag so that a chunk still open at the end is flushed.
                for (var j = 0; j <= tags.Length; j++)
                {
                    var pastLastTag = j == tags.Length;

                    if (!pastLastTag)
                    {
                        candidate.AddProbability(Math.Log(sequence.Probabilities[j]));

                        if (tags[j].StartsWith(CONT, StringComparison.Ordinal))
                        {
                            // if continue just update end chunking tag don't use contTypeMap
                            end = j;
                            continue;
                        }
                    }

                    // make previous constituent if it exists
                    if (type != null)
                    {
                        var first = children[start];
                        var last  = children[end];

                        var cons = new Parse[end - start + 1];
                        for (var ci = 0; ci < cons.Length; ci++)
                        {
                            cons[ci] = children[start + ci];
                        }

                        var chunk = new Parse(first.Text, new Span(first.Span.Start, last.Span.End), type, 1, headRules.GetHead(cons, type))
                        {
                            IsChunk = true
                        };

                        candidate.Insert(chunk);
                    }

                    if (pastLastTag)
                    {
                        break;
                    }

                    // update for new constituent
                    if (tags[j].StartsWith(START, StringComparison.Ordinal))
                    {
                        // don't use startTypeMap these are chunk tags
                        type  = tags[j].Substring(START.Length);
                        start = j;
                        end   = j;
                    }
                    else
                    {
                        // other
                        type = null;
                    }
                }
            }

            return newParses;
        }