Example #1
        public void TestTokenProbArray()
        {
            // request the top-K chunk tag sequences for the sample tokens and POS tags
            var preds = chunker.TopKSequences(toks1, tags1);

            Assert.True(preds.Length > 0);
            // one probability per token; the top sequence matches the expected outcomes,
            // while the runner-up must differ from it
            Assert.AreEqual(expect1.Length, preds[0].Probabilities.Count);
            Assert.True(expect1.SequenceEqual(preds[0].Outcomes));
            Assert.False(expect1.SequenceEqual(preds[1].Outcomes));
        }
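The fields chunker, toks1, tags1 and expect1 come from the surrounding test fixture. A minimal stand-alone sketch of the same call, assuming a ChunkerME already built from a trained model and using made-up token/POS arrays rather than the fixture data, might look like:

            // sketch only: tokens/tags are illustrative, chunker is assumed to wrap a trained model
            var tokens = new[] { "Rockwell", "said", "the", "agreement", "calls" };
            var tags   = new[] { "NNP",      "VBD",  "DT",  "NN",        "VBZ"  };

            var sequences = chunker.TopKSequences(tokens, tags);

            foreach (var seq in sequences)
            {
                // each sequence exposes Outcomes, per-token Probabilities and an overall Score
                Console.WriteLine("{0:F4}  {1}", seq.Score, string.Join(" ", seq.Outcomes));
            }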
Example #2
        public void TestTokenProbArray()
        {
            // run the same input through the SharpNL chunker and the reference (Java) OpenNLP chunker
            var sTop = sChunker.TopKSequences(ChunkerMETest.toks1, ChunkerMETest.tags1);
            var jTop = jChunker.topKSequences(ChunkerMETest.toks1, ChunkerMETest.tags1);

            Assert.AreEqual(jTop.Length, sTop.Length);

            for (var i = 0; i < jTop.Length; i++)
            {
                var jOut   = jTop[i].getOutcomes();
                var jProbs = jTop[i].getProbs();

                Assert.AreEqual(jOut.size(), sTop[i].Outcomes.Count);

                // outcomes must match exactly; probabilities only within a small tolerance
                for (var j = 0; j < jOut.size(); j++)
                {
                    Assert.AreEqual(jOut.get(j), sTop[i].Outcomes[j]);
                    Assert.AreEqual(jProbs[j], sTop[i].Probabilities[j], 0.0000000001d);
                }

                Assert.AreEqual(jTop[i].getScore(), sTop[i].Score, 0.0000000001d);
            }
        }
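Here sChunker is the SharpNL chunker and jChunker appears to be the original Java OpenNLP chunker driven from the same test (hence the Java-style getOutcomes()/getProbs() calls). Outcome strings have to match exactly, while probabilities and scores are only compared to within 1e-10, which leaves room for harmless floating-point differences between the two runtimes.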
Example #3
        /// <summary>
        /// Returns the top chunk sequences for the specified parse.
        /// </summary>
        /// <param name="p">A pos-tag assigned parse.</param>
        /// <param name="minChunkScore">A minimum score below which chunks should not be advanced.</param>
        /// <returns>The top chunk assignments to the specified parse.</returns>
        protected virtual Parse[] AdvanceChunks(Parse p, double minChunkScore)
        {
            // chunk
            var children = p.Children;
            var words    = new string[children.Length];
            var pTags    = new string[words.Length];

            //var probs = new double[words.Length];

            for (int i = 0, il = children.Length; i < il; i++)
            {
                words[i] = children[i].Head.CoveredText;
                pTags[i] = children[i].Type;
            }

            //System.err.println("adjusted mcs = "+(minChunkScore-p.getProb()));

            var cs = chunker.TopKSequences(words, pTags, minChunkScore - p.Probability);

            var newParses = new Parse[cs.Length];

            for (var si = 0; si < cs.Length; si++)
            {
                newParses[si] = (Parse)p.Clone();  //copies top level

                if (createDerivationString)
                {
                    newParses[si].Derivation.Append(si).Append(".");
                }

                var tags = cs[si].Outcomes.ToArray();

                var    start = -1;
                var    end   = 0;
                string type  = null;

                // run one index past the last tag so a chunk still open at the end is emitted
                for (var j = 0; j <= tags.Length; j++)
                {
                    if (j != tags.Length)
                    {
                        newParses[si].AddProbability(Math.Log(cs[si].Probabilities[j]));
                    }
                    if (j != tags.Length && tags[j].StartsWith(CONT))
                    {
                        // continuation tag: just extend the current chunk's end (contTypeMap is not used for chunk tags)
                        end = j;
                    }
                    else
                    {
                        // make the previous constituent, if one is open
                        if (type != null)
                        {
                            var p1 = p.Children[start];
                            var p2 = p.Children[end];

                            var cons = new Parse[end - start + 1];
                            cons[0] = p1;
                            //cons[0].label="Start-"+type;
                            if (end - start != 0)
                            {
                                cons[end - start] = p2;
                                //cons[end-start].label="Cont-"+type;
                                for (var ci = 1; ci < end - start; ci++)
                                {
                                    cons[ci] = p.Children[ci + start];
                                    //cons[ci].label="Cont-"+type;
                                }
                            }
                            var chunk = new Parse(p1.Text, new Span(p1.Span.Start, p2.Span.End), type, 1, headRules.GetHead(cons, type))
                            {
                                IsChunk = true
                            };

                            newParses[si].Insert(chunk);
                        }
                        if (j != tags.Length)
                        {
                            // update for new constituent
                            if (tags[j].StartsWith(START))
                            {
                                // start tag: begin a new chunk (startTypeMap is not used for chunk tags)
                                type  = tags[j].Substring(START.Length);
                                start = j;
                                end   = j;
                            }
                            else
                            {
                                // other ("O") tag: no chunk in progress
                                type = null;
                            }
                        }
                    }
                }
            }
            return newParses;
        }
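The S-/C-/O handling above can be hard to follow inside the full method. A stand-alone sketch of just the tag-grouping step, assuming START and CONT hold the "S-"/"C-" prefixes used for parser chunk tags and that "O" marks tokens outside any chunk, could look like:

            // sketch only: assumes "S-"/"C-" chunk-tag prefixes and "O" for outside tokens
            var tags = new[] { "S-NP", "C-NP", "O", "S-VP", "C-VP", "C-VP" };

            var start = -1;
            var end   = 0;
            string type = null;

            for (var j = 0; j <= tags.Length; j++)
            {
                if (j != tags.Length && tags[j].StartsWith("C-"))
                {
                    end = j;                          // extend the open chunk
                }
                else
                {
                    if (type != null)
                        Console.WriteLine("{0}: tokens {1}..{2}", type, start, end);

                    if (j != tags.Length && tags[j].StartsWith("S-"))
                    {
                        type  = tags[j].Substring(2); // open a new chunk of this type
                        start = end = j;
                    }
                    else
                    {
                        type = null;                  // "O": nothing open
                    }
                }
            }
            // prints "NP: tokens 0..1" and "VP: tokens 3..5"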