/// <summary>
/// Verifies that the best chunk sequence carries one probability per expected
/// outcome, matches the expected outcomes exactly, and that the runner-up
/// sequence differs from the best one.
/// </summary>
public void TestTokenProbArray() {
    var preds = chunker.TopKSequences(toks1, tags1);

    // The assertions below index preds[1], so require at least two sequences.
    // (The original only checked preds.Length > 0, which could turn a weak
    // chunker result into an IndexOutOfRangeException instead of a clean
    // assertion failure.)
    Assert.True(preds.Length > 1);

    // One probability per expected outcome token in the best sequence.
    Assert.AreEqual(expect1.Length, preds[0].Probabilities.Count);

    // Best sequence matches the gold outcomes; second-best must differ.
    Assert.True(expect1.SequenceEqual(preds[0].Outcomes));
    Assert.False(expect1.SequenceEqual(preds[1].Outcomes));
}
/// <summary>
/// Cross-checks the SharpNL top-K chunk sequences against the reference Java
/// (OpenNLP) implementation: same sequence count, and for every sequence the
/// same outcomes, per-token probabilities, and overall score.
/// </summary>
public void TestTokenProbArray() {
    // Tolerance for comparing Java doubles against .NET doubles.
    const double delta = 0.0000000001d;

    var sharpSeqs = sChunker.TopKSequences(ChunkerMETest.toks1, ChunkerMETest.tags1);
    var javaSeqs = jChunker.topKSequences(ChunkerMETest.toks1, ChunkerMETest.tags1);

    Assert.AreEqual(javaSeqs.Length, sharpSeqs.Length);

    for (var seq = 0; seq < javaSeqs.Length; seq++) {
        var javaOutcomes = javaSeqs[seq].getOutcomes();
        var javaProbs = javaSeqs[seq].getProbs();
        var sharpSeq = sharpSeqs[seq];

        Assert.AreEqual(javaOutcomes.size(), sharpSeq.Outcomes.Count);

        // Token-by-token: identical outcome labels and matching probabilities.
        for (var tok = 0; tok < javaOutcomes.size(); tok++) {
            Assert.AreEqual(javaOutcomes.get(tok), sharpSeq.Outcomes[tok]);
            Assert.AreEqual(javaProbs[tok], sharpSeq.Probabilities[tok], delta);
        }

        // Whole-sequence score must agree as well.
        Assert.AreEqual(javaSeqs[seq].getScore(), sharpSeq.Score, delta);
    }
}
/// <summary>
/// Returns the top chunk sequences for the specified parse.
/// </summary>
/// <param name="p">A pos-tag assigned parse.</param>
/// <param name="minChunkScore">A minimum score below which chunks should not be advanced.</param>
/// <returns>The top chunk assignments to the specified parse.</returns>
protected virtual Parse[] AdvanceChunks(Parse p, double minChunkScore) {
    // chunk

    // Gather the chunker inputs: one word (the child's head text) and one
    // POS tag per child constituent of the parse.
    var children = p.Children;
    var words = new string[children.Length];
    var pTags = new string[words.Length];
    //var probs = new double[words.Length];
    for (int i = 0, il = children.Length; i < il; i++) {
        words[i] = children[i].Head.CoveredText;
        pTags[i] = children[i].Type;
    }
    //System.err.println("adjusted mcs = "+(minChunkScore-p.getProb()));

    // The minimum advance score is relative to the probability this parse has
    // already accumulated, so subtract it before calling the chunker.
    var cs = chunker.TopKSequences(words, pTags, minChunkScore - p.Probability);
    var newParses = new Parse[cs.Length];
    for (var si = 0; si < cs.Length; si++) {
        newParses[si] = (Parse)p.Clone(); //copies top level
        if (createDerivationString) {
            newParses[si].Derivation.Append(si).Append(".");
        }
        var tags = cs[si].Outcomes.ToArray();

        // [start, end] track the child-index span of the chunk currently being
        // assembled; type is its label (null when no chunk is open).
        var start = -1;
        var end = 0;
        string type = null;

        // j runs to tags.Length INCLUSIVE so the final open chunk is flushed
        // after the last tag (the j == tags.Length pass takes the else branch).
        for (var j = 0; j <= tags.Length; j++) {
            if (j != tags.Length) {
                // Accumulate the log-probability of each chunk-tag decision.
                newParses[si].AddProbability(Math.Log(cs[si].Probabilities[j]));
            }
            if (j != tags.Length && tags[j].StartsWith(CONT)) {
                // if continue just update end chunking tag don't use contTypeMap
                end = j;
            } else {
                //make previous constituent if it exists
                if (type != null) {
                    var p1 = p.Children[start];
                    var p2 = p.Children[end];
                    // cons collects the children covered by the finished chunk
                    // so the head rules can pick its head.
                    var cons = new Parse[end - start + 1];
                    cons[0] = p1;
                    //cons[0].label="Start-"+type;
                    if (end - start != 0) {
                        cons[end - start] = p2;
                        //cons[end-start].label="Cont-"+type;
                        for (var ci = 1; ci < end - start; ci++) {
                            cons[ci] = p.Children[ci + start];
                            //cons[ci].label="Cont-"+type;
                        }
                    }
                    // Build the chunk constituent spanning p1..p2 and insert
                    // it into the cloned parse.
                    var chunk = new Parse(p1.Text, new Span(p1.Span.Start, p2.Span.End), type, 1, headRules.GetHead(cons, type)) {
                        IsChunk = true
                    };
                    newParses[si].Insert(chunk);
                }
                if (j != tags.Length) { // update for new constituent
                    if (tags[j].StartsWith(START)) { // don't use startTypeMap these are chunk tags
                        type = tags[j].Substring(START.Length);
                        start = j;
                        end = j;
                    } else { // other
                        type = null;
                    }
                }
            }
        }
    }
    return (newParses);
}