/// <summary>
/// Builds a <see cref="CoNLLSentence"/> from a list of terms: fills one table row per term,
/// runs <c>_crfModel.Tag</c> on the table, resolves every word's head from the tag found in
/// the last column, and finally assigns each word's dependency relation.
/// </summary>
/// <param name="terms">The terms of the sentence; one <see cref="CoNLLWord"/> is produced per table row.</param>
/// <returns>A sentence wrapping the words with their HEAD and DEPREL set.</returns>
public override CoNLLSentence Parse(List<Term> terms)
{
    // Row layout: [0] word, [1] first character of the compiled POS, [2] compiled POS,
    // [3] not written here; it is read back after tagging (presumably filled by _crfModel.Tag).
    var table = new Table();
    table.v = new string[terms.Count][];
    for (int i = 0; i < terms.Count; i++)
    {
        var term = terms[i];
        var line = new string[4];
        table.v[i] = line;
        line[0] = term.word;
        line[2] = DependencyUtil.compilePOS(term.nature);
        line[1] = line[2].Substring(0, 1);
    }

    _crfModel.Tag(table);

    // All words must exist before heads are linked, because a head may be a later word.
    var words = new CoNLLWord[table.Size];
    for (int i = 0; i < words.Length; i++)
    {
        var line = table.v[i];
        words[i] = new CoNLLWord(i + 1, line[0], line[2], line[1]);
    }

    for (int i = 0; i < table.Size; i++)
    {
        var dtag = new DTag(table.v[i][3]);

        // Tags are machine-generated identifiers, so compare ordinally rather than with the
        // current culture's linguistic rules.
        if (dtag.pos.EndsWith("ROOT", System.StringComparison.Ordinal))
        {
            words[i].HEAD = CoNLLWord.ROOT;
            continue;
        }

        var index = ConvertOffset2Index(dtag, table, i);

        // -1 means no head row could be resolved for this tag.
        words[i].HEAD = index == -1 ? CoNLLWord.NULL : words[index];
    }

    // The relation label is looked up from the (word, POS) pair of the dependent and of its head.
    for (int i = 0; i < words.Length; i++)
    {
        var word = words[i];
        word.DEPREL = BigramDependencyModel.Get(word.NAME, word.POSTAG, word.HEAD.NAME, word.HEAD.POSTAG);
    }

    return new CoNLLSentence(words);
}
/// <summary>
/// Segments a sentence into terms using the CRF model: the characters are converted with
/// <c>CharTable.Convert</c>, split into atoms, tagged, and the tagged atoms are merged into
/// words. Optionally guesses natures with Viterbi and merges terms using the custom dictionary,
/// depending on <c>config</c>.
/// </summary>
/// <param name="sentence">The characters of the sentence. Term text is sliced from this original
/// array, not from the converted copy.</param>
/// <returns>The list of terms; empty when <paramref name="sentence"/> is empty.</returns>
public override List<Term> SegSentence(char[] sentence)
{
    var list = new List<Term>();
    if (sentence.Length == 0)
    {
        return list;
    }

    var convertedChars = CharTable.Convert(sentence);
    var table = new Table();
    table.v = AtomSeg2Table(convertedChars);
    _crfModel.Tag(table);

    // Column 1 of each row is the atom text and column 2 its tag. Offsets are accumulated from
    // atom lengths and used to slice the original sentence.
    // NOTE(review): this assumes CharTable.Convert preserves the array length - confirm.
    int offset = 0;
    int row = 0;
    while (row < table.Size)
    {
        int begin = offset;

        if (table.v[row][2][0] == 'B')
        {
            // Consume atoms up to (but not including) the one tagged 'E'. The bounds check
            // covers a 'B' run that reaches the end of the table without a closing 'E'.
            while (row < table.Size && table.v[row][2][0] != 'E')
            {
                offset += table.v[row][1].Length;
                row++;
            }
        }

        // Consume the current atom: the closing 'E' of a multi-atom word, or a single atom that
        // forms a word by itself (in theory only tag 'S'). Skipped when the table is exhausted,
        // which previously caused an out-of-range read in the for-loop iterator.
        if (row < table.Size)
        {
            offset += table.v[row][1].Length;
            row++;
        }

        list.Add(new Term(new string(sentence, begin, offset - begin), Nature.none));
    }

    if (config.natureTagging)
    {
        var vertices = ToVertexList(list, true);
        Viterbi.Compute(vertices, CoreDictTransfromMatrixDictionary.transformMatrixDictionary);
        for (int i = 0; i < list.Count; i++)
        {
            var term = list[i];
            if (term.nature == Nature.none)
            {
                // vertices[i + 1]: the vertex list carries an extra auxiliary start node in front.
                term.nature = vertices[i + 1].GuessNature();
            }
        }
    }

    if (config.useCustomDict)
    {
        // Open question from the original author: could this overwrite the natures assigned above?
        var vertices = ToVertexList(list, false);
        CombineByCustomDict(vertices);
        list = ToTermList(vertices, config.offset);
    }

    return list;
}