/// <summary>
/// Returns a new array holding <c>CoNLLWord.ROOT</c> at index 0 followed by
/// every word of this sentence in its original order.
/// </summary>
/// <returns>A freshly allocated array that is one element longer than the sentence.</returns>
public CoNLLWord[] GetWordArrWithRoot()
{
    int count = word.Length;
    CoNLLWord[] withRoot = new CoNLLWord[count + 1];

    // Slot 0 is reserved for the root; sentence words are shifted right by one.
    withRoot[0] = CoNLLWord.ROOT;
    for (int k = 0; k < count; k++)
    {
        withRoot[k + 1] = word[k];
    }

    return withRoot;
}
/// <summary>
/// Builds the eight context strings for the ordered word pair (<paramref name="i"/>, <paramref name="j"/>):
/// NAME→NAME and POSTAG→POSTAG, the same two with the signed distance <c>i - j</c> appended,
/// and four variants in which the word preceding position i or position j is joined with '@'.
/// When there is no preceding word, <c>CoNLLWord.NULL</c> is used in its place.
/// </summary>
/// <param name="word">The words to read NAME and POSTAG from.</param>
/// <param name="i">Index of the word written on the left of '→'.</param>
/// <param name="j">Index of the word written on the right of '→'.</param>
/// <returns>The generated context strings, in a fixed order.</returns>
public static IEnumerable<string> GenerateUniContext(CoNLLWord[] word, int i, int j)
{
    // Read in the same order as the individual expressions would: word[i] first, then word[j].
    var nameI = word[i].NAME;
    var nameJ = word[j].NAME;
    var tagI = word[i].POSTAG;
    var tagJ = word[j].POSTAG;

    var namePair = nameI + '→' + nameJ;
    var tagPair = tagI + '→' + tagJ;
    int distance = i - j;

    CoNLLWord previousOfI = i > 0 ? word[i - 1] : CoNLLWord.NULL;
    CoNLLWord previousOfJ = j > 0 ? word[j - 1] : CoNLLWord.NULL;

    return new List<string>
    {
        namePair,
        tagPair,
        namePair + distance,
        tagPair + distance,
        previousOfI.NAME + '@' + namePair,
        nameI + '→' + previousOfJ.NAME + '@' + nameJ,
        previousOfI.POSTAG + '@' + tagPair,
        tagI + '→' + previousOfJ.POSTAG + '@' + tagJ,
    };
}
/// <summary>
/// Builds a sentence from its lines: one <c>CoNLLWord</c> is created per line and
/// every word is then linked to its head word (or to <c>CoNLLWord.ROOT</c>).
/// </summary>
/// <param name="lines">List of lines; each line stores the information of one word.</param>
public CoNLLSentence(List<CoNllLine> lines)
{
    var size = lines.Count;
    word = new CoNLLWord[size];

    // Pass 1: create every word before any linking happens. Linking inside this loop
    // would leave HEAD null whenever a word's head appears later in the sentence,
    // because that slot of the array has not been filled yet.
    for (int i = 0; i < size; i++)
    {
        word[i] = new CoNLLWord(lines[i]);
    }

    // Pass 2: resolve heads now that all words exist.
    for (int i = 0; i < size; i++)
    {
        CoNLLWord current = word[i];

        // value[6] is parsed as a 1-based head index (presumably the HEAD column of the
        // CoNLL format; confirm against CoNllLine). A value of 0 selects the root.
        // Invariant culture: the value is machine-readable data, not user-facing text.
        int head = int.Parse(
            lines[current.ID - 1].value[6],
            System.Globalization.CultureInfo.InvariantCulture) - 1;

        if (head != -1)
        {
            current.HEAD = word[head];
        }
        else
        {
            current.HEAD = CoNLLWord.ROOT;
        }
    }
}
/// <summary>
/// Builds the context strings for the word at <paramref name="index"/>.
/// Positions from two before to two after <paramref name="index"/> are visited; a position
/// outside the array is represented by <c>CoNLLWord.NULL</c>. Each position contributes two
/// strings: NAME + mark + offset and POSTAG + mark + offset, where offset is the visited
/// position minus <paramref name="index"/>.
/// </summary>
/// <param name="word">The words to read NAME and POSTAG from.</param>
/// <param name="index">The target position the context is centred on.</param>
/// <param name="mark">Marker inserted between the word attribute and the offset.</param>
/// <returns>The generated context strings, ordered by position.</returns>
public static IEnumerable<string> GenerateSingleWordContext(CoNLLWord[] word, int index, string mark)
{
    var features = new List<string>();
    int stop = index + 2 + 1;

    for (int position = index - 2; position < stop; position++)
    {
        // Short-circuit keeps word.Length untouched for negative positions.
        bool outside = position < 0 || position >= word.Length;
        CoNLLWord current = outside ? CoNLLWord.NULL : word[position];

        int offset = position - index;
        features.Add(current.NAME + mark + offset);
        features.Add(current.POSTAG + mark + offset);
    }

    return features;
}