예제 #1
0
        /// <summary>
        /// Builds a pair dictionary from the CoNLL corpus at <paramref name="corpusPath"/> and
        /// saves it as text to <paramref name="modelSavePath"/>.
        /// </summary>
        /// <remarks>
        /// For every word of every loaded sentence, four pairs are registered through
        /// <c>AddPair</c>, each labelled with the word's <c>DEPREL</c>: name/head-name,
        /// name/wrapped-head-tag, wrapped-tag/head-name and wrapped-tag/wrapped-head-tag.
        /// As a side effect, one <c>cases "&lt;POSTAG&gt;":</c> line per distinct POS tag
        /// (ordered by <c>StrComparer.Default</c>) is written to
        /// <c>Config.Word_Nat_Weight_Model_Path</c>.
        /// </remarks>
        /// <param name="corpusPath">Path handed to <c>CoNLLUtil.LoadSentences</c>.</param>
        /// <param name="modelSavePath">Path handed to <c>DictionaryMaker.SaveTxtTo</c>.</param>
        /// <returns>The value returned by <c>DictionaryMaker.SaveTxtTo</c>.</returns>
        public static bool MakeModel(string corpusPath, string modelSavePath)
        {
            // The corpus is fed through the maker twice; per the original author's note this
            // merely raises each label's frequency from 1 to 2.
            const int passCount = 2;

            var posSet = new SortedSet<string>(StrComparer.Default);
            var dm     = new DictionaryMaker();

            // Load the corpus once and reuse it for every pass instead of re-reading it per pass.
            var sentences = CoNLLUtil.LoadSentences(corpusPath);

            for (int pass = 0; pass < passCount; pass++)
            {
                foreach (var s in sentences)
                {
                    foreach (var w in s.word)
                    {
                        AddPair(w.NAME, w.HEAD.NAME, w.DEPREL, dm);
                        AddPair(w.NAME, WrapTag(w.HEAD.POSTAG), w.DEPREL, dm);
                        AddPair(WrapTag(w.POSTAG), w.HEAD.NAME, w.DEPREL, dm);
                        AddPair(WrapTag(w.POSTAG), WrapTag(w.HEAD.POSTAG), w.DEPREL, dm);
                        posSet.Add(w.POSTAG);
                    }
                }
            }

            var sb = new StringBuilder();

            foreach (var pos in posSet)
            {
                // Chained appends avoid building a temporary concatenated string per tag.
                sb.Append("cases \"").Append(pos).Append("\":\n");
            }

            File.WriteAllText(Config.Word_Nat_Weight_Model_Path, sb.ToString());
            return dm.SaveTxtTo(modelSavePath);
        }
예제 #2
0
 /// <summary>
 /// Initializes a word from its id, lemma and the two part-of-speech tags, and sets
 /// <c>NAME</c> to the result of <c>CoNLLUtil.Compile(postag, lemma)</c>.
 /// </summary>
 /// <param name="id">Value stored in <c>ID</c>.</param>
 /// <param name="lemma">Value stored in <c>LEMMA</c>; also passed to <c>CoNLLUtil.Compile</c>.</param>
 /// <param name="cpostag">Value stored in <c>CPOSTAG</c>.</param>
 /// <param name="postag">Value stored in <c>POSTAG</c>; also passed to <c>CoNLLUtil.Compile</c>.</param>
 public CoNLLWord(int id, string lemma, string cpostag, string postag)
 {
     // Plain copies of the constructor arguments.
     this.ID = id;
     this.LEMMA = lemma;
     this.CPOSTAG = cpostag;
     this.POSTAG = postag;

     // NAME is derived from the arguments rather than supplied by the caller.
     this.NAME = CoNLLUtil.Compile(postag, lemma);
 }
예제 #3
0
        /// <summary>
        /// Loads the sentences at <paramref name="corpusPath"/> and writes one line per ordered
        /// pair of distinct positions (i, j) in each sentence to <paramref name="modelSavePath"/>.
        /// </summary>
        /// <remarks>
        /// Each line consists of the context strings produced for position i, position j and the
        /// (i, j) pair, each followed by a single space, and ends with the entry
        /// <c>edges[i, j]</c> taken from the sentence's edge array.
        /// </remarks>
        /// <param name="corpusPath">Path handed to <c>CoNLLUtil.LoadSentences</c>.</param>
        /// <param name="modelSavePath">File that receives the generated lines.</param>
        public static void MakeModel(string corpusPath, string modelSavePath)
        {
            var corpus  = CoNLLUtil.LoadSentences(corpusPath);
            var output  = new List<string>(corpus.Count * 30);
            var builder = new StringBuilder();

            foreach (var sentence in corpus)
            {
                // Dependency relations (edges) between the words of this sentence.
                var edges = sentence.GetEdgeArr();
                // All words of this sentence, including the virtual root node.
                var words = sentence.GetWordArrWithRoot();
                var count = edges.GetLength(0);

                for (int from = 0; from < count; from++)
                {
                    for (int to = 0; to < count; to++)
                    {
                        // A word has no dependency relation with itself, so only distinct pairs are emitted.
                        if (from != to)
                        {
                            // Original author's note: for the edge from i to j the dependency may
                            // or may not exist; when it does not, NULL is used instead.
                            var features = new List<string>();
                            features.AddRange(GenerateSingleWordContext(words, from, "i"));
                            features.AddRange(GenerateSingleWordContext(words, to, "j"));
                            features.AddRange(GenerateUniContext(words, from, to));

                            foreach (var feature in features)
                            {
                                builder.Append(feature);
                                builder.Append(' ');
                            }
                            builder.Append(edges[from, to]);

                            output.Add(builder.ToString());
                            builder.Clear();
                        }
                    }
                }
            }

            File.WriteAllLines(modelSavePath, output.ToArray());
        }