Ejemplo n.º 1
0
        /// <summary>
        /// Reads the binary value cache at <paramref name="path"/> and decodes it into an array of
        /// <c>TagFreqItem&lt;NS&gt;</c>.
        /// </summary>
        /// <remarks>
        /// Layout as consumed by this method: one 32-bit entry count, then for every entry a 32-bit
        /// pair count followed by that many (tag, frequency) pairs of 32-bit integers. Integers are
        /// decoded with <see cref="BitConverter"/>, i.e. in the platform's byte order.
        /// </remarks>
        /// <param name="path">Path of the binary cache file.</param>
        /// <returns>
        /// The decoded items, or <c>null</c> when the file does not exist, is truncated, or carries a
        /// negative count. NOTE(review): callers are assumed to treat <c>null</c> as "no usable cache",
        /// as they already must for a missing file - confirm.
        /// </returns>
        private TagFreqItem <NS>[] LoadDat(string path)
        {
            const int IntSize = sizeof(int);

            if (!File.Exists(path))
            {
                return null;
            }
            var bytes = File.ReadAllBytes(path);

            // A file that cannot even hold the entry count is not a usable cache.
            if (bytes.Length < IntSize)
            {
                return null;
            }

            int index = 0;
            int size  = BitConverter.ToInt32(bytes, index);
            index += IntSize;

            // Every entry needs at least its own 4-byte pair count, so a count that cannot fit in the
            // remaining bytes (or is negative) means the header is corrupt; bail out before allocating.
            if (size < 0 || (long)size * IntSize > bytes.Length - index)
            {
                return null;
            }

            var valueArr = new TagFreqItem <NS> [size];

            for (int i = 0; i < size; i++)
            {
                if (bytes.Length - index < IntSize)
                {
                    return null;        // truncated before this entry's pair count
                }
                var currSize = BitConverter.ToInt32(bytes, index);
                index += IntSize;

                // Each pair occupies two 32-bit integers (tag + frequency).
                if (currSize < 0 || (long)currSize * 2 * IntSize > bytes.Length - index)
                {
                    return null;
                }

                var tfi = new TagFreqItem <NS>();
                for (int j = 0; j < currSize; j++)
                {
                    var tag = BitConverter.ToInt32(bytes, index);
                    index += IntSize;

                    var freq = BitConverter.ToInt32(bytes, index);
                    index += IntSize;

                    tfi.labelMap.Add((NS)tag, freq);
                }
                valueArr[i] = tfi;
            }
            return valueArr;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads the NR tag/frequency values for the dictionary at <paramref name="path"/>.
        /// </summary>
        /// <remarks>
        /// The binary cache <c>path + ".value.dat"</c> is tried first; when it yields nothing the text
        /// file at <paramref name="path"/> is parsed line by line.
        /// </remarks>
        /// <param name="path">Path of the text dictionary; the cache path is derived from it.</param>
        /// <returns>
        /// One item per non-blank line (or the cached array), or <c>null</c> when parsing the text
        /// file fails.
        /// </returns>
        public override TagFreqItem <NR>[] OnLoadValue(string path)
        {
            var valueArr = LoadDat(path + ".value.dat");

            if (valueArr != null)
            {
                return valueArr;
            }

            var valueList = new List <TagFreqItem <NR> >();

            try
            {
                foreach (var line in File.ReadLines(path))
                {
                    // Skip blank lines (e.g. a trailing newline) instead of handing them to the parser.
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }
                    var tuple = TagFreqItem <NR> .Create(line);

                    var tfi = new TagFreqItem <NR>();

                    foreach (var p in tuple.Item2)
                    {
                        // Indexer assignment: a tag repeated on one line keeps its last frequency.
                        tfi.labelMap[(NR)Enum.Parse(typeof(NR), p.Key)] = p.Value;
                    }
                    valueList.Add(tfi);
                }
            }
            catch (Exception e)
            {
                // Still reported to the caller as null, but no longer silently.
                System.Diagnostics.Trace.TraceWarning("Failed to load values from '" + path + "': " + e);
                return null;
            }

            return valueList.ToArray();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Produces one NS tag/frequency item for every vertex in <paramref name="vertices"/>.
        /// </summary>
        /// <param name="vertices">Vertices to tag, in order.</param>
        /// <param name="wordnet_all">Not used by this method.</param>
        /// <returns>A list with exactly one item per input vertex, in the same order.</returns>
        private static List <TagFreqItem <NS> > RoleTag(List <Vertex> vertices, WordNet wordnet_all)
        {
            var roles = new List <TagFreqItem <NS> >();

            foreach (var current in vertices)
            {
                TagFreqItem <NS> role;

                if (current.GetNature() == Nature.ns && current.attr.totalFreq <= 1000)
                {
                    // Place-name vertex with total frequency <= 1000: words shorter than three
                    // characters (two-character place names) get H and G, longer ones only G.
                    role = current.realWord.Length < 3
                        ? new TagFreqItem <NS>(NS.H, NS.G)
                        : new TagFreqItem <NS>(NS.G);
                }
                else
                {
                    // Look up by the equivalent word; fall back to Z with its matrix frequency.
                    role = PlaceDictionary.dict.Get(current.word);
                    if (role == null)
                    {
                        role = new TagFreqItem <NS>(NS.Z, PlaceDictionary.trans_tr_dict.GetFreq(NS.Z));
                    }
                }

                roles.Add(role);
            }

            return roles;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// For every vertex in the given list, determines its NT tag/frequency item, either from the
        /// vertex's part of speech or from the (tag, frequency) pairs in the organization dictionary.
        /// </summary>
        /// <param name="vertices">Vertices to tag, in order.</param>
        /// <param name="wordNetAll">Not used by this method.</param>
        /// <returns>A list with exactly one item per input vertex, in the same order.</returns>
        public static List <TagFreqItem <NT> > RoleTag(List <Vertex> vertices, WordNet wordNetAll)
        {
            var roles = new List <TagFreqItem <NT> >();

            foreach (var current in vertices)
            {
                // Stays null unless the part of speech alone decides the role.
                TagFreqItem <NT> role = null;

                switch (current.GetNature())
                {
                case Nature.nrf:                            // transliterated personal name
                    if (current.attr.totalFreq <= 1000)
                    {
                        role = new TagFreqItem <NT>(NT.F, 1000);
                    }
                    break;

                case Nature.ni:                             // organization-related names
                case Nature.nic:
                case Nature.nis:
                case Nature.nit:
                    role = new TagFreqItem <NT>(NT.K, 1000);
                    role.AddLabel(NT.D, 1000);
                    break;

                case Nature.m:
                    role = new TagFreqItem <NT>(NT.M, 1000);
                    break;
                }

                if (role == null)
                {
                    // Dictionary lookup uses the equivalent word (more precise); unknown words get Z
                    // with its frequency from the transform matrix dictionary.
                    role = OrgDictionary.dictionary.Get(current.word);
                    if (role == null)
                    {
                        role = new TagFreqItem <NT>(NT.Z, OrgDictionary.transformMatrixDictionary.GetFreq(NT.Z));
                    }
                }

                roles.Add(role);
            }

            return roles;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Decodes the binary value cache at <paramref name="path"/> into an array of
        /// <c>TagFreqItem&lt;NR&gt;</c>.
        /// </summary>
        /// <remarks>
        /// Reads, in order: an entry count, then for every entry a pair count followed by that many
        /// (enum value, frequency) integer pairs, all obtained through <c>ByteArray.NextInt()</c>.
        /// </remarks>
        /// <param name="path">Path of the binary cache file.</param>
        /// <returns>The decoded items, or <c>null</c> when <c>ByteArray.Create</c> returns <c>null</c>.</returns>
        private TagFreqItem <NR>[] LoadDat(string path)
        {
            var reader = ByteArray.Create(path);
            if (reader == null)
            {
                return null;
            }

            int entryCount = reader.NextInt();
            var items      = new TagFreqItem <NR> [entryCount];

            for (int slot = 0; slot < entryCount; slot++)
            {
                int pairCount = reader.NextInt();
                var item      = new TagFreqItem <NR>();

                for (int left = pairCount; left > 0; left--)
                {
                    // The tag is stored before its frequency; keep the read order.
                    int rawTag    = reader.NextInt();
                    int frequency = reader.NextInt();

                    item.AddLabel((NR)rawTag, frequency);
                }

                items[slot] = item;
            }

            return items;
        }
        /// <summary>
        /// Role observation: builds the list of NR tag/frequency items for a segmented sentence.
        /// </summary>
        /// <param name="wordSegs">Vertices of the sentence; index 0 is treated as the start node.</param>
        /// <returns>
        /// A list that starts with an A/K item for the start node, followed by one item for every
        /// vertex from index 1 onwards.
        /// </returns>
        public static List <TagFreqItem <NR> > RoleObserve(List <Vertex> wordSegs)
        {
            var roles = new List <TagFreqItem <NR> >();
            roles.Add(new TagFreqItem <NR>(NR.A, NR.K));        // start node: A K

            var nameDict = ChsPersonNameDict.dictionary;

            // Begin at 1 to skip the leading auxiliary node.
            for (int pos = 1; pos < wordSegs.Count; pos++)
            {
                var node  = wordSegs[pos];
                var entry = nameDict.Get(node.realWord);        // dictionary entry for the surface word

                if (entry == null)
                {
                    // No dictionary entry: let the guessed part of speech decide.
                    var guessed = node.GuessNature();

                    if (guessed == Nature.nr && node.attr.totalFreq <= 1000 && node.realWord.Length == 2)
                    {
                        // Personal name with total frequency <= 1000 and exactly two characters.
                        entry = new TagFreqItem <NR>(NR.X, NR.G);
                    }
                    else if (guessed == Nature.nnt)
                    {
                        // Job title / position.
                        entry = new TagFreqItem <NR>(NR.G, NR.K);
                    }
                    else
                    {
                        // Everything else, including other personal names: A with its matrix frequency.
                        entry = new TagFreqItem <NR>(NR.A, ChsPersonNameDict.transformMatrixDictionary.GetFreq(NR.A));
                    }
                }

                roles.Add(entry);
            }

            return roles;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Loads the NT tag/frequency values for the dictionary at <paramref name="path"/>.
        /// </summary>
        /// <remarks>
        /// The binary cache <c>path + ".value.dat"</c> is tried first; when it yields nothing the text
        /// file at <paramref name="path"/> is parsed line by line, skipping blank lines.
        /// </remarks>
        /// <param name="path">Path of the text dictionary; the cache path is derived from it.</param>
        /// <returns>
        /// The cached array, or one item per non-blank line. If parsing fails part-way, the items
        /// parsed before the failure are returned (possibly an empty array), never <c>null</c>.
        /// </returns>
        public override TagFreqItem <NT>[] OnLoadValue(string path)
        {
            var valueArr = LoadDat(path + ".value.dat");

            if (valueArr != null)
            {
                return valueArr;
            }

            var valueList = new List <TagFreqItem <NT> >();

            try
            {
                foreach (var line in File.ReadLines(path))
                {
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }
                    var tuple = TagFreqItem <NT> .Create(line);

                    var tfi = new TagFreqItem <NT>();

                    foreach (var p in tuple.Item2)
                    {
                        tfi.labelMap.Add((NT)Enum.Parse(typeof(NT), p.Key), p.Value);
                    }
                    valueList.Add(tfi);
                }
            }
            catch (Exception e)
            {
                // Best-effort load: keep what was parsed so far, but report the failure instead of
                // swallowing it silently.
                System.Diagnostics.Trace.TraceWarning("Failed to load values from '" + path + "': " + e);
            }
            return valueList.ToArray();
        }