/// <summary>
/// Loads the binary value cache at <paramref name="path"/> into an array of NS tag/frequency items.
/// </summary>
/// <remarks>
/// Layout as read by this method (every value is a 32-bit integer decoded with
/// <see cref="BitConverter.ToInt32(byte[], int)"/>): an item count, then for each item a pair
/// count followed by that many (tag, frequency) pairs.
/// The buffer is validated before every read, so a truncated or corrupt file yields
/// <c>null</c> (the same result as a missing file) instead of an exception from
/// <see cref="BitConverter"/> or from the array allocation.
/// </remarks>
/// <param name="path">Path of the binary cache file.</param>
/// <returns>
/// The decoded items, or <c>null</c> when the file does not exist, is too short for the
/// counts it declares, or declares a negative count.
/// </returns>
private TagFreqItem<NS>[] LoadDat(string path)
{
    if (!File.Exists(path))
    {
        return null;
    }

    const int intSize = sizeof(int);
    var bytes = File.ReadAllBytes(path);
    if (bytes.Length < intSize)
    {
        // Not even room for the item count.
        return null;
    }

    int index = 0;
    int size = BitConverter.ToInt32(bytes, index);
    index += intSize;

    // Every item needs at least its own 4-byte pair count, so a count that is negative or
    // larger than the remaining bytes allow can only come from a damaged file.
    if (size < 0 || size > (bytes.Length - index) / intSize)
    {
        return null;
    }

    var valueArr = new TagFreqItem<NS>[size];
    for (int i = 0; i < size; i++)
    {
        if (bytes.Length - index < intSize)
        {
            return null;
        }

        int currSize = BitConverter.ToInt32(bytes, index);
        index += intSize;

        // Each pair occupies two integers (tag + frequency).
        if (currSize < 0 || currSize > (bytes.Length - index) / (2 * intSize))
        {
            return null;
        }

        var tfi = new TagFreqItem<NS>();
        for (int j = 0; j < currSize; j++)
        {
            int tag = BitConverter.ToInt32(bytes, index);
            index += intSize;
            int freq = BitConverter.ToInt32(bytes, index);
            index += intSize;
            tfi.labelMap.Add((NS)tag, freq);
        }

        valueArr[i] = tfi;
    }

    return valueArr;
}
/// <summary>
/// Loads the NR value table for the dictionary stored at <paramref name="path"/>.
/// </summary>
/// <remarks>
/// The binary cache <c>path + ".value.dat"</c> is tried first via <c>LoadDat</c>. When that
/// returns <c>null</c>, the text file at <paramref name="path"/> is read line by line and
/// each line is parsed with <c>TagFreqItem&lt;NR&gt;.Create</c>.
/// </remarks>
/// <param name="path">Path of the text dictionary; the cache path is derived from it.</param>
/// <returns>
/// One item per line of the text file (or the cached array), or <c>null</c> when any
/// exception is raised while reading or parsing the text file.
/// </returns>
public override TagFreqItem<NR>[] OnLoadValue(string path)
{
    var cached = LoadDat(path + ".value.dat");
    if (cached != null)
    {
        return cached;
    }

    var items = new List<TagFreqItem<NR>>();
    try
    {
        foreach (var line in File.ReadLines(path))
        {
            var parsed = TagFreqItem<NR>.Create(line);
            var item = new TagFreqItem<NR>();

            // Item2 holds the (label name, frequency) pairs of the line; the label name is
            // converted to its NR enum member. Assigning through the indexer means a
            // repeated label overwrites the earlier frequency.
            foreach (var pair in parsed.Item2)
            {
                var label = (NR)Enum.Parse(typeof(NR), pair.Key);
                item.labelMap[label] = pair.Value;
            }

            items.Add(item);
        }
    }
    catch (Exception)
    {
        // Any read or parse failure discards everything loaded so far.
        return null;
    }

    return items.ToArray();
}
/// <summary>
/// Produces one NS tag/frequency item per vertex, in the same order as <paramref name="vertices"/>.
/// </summary>
/// <param name="vertices">Vertices to tag.</param>
/// <param name="wordnet_all">Not used by this method.</param>
/// <returns>A list with exactly one item for every vertex.</returns>
private static List<TagFreqItem<NS>> RoleTag(List<Vertex> vertices, WordNet wordnet_all)
{
    var result = new List<TagFreqItem<NS>>();

    foreach (var vertex in vertices)
    {
        bool lowFreqPlace = Nature.ns == vertex.GetNature() && vertex.attr.totalFreq <= 1000;
        if (lowFreqPlace)
        {
            // Place names shorter than three characters (two-character place names) get
            // the H and G labels; longer ones get G only.
            result.Add(vertex.realWord.Length < 3
                ? new TagFreqItem<NS>(NS.H, NS.G)
                : new TagFreqItem<NS>(NS.G));
            continue;
        }

        // The lookup uses the equivalent word (vertex.word), not the surface form.
        var known = PlaceDictionary.dict.Get(vertex.word);
        result.Add(known ?? new TagFreqItem<NS>(NS.Z, PlaceDictionary.trans_tr_dict.GetFreq(NS.Z)));
    }

    return result;
}
/// <summary>
/// For every vertex in the given list, determines its NT role item: either a fixed item
/// derived from the vertex's part of speech, or the (tag, frequency) item stored for it in
/// the organization dictionary.
/// </summary>
/// <param name="vertices">Vertices to tag.</param>
/// <param name="wordNetAll">Not used by this method.</param>
/// <returns>A list with exactly one item per vertex, in vertex order.</returns>
public static List<TagFreqItem<NT>> RoleTag(List<Vertex> vertices, WordNet wordNetAll)
{
    var tagList = new List<TagFreqItem<NT>>();

    foreach (var vertex in vertices)
    {
        // Items decided purely by the vertex's part of speech; stays null when the
        // part of speech does not settle the role.
        TagFreqItem<NT> tagItem = null;

        switch (vertex.GetNature())
        {
            case Nature.nrf: // transliterated person name
                if (vertex.attr.totalFreq <= 1000)
                {
                    tagItem = new TagFreqItem<NT>(NT.F, 1000);
                }
                break;

            case Nature.ni: // organization-related names
            case Nature.nic:
            case Nature.nis:
            case Nature.nit:
                tagItem = new TagFreqItem<NT>(NT.K, 1000);
                break;

            case Nature.m:
                tagItem = new TagFreqItem<NT>(NT.M, 1000);
                break;
        }

        if (tagItem == null)
        {
            // The lookup uses the equivalent word, which the original author notes is
            // more precise; words missing from the dictionary fall back to the Z label.
            tagItem = OrgDictionary.dictionary.Get(vertex.word);
            if (tagItem == null)
            {
                tagItem = new TagFreqItem<NT>(NT.Z, OrgDictionary.transformMatrixDictionary.GetFreq(NT.Z));
            }
        }

        tagList.Add(tagItem);
    }

    return tagList;
}
/// <summary>
/// Loads the binary value cache at <paramref name="path"/> into an array of NR tag/frequency items.
/// </summary>
/// <remarks>
/// Values are consumed sequentially through <c>ByteArray.NextInt</c>: an item count, then
/// for each item a pair count followed by that many (label, frequency) pairs.
/// </remarks>
/// <param name="path">Path of the binary cache file.</param>
/// <returns>The decoded items, or <c>null</c> when <c>ByteArray.Create</c> returns <c>null</c>.</returns>
private TagFreqItem<NR>[] LoadDat(string path)
{
    var reader = ByteArray.Create(path);
    if (reader == null)
    {
        return null;
    }

    var items = new TagFreqItem<NR>[reader.NextInt()];
    for (int i = 0; i < items.Length; i++)
    {
        var item = new TagFreqItem<NR>();

        // Count down the number of pairs declared for this item.
        for (int remaining = reader.NextInt(); remaining > 0; remaining--)
        {
            // Read order matters: the label value precedes its frequency.
            int label = reader.NextInt();
            int freq = reader.NextInt();
            item.AddLabel((NR)label, freq);
        }

        items[i] = item;
    }

    return items;
}
/// <summary>
/// Role observation: assigns an NR tag/frequency item to each word segment.
/// </summary>
/// <param name="wordSegs">Segmented vertices; the element at index 0 is treated as the leading auxiliary node.</param>
/// <returns>
/// A list that starts with a fixed (A, K) item for the leading node, followed by one item
/// for every vertex from index 1 onwards.
/// </returns>
public static List<TagFreqItem<NR>> RoleObserve(List<Vertex> wordSegs)
{
    // The leading auxiliary node always gets the A and K labels.
    var tagList = new List<TagFreqItem<NR>>();
    tagList.Add(new TagFreqItem<NR>(NR.A, NR.K));

    var dict = ChsPersonNameDict.dictionary;

    // Start at 1 to skip the leading auxiliary node.
    for (int pos = 1; pos < wordSegs.Count; pos++)
    {
        var vertex = wordSegs[pos];

        // Prefer the item stored in the person-name dictionary for the surface form.
        var item = dict.Get(vertex.realWord);
        if (item == null)
        {
            // No dictionary entry: fall back to the vertex's guessed part of speech.
            var guessed = vertex.GuessNature();
            if (guessed == Nature.nnt)
            {
                // Job title / position.
                item = new TagFreqItem<NR>(NR.G, NR.K);
            }
            else if (guessed == Nature.nr && vertex.attr.totalFreq <= 1000 && vertex.realWord.Length == 2)
            {
                // Low-frequency two-character person name.
                item = new TagFreqItem<NR>(NR.X, NR.G);
            }
            else
            {
                // Every other case, including remaining person names, gets the A label
                // with its frequency from the transform matrix dictionary.
                item = new TagFreqItem<NR>(NR.A, ChsPersonNameDict.transformMatrixDictionary.GetFreq(NR.A));
            }
        }

        tagList.Add(item);
    }

    return tagList;
}
/// <summary>
/// Loads the NT value table for the dictionary stored at <paramref name="path"/>.
/// </summary>
/// <remarks>
/// The binary cache <c>path + ".value.dat"</c> is tried first via <c>LoadDat</c>. When that
/// returns <c>null</c>, the text file at <paramref name="path"/> is read line by line;
/// blank lines are skipped and every other line is parsed with
/// <c>TagFreqItem&lt;NT&gt;.Create</c>.
/// Loading is best-effort: if reading or parsing fails part-way, the items parsed so far
/// are still returned, and the failure is reported through
/// <see cref="System.Diagnostics.Trace"/> instead of being dropped silently.
/// </remarks>
/// <param name="path">Path of the text dictionary; the cache path is derived from it.</param>
/// <returns>
/// The cached array, or the items parsed from the text file (possibly incomplete or empty
/// when an error occurred). Never <c>null</c>.
/// </returns>
public override TagFreqItem<NT>[] OnLoadValue(string path)
{
    var valueArr = LoadDat(path + ".value.dat");
    if (valueArr != null)
    {
        return valueArr;
    }

    var valueList = new List<TagFreqItem<NT>>();
    try
    {
        foreach (var line in File.ReadLines(path))
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var tuple = TagFreqItem<NT>.Create(line);
            var tfi = new TagFreqItem<NT>();

            // Item2 holds the (label name, frequency) pairs of the line; the label name is
            // converted to its NT enum member.
            foreach (var p in tuple.Item2)
            {
                tfi.labelMap.Add((NT)Enum.Parse(typeof(NT), p.Key), p.Value);
            }

            valueList.Add(tfi);
        }
    }
    catch (Exception e)
    {
        // Keep the best-effort contract (return what was parsed so far), but make the
        // failure visible so a truncated table does not go unnoticed.
        System.Diagnostics.Trace.TraceError(
            "Failed to load value file '{0}' after {1} entries: {2}",
            path,
            valueList.Count,
            e);
    }

    return valueList.ToArray();
}