/// <summary>
/// Loads word entries from the main keyword dictionary.
/// </summary>
/// <param name="filename">Path of the XML file that holds the main keyword dictionary.</param>
/// <returns>
/// A collection containing one <c>WordPos</c> for every non-empty <c>kw</c> element,
/// built from the element text and the integer value of its <c>w</c> attribute.
/// </returns>
/// <remarks>
/// I/O, XML and number-format exceptions are not handled here and propagate to the caller.
/// </remarks>
private static WordPosCollection LoadSegWords(string filename)
{
    WordPosCollection wpList = new WordPosCollection();

    // XmlTextReader streams the dictionary forward-only, which is fast for large files.
    // The using block guarantees the underlying file is closed even when parsing throws.
    using (XmlTextReader reader = new XmlTextReader(filename))
    {
        bool hasNode = reader.Read();
        while (hasNode)
        {
            bool isKeyword = reader.NodeType == XmlNodeType.Element
                && !reader.IsEmptyElement
                && reader.Name == "kw";
            if (!isKeyword)
            {
                hasNode = reader.Read();
                continue;
            }

            // Move to the "weight" attribute, which is named "w".
            // If the attribute is missing, reader.Value is empty and int.Parse throws.
            reader.MoveToAttribute("w");
            int weight = int.Parse(reader.Value);

            // ReadElementString returns to the owning element, reads its text and leaves
            // the reader positioned on the node that FOLLOWS the end tag.
            string kwStr = reader.ReadElementString();
            wpList.WordPosList.Add(new WordPos(kwStr, weight));

            // Do not call Read() here: the reader has already advanced, and reading again
            // would skip a <kw> element that immediately follows without whitespace.
            hasNode = !reader.EOF;
        }
    }

    return wpList;
}
/// <summary>
/// Reads a dictionary from a text file.
/// </summary>
/// <param name="fileName">Path of the dictionary file; it is read as UTF-8.</param>
/// <returns>
/// A collection with one <c>WordPos</c> for every entry of the form <c>word|pos</c>
/// whose <c>pos</c> part is a valid integer. Entries that do not match are skipped.
/// </returns>
public WordPosCollection LoadFromTextDict(String fileName)
{
    WordPosCollection dictFile = new WordPosCollection();
    String dictStr = CFile.ReadFileToString(fileName, "utf-8");

    // NOTE(review): entries are separated on "\r\n" only; a file with bare "\n"
    // line endings would presumably not be split into entries - confirm.
    String[] words = CRegex.Split(dictStr, "\r\n");

    // Characters that cause an entry to be additionally flagged with POS_A_M.
    char[] numeralChars = new char[] { '一', '二', '三', '四', '五', '六', '七', '八', '九', '十' };

    foreach (String word in words)
    {
        String[] wp = CRegex.Split(word, @"\|");
        if (wp == null || wp.Length != 2)
        {
            continue;
        }

        // TryParse instead of catching the parse exception: malformed entries are
        // expected input and are simply skipped.
        int pos;
        if (!int.TryParse(wp[1], out pos))
        {
            continue;
        }

        WordPos dict = new WordPos(wp[0], pos);

        // Any word containing one of the characters above also gets the POS_A_M flag
        // (presumably the numeral part of speech - confirm against PosEnum).
        if (dict.Word.IndexOfAny(numeralChars) >= 0)
        {
            dict.Pos |= (int)PosEnum.POS_A_M;
        }

        // Special case: this exact word is forced to POS_D_N, replacing whatever was read.
        if (dict.Word == "字典")
        {
            dict.Pos = (int)PosEnum.POS_D_N;
        }

        dictFile.WordPosList.Add(dict);
    }

    return dictFile;
}
/// <summary>
/// Determines whether the given object is a <c>WordPos</c> with the same
/// <c>Word</c> and <c>Pos</c> as this instance.
/// </summary>
/// <param name="obj">The object to compare with; may be null or of any type.</param>
/// <returns>
/// <c>true</c> when <paramref name="obj"/> is a <c>WordPos</c> whose <c>Word</c> and
/// <c>Pos</c> both equal this instance's values; otherwise <c>false</c>.
/// </returns>
public override bool Equals(object obj)
{
    // Equals must never throw: a null reference or an object of another type is
    // simply "not equal" rather than an invalid cast / null dereference.
    if (!(obj is WordPos))
    {
        return false;
    }

    // NOTE(review): GetHashCode is not visible in this part of the file - verify that
    // it is overridden consistently with this definition of equality.
    WordPos other = (WordPos)obj;
    return this.Word == other.Word && this.Pos == other.Pos;
}
/// <summary>
/// Builds a word collection from an in-memory list of dictionary lines, where each
/// line has the form <c>word</c>, two tab characters, <c>pos</c>.
/// </summary>
/// <returns>
/// A collection with one <c>WordPos</c> per well-formed line. Lines that are null,
/// have fewer than two fields, or have a non-integer second field are skipped.
/// </returns>
public WordPosCollection LoadFromString()
{
    WordPosCollection wpList = new WordPosCollection();

    // Placeholder source: the assignment from Initialize.SegWords is disabled, so the
    // array holds only null entries and the method currently yields an empty collection.
    string[] words = new string[3]; //= Initialize.SegWords;
    string[] separator = new string[] { "\t\t" };

    foreach (string word in words)
    {
        // Validate explicitly instead of relying on a catch-all to hide
        // null-reference, index and format errors.
        if (word == null)
        {
            continue;
        }

        string[] fields = word.Split(separator, StringSplitOptions.RemoveEmptyEntries);
        if (fields.Length < 2)
        {
            continue;
        }

        int pos;
        if (!int.TryParse(fields[1], out pos))
        {
            continue;
        }

        wpList.WordPosList.Add(new WordPos(fields[0], pos));
    }

    return wpList;
}
/// <summary>
/// Adds a single word to the dictionary.
/// </summary>
/// <param name="newWord">
/// The word to add; surrounding whitespace is trimmed. Null, empty or
/// whitespace-only values are ignored.
/// </param>
/// <param name="pos">The integer value stored with the word as its <c>Pos</c>.</param>
public void AddDict(string newWord, int pos)
{
    if (string.IsNullOrEmpty(newWord))
    {
        return;
    }

    newWord = newWord.Trim();

    // Whitespace-only input collapses to "" after trimming; never register an empty word.
    if (newWord.Length == 0)
    {
        return;
    }

    // A non-empty result from GetPos is treated as "already known": nothing is added.
    // The call also writes the isReg member through its out parameter.
    if (_POS.GetPos(newWord, out isReg).Length > 0)
    {
        return;
    }

    // Record the word in the dictionary collection and register it with _POS.
    WordPos word = new WordPos(newWord, pos);
    dictFile.WordPosList.Add(word);
    _POS.AddWordPos(word.Word, word.Pos);
}
/// <summary>
/// Reads a dictionary from a UTF-8 text file in which every line has the form
/// <c>word</c>, two tab characters, <c>pos</c>.
/// </summary>
/// <param name="dictpath">Path of the dictionary text file.</param>
/// <returns>
/// A collection with one <c>WordPos</c> per well-formed line. Lines with fewer than
/// two fields or a non-integer second field are skipped.
/// </returns>
/// <remarks>
/// Only malformed lines are skipped; errors raised while opening or reading the file
/// propagate to the caller instead of being silently discarded.
/// </remarks>
public WordPosCollection LoadFromTextFile(string dictpath)
{
    WordPosCollection file = new WordPosCollection();
    string[] separator = new string[] { "\t\t" };

    // The using block closes the file even if reading fails part-way through.
    using (StreamReader sr = new StreamReader(dictpath, Encoding.UTF8))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            string[] fields = line.Split(separator, StringSplitOptions.RemoveEmptyEntries);
            if (fields.Length < 2)
            {
                continue;
            }

            // Malformed numbers are expected input; skip them without using exceptions.
            int pos;
            if (!int.TryParse(fields[1], out pos))
            {
                continue;
            }

            file.WordPosList.Add(new WordPos(fields[0], pos));
        }
    }

    return file;
}
/// <summary>
/// Adds a single word to the dictionary from a text entry.
/// </summary>
/// <param name="newWord">
/// Either a bare word, or a word followed by tab character(s) and an integer
/// <c>Pos</c> value. When no value is given, <c>Pos</c> defaults to 0. Null, empty
/// or whitespace-only input is ignored.
/// </param>
/// <exception cref="System.FormatException">
/// The part after the tab is not a valid integer.
/// </exception>
public void AddDict(string newWord)
{
    if (string.IsNullOrEmpty(newWord))
    {
        return;
    }

    newWord = newWord.Trim();

    // Whitespace-only input collapses to "" after trimming; never register an empty word.
    if (newWord.Length == 0)
    {
        return;
    }

    // Dropping empty fields accepts both a single tab and the double-tab separator
    // used by the dictionary text files. The trimmed input starts with a non-tab
    // character, so the first field always exists and is non-empty.
    string[] fields = newWord.Split(new char[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);
    string wordText = fields[0];

    // Look up the word itself, not the whole "word<TAB>pos" entry, so that an
    // already-known word is not added a second time. A non-empty result from GetPos
    // is treated as "already known"; the call also writes isReg via its out parameter.
    if (_POS.GetPos(wordText, out isReg).Length > 0)
    {
        return;
    }

    int wordPos = 0;
    if (fields.Length > 1)
    {
        // An invalid number is still reported to the caller as an exception.
        wordPos = int.Parse(fields[1]);
    }

    // Record the word in the dictionary collection and register it with _POS.
    WordPos word = new WordPos(wordText, wordPos);
    dictFile.WordPosList.Add(word);
    _POS.AddWordPos(word.Word, word.Pos);
}