/// <summary>
/// Inserts a word into the trie, walking from the root and adding one child node
/// per character, then marks the final node as the end of a word.
/// </summary>
/// <param name="word">Word to insert. A one-character word is delegated to the char overload.</param>
/// <param name="frequency">
/// Amount added to the final node's frequency. When it is <see cref="double.NaN"/>,
/// <c>IncreaseFrequency()</c> is called on the node instead.
/// </param>
/// <param name="pos">POS flag recorded on the final node unless it is UNKNOWN or NEWLINE.</param>
/// <returns>The node that corresponds to the last character of the word.</returns>
public TrieTreeNode AddWord(string word, double frequency, POSType pos)
{
    if (word.Length == 1)
    {
        return(AddWord(word[0], frequency, pos));
    }

    TrieTreeNode node = _root;
    foreach (char ch in word)
    {
        node = node.AddChild(ch);
    }

    // NaN never compares equal to anything, itself included, so "frequency == double.NaN"
    // is always false. double.IsNaN is the only reliable test.
    if (double.IsNaN(frequency))
    {
        node.IncreaseFrequency();
    }
    else
    {
        node.Frequency += frequency;
    }

    node.WordEnded = true;
    if (pos != POSType.UNKNOWN && pos != POSType.NEWLINE)
    {
        node.AddPOS(pos);
    }

    return(node);
}
/// <summary>
/// Writes a debug log entry naming the given POS type, then stores it in <c>PosType</c>.
/// </summary>
/// <param name="posType">Value to log and assign.</param>
public static void SetPosType(POSType posType)
{
    string posTypeName = posType.ToString();
    Mvx.IoCProvider.Resolve<IMvxLog>().Debug("App: Setting Pos type : {0}", posTypeName);

    PosType = posType;
}
/// <summary>
/// Reads a UTF-8 text dictionary where each entry is one line of three '|'-separated
/// fields: word, POS value in hexadecimal, and frequency. Lines that do not split into
/// exactly three fields are skipped.
/// </summary>
/// <param name="fileName">Path of the text file to read.</param>
/// <returns>A dictionary file whose <c>Dicts</c> list holds one WordAttribute per parsed line.</returns>
private WordDictionaryFile LoadFromTextFile(String fileName)
{
    WordDictionaryFile dictFile = new WordDictionaryFile();
    dictFile.Dicts = new List<IDataNode>();

    using (StreamReader sr = new StreamReader(fileName, Encoding.UTF8))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            string[] strs = line.Split(new char[] { '|' });
            if (strs.Length != 3)
            {
                continue;
            }

            string word = strs[0].Trim();

            // The first two characters of the POS field are skipped before hex parsing
            // (presumably a "0x" prefix - confirm against the file writer).
            string posHex = strs[1].Substring(2);

            // The file is machine-readable data, so parse with the invariant culture:
            // with the current culture, a frequency such as "1.5" is misread on systems
            // whose decimal separator is a comma.
            POSType pos = (POSType)int.Parse(
                posHex,
                System.Globalization.NumberStyles.HexNumber,
                System.Globalization.CultureInfo.InvariantCulture);
            double frequency = double.Parse(strs[2], System.Globalization.CultureInfo.InvariantCulture);

            dictFile.Dicts.Add(new WordAttribute(word, pos, frequency));
        }
    }

    return(dictFile);
}
/// <summary>
/// Inserts a word into the trie, walking from the root and adding one child node
/// per character, then marks the final node as the end of a word.
/// </summary>
/// <param name="word">Word to insert. A one-character word is delegated to the char overload.</param>
/// <param name="frequency">
/// Amount added to the final node's frequency. When it is <see cref="double.NaN"/>,
/// <c>IncreaseFrequency()</c> is called on the node instead.
/// </param>
/// <param name="pos">POS flag recorded on the final node unless it is UNKNOWN or NEWLINE.</param>
/// <returns>The node that corresponds to the last character of the word.</returns>
public TrieTreeNode AddWord(string word, double frequency, POSType pos)
{
    if (word.Length == 1)
    {
        return AddWord(word[0], frequency, pos);
    }

    TrieTreeNode node = _root;
    for (int i = 0; i < word.Length; i++)
    {
        node = node.AddChild(word[i]);
    }

    // "frequency == double.NaN" is always false because NaN is unequal to every value,
    // including itself. double.IsNaN makes the no-frequency branch reachable.
    if (double.IsNaN(frequency))
    {
        node.IncreaseFrequency();
    }
    else
    {
        node.Frequency += frequency;
    }

    node.WordEnded = true;
    if (pos != POSType.UNKNOWN && pos != POSType.NEWLINE)
    {
        node.AddPOS(pos);
    }

    return node;
}
/// <summary>
/// Reads a binary dictionary file: a 32-byte UTF-8 version header followed by records.
/// Each record is a 4-byte length and then that many bytes holding the UTF-8 word,
/// a 4-byte POS value and an 8-byte frequency.
/// </summary>
/// <param name="fileName">Path of the binary file to read.</param>
/// <param name="verNumStr">
/// First match of <c>verRegex</c> in the header text (cut at the first NUL), or null when there is no match.
/// </param>
/// <returns>A dictionary file whose <c>Dicts</c> list holds one WordAttribute per record.</returns>
/// <exception cref="EndOfStreamException">The file ends in the middle of a record.</exception>
private WordDictionaryFile LoadFromBinFile(String fileName, out string verNumStr)
{
    WordDictionaryFile dictFile = new WordDictionaryFile();
    dictFile.Dicts = new List<IDataNode>();

    // "using" guarantees the stream is closed even when a record fails to parse.
    using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read))
    {
        // A file shorter than the header is tolerated: the unread tail stays zeroed.
        byte[] version = new byte[32];
        ReadAvailable(fs, version);

        String ver = Encoding.UTF8.GetString(version, 0, version.Length);
        int zeroPosition = ver.IndexOf('\0');
        if (zeroPosition >= 0)
        {
            ver = ver.Substring(0, zeroPosition);
        }

        var matches = verRegex.Matches(ver);
        verNumStr = matches.Count > 0 ? matches[0].Value : null;

        byte[] lengthBuf = new byte[sizeof(int)];
        while (fs.Position < fs.Length)
        {
            if (ReadAvailable(fs, lengthBuf) < lengthBuf.Length)
            {
                throw new EndOfStreamException("Dictionary file is truncated inside a record length.");
            }

            int length = BitConverter.ToInt32(lengthBuf, 0);
            byte[] buf = new byte[length];
            if (ReadAvailable(fs, buf) < buf.Length)
            {
                throw new EndOfStreamException("Dictionary file is truncated inside a record body.");
            }

            int posOffset = length - sizeof(int) - sizeof(double);
            int frequencyOffset = length - sizeof(double);

            // Intern before constructing the entry so the entry actually holds the pooled
            // instance; the original call discarded the value returned by string.Intern.
            string word = string.Intern(Encoding.UTF8.GetString(buf, 0, posOffset));
            POSType pos = (POSType)BitConverter.ToInt32(buf, posOffset);
            double frequency = BitConverter.ToDouble(buf, frequencyOffset);

            dictFile.Dicts.Add(new WordAttribute(word, pos, frequency));
        }
    }

    return(dictFile);
}

/// <summary>
/// Fills <paramref name="buffer"/> from <paramref name="stream"/>, looping because a single
/// Stream.Read call may return fewer bytes than requested. Stops early only at end of stream.
/// </summary>
/// <returns>The number of bytes actually placed in the buffer.</returns>
private static int ReadAvailable(Stream stream, byte[] buffer)
{
    int total = 0;
    while (total < buffer.Length)
    {
        int read = stream.Read(buffer, total, buffer.Length - total);
        if (read <= 0)
        {
            break;
        }
        total += read;
    }
    return total;
}
/// <summary>
/// Adds a single character as a child of the root node and marks that child as a word end.
/// </summary>
/// <param name="ch">Character to add under the root.</param>
/// <param name="frequency">
/// Amount added to the node's frequency. When it is <see cref="double.NaN"/>,
/// <c>IncreaseFrequency()</c> is called on the node instead.
/// </param>
/// <param name="isWordEnded">Not consulted by this method; the node is always marked as a word end.</param>
/// <param name="pos">POS flag recorded on the node unless it is UNKNOWN or NEWLINE.</param>
/// <returns>The child node for <paramref name="ch"/>.</returns>
internal TrieTreeNode AddWord(char ch, double frequency, bool isWordEnded, POSType pos)
{
    TrieTreeNode newnode = _root.AddChild(ch);

    // NaN is unequal to every value including itself, so "== double.NaN" can never be true.
    // double.IsNaN is required for the no-frequency branch to run.
    if (double.IsNaN(frequency))
    {
        newnode.IncreaseFrequency();
    }
    else
    {
        newnode.Frequency += frequency;
    }

    newnode.WordEnded = true;
    if (pos != POSType.UNKNOWN && pos != POSType.NEWLINE)
    {
        newnode.AddPOS(pos);
    }

    return newnode;
}
/// <summary>
/// Creates an instance of ParseResult.
/// </summary>
/// <param name="text">word text; leading and trailing white space is trimmed before it is stored</param>
/// <param name="startIndex">start position; must not be negative</param>
/// <param name="type">POS value</param>
/// <param name="value">.NET value for this word</param>
/// <returns>a ParseResult populated from the arguments</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="startIndex"/> is negative</exception>
public static ParseResult Create(string text, int startIndex, POSType type, object value)
{
    if (startIndex < 0)
    {
        // The one-string constructor interprets its argument as the parameter NAME, so the
        // old call reported the whole sentence as the parameter. The old text also said
        // "bigger than 0" although 0 is accepted by the check above.
        throw new ArgumentOutOfRangeException("startIndex", startIndex, "startIndex must not be negative.");
    }

    ParseResult pr = new ParseResult();
    pr.Text = text.Trim();
    pr.StartPos = startIndex;
    pr.Type = type;
    pr.Value = value;
    return(pr);
}
/// <summary>
/// Adds a single character as a child of the root node and marks that child as a word end.
/// </summary>
/// <param name="ch">Character to add under the root.</param>
/// <param name="frequency">
/// Amount added to the node's frequency. When it is <see cref="double.NaN"/>,
/// <c>IncreaseFrequency()</c> is called on the node instead.
/// </param>
/// <param name="isWordEnded">Not consulted by this method; the node is always marked as a word end.</param>
/// <param name="pos">POS flag recorded on the node unless it is UNKNOWN or NEWLINE.</param>
/// <returns>The child node for <paramref name="ch"/>.</returns>
internal TrieTreeNode AddWord(char ch, double frequency, bool isWordEnded, POSType pos)
{
    TrieTreeNode newnode = _root.AddChild(ch);

    // Comparing with "== double.NaN" is always false; double.IsNaN is the correct test.
    if (double.IsNaN(frequency))
    {
        newnode.IncreaseFrequency();
    }
    else
    {
        newnode.Frequency += frequency;
    }

    newnode.WordEnded = true;

    if (pos != POSType.UNKNOWN && pos != POSType.NEWLINE)
    {
        newnode.AddPOS(pos);
    }

    return(newnode);
}
/// <summary>
/// Checks a server response against expectations. The response is split on SOH;
/// more than two fields is treated as a positive result whose word is field 1 and
/// whose POS value is field 3.
/// </summary>
/// <param name="response">Raw response text.</param>
/// <param name="expectedBoolean">Whether a positive result (more than two fields) is expected.</param>
/// <param name="expectedWord">Word expected in field 1 of a positive result.</param>
/// <param name="expectedPOS">POS expected in field 3; not compared when UNKNOWN.</param>
void AssertServerResponse(string response, bool expectedBoolean, string expectedWord, POSType expectedPOS)
{
    if (response.StartsWith("Server side error"))
    {
        Assert.Fail("server error");
    }

    char[] separators = new char[] { SOH };
    string[] fields = response.Split(separators);

    bool positiveResult = fields.Length > 2;
    if (!positiveResult)
    {
        if (expectedBoolean)
        {
            Assert.Fail("expected 0 but was 1");
        }
        return;
    }

    if (!expectedBoolean)
    {
        Assert.Fail("expected 1 but was 0");
    }

    if (expectedPOS != POSType.UNKNOWN)
    {
        POSType actualPOS = (POSType)Int32.Parse(fields[3]);
        Assert.AreEqual(expectedPOS, actualPOS);
    }

    Assert.AreEqual(expectedWord, fields[1]);
}
/// <summary>
/// Creates an instance of ParseResult with a null value.
/// </summary>
/// <param name="text">word text</param>
/// <param name="startIndex">start position</param>
/// <param name="type">POS value</param>
/// <returns>the result of the four-argument Create overload called with a null value</returns>
public static ParseResult Create(string text, int startIndex, POSType type)
{
    ParseResult result = Create(text, startIndex, type, null);
    return result;
}
/// <summary>
/// Adds a word without an explicit frequency by forwarding to the three-argument
/// overload with <see cref="double.NaN"/> as the frequency.
/// </summary>
/// <param name="word">Word to add.</param>
/// <param name="pos">POS value passed through unchanged.</param>
/// <returns>The node returned by the three-argument overload.</returns>
public TrieTreeNode AddWord(string word, POSType pos)
{
    double unspecifiedFrequency = double.NaN;
    TrieTreeNode node = AddWord(word, unspecifiedFrequency, pos);
    return node;
}
/// <summary>
/// Adds a single-character word by forwarding to the four-argument overload
/// with the word-ended flag set to true.
/// </summary>
/// <param name="ch">Character to add.</param>
/// <param name="frequency">Frequency passed through unchanged.</param>
/// <param name="pos">POS value passed through unchanged.</param>
/// <returns>The node returned by the four-argument overload.</returns>
public TrieTreeNode AddWord(char ch, double frequency, POSType pos)
{
    const bool wordEnded = true;
    TrieTreeNode node = AddWord(ch, frequency, wordEnded, pos);
    return node;
}
/// <summary>
/// Adds a word without an explicit frequency by forwarding to the three-argument
/// overload with <see cref="double.NaN"/> as the frequency.
/// </summary>
/// <param name="word">Word to add.</param>
/// <param name="pos">POS value passed through unchanged.</param>
/// <returns>The node returned by the three-argument overload.</returns>
public TrieTreeNode AddWord(string word, POSType pos)
{
    double noFrequency = double.NaN;
    return AddWord(word, noFrequency, pos);
}
/// <summary>
/// Adds a single-character word by forwarding to the four-argument overload
/// with the word-ended flag set to true.
/// </summary>
/// <param name="ch">Character to add.</param>
/// <param name="frequency">Frequency passed through unchanged.</param>
/// <param name="pos">POS value passed through unchanged.</param>
/// <returns>The node returned by the four-argument overload.</returns>
public TrieTreeNode AddWord(char ch, double frequency, POSType pos)
{
    const bool endsWord = true;
    return AddWord(ch, frequency, endsWord, pos);
}
/// <summary>
/// Asserts, in order, that a ParseResult has the expected text, start position and type.
/// </summary>
/// <param name="pr">Result under test.</param>
/// <param name="text">Expected value of <c>pr.Text</c>.</param>
/// <param name="startPos">Expected value of <c>pr.StartPos</c>.</param>
/// <param name="type">Expected value of <c>pr.Type</c>.</param>
public static void AssertParseResult(ParseResult pr, string text, int startPos, POSType type)
{
    // Text is checked first, then position, then type; the first mismatch fails the test.
    Assert.AreEqual(text, pr.Text);

    Assert.AreEqual(startPos, pr.StartPos);

    Assert.AreEqual(type, pr.Type);
}
/// <summary>
/// Initializes a word entry from its text, POS value and frequency.
/// </summary>
/// <param name="word">Assigned to <c>Word</c>.</param>
/// <param name="pos">Assigned to <c>POS</c>.</param>
/// <param name="frequency">Assigned to <c>Frequency</c>.</param>
public WordAttribute(string word, POSType pos, double frequency)
{
    Word = word;
    POS = pos;
    Frequency = frequency;
}
/// <summary>
/// Looks up the string stored in <c>POS</c> at the integer value of the given POS type.
/// </summary>
/// <param name="pos">POS type whose integer value is used as the lookup index.</param>
/// <returns>The entry of <c>POS</c> at that index.</returns>
public static string GetPOS(POSType pos)
{
    int index = (int)pos;
    return POS[index];
}
/// <summary>
/// Checks a server response against expectations. The response is split on SOH;
/// more than two fields is treated as a positive result whose word is field 1 and
/// whose POS value is field 3.
/// </summary>
/// <param name="response">Raw response text.</param>
/// <param name="expectedBoolean">Whether a positive result (more than two fields) is expected.</param>
/// <param name="expectedWord">Word expected in field 1 of a positive result.</param>
/// <param name="expectedPOS">POS expected in field 3; not compared when UNKNOWN.</param>
void AssertServerResponse(string response, bool expectedBoolean, string expectedWord, POSType expectedPOS)
{
    if (response.StartsWith("Server side error"))
    {
        Assert.Fail("server error");
    }

    string[] parts = response.Split(new char[] { SOH });

    if (parts.Length > 2)
    {
        if (!expectedBoolean)
        {
            Assert.Fail("expected 1 but was 0");
        }

        if (expectedPOS != POSType.UNKNOWN)
        {
            int rawPos = Int32.Parse(parts[3]);
            Assert.AreEqual(expectedPOS, (POSType)rawPos);
        }

        Assert.AreEqual(expectedWord, parts[1]);
    }
    else if (expectedBoolean)
    {
        Assert.Fail("expected 0 but was 1");
    }
}
/// <summary>
/// Adds a single-character word without an explicit frequency by forwarding to the
/// four-argument overload with <see cref="double.NaN"/> and the word-ended flag set to true.
/// </summary>
/// <param name="ch">Character to add.</param>
/// <param name="pos">POS value passed through unchanged.</param>
/// <returns>The node returned by the four-argument overload.</returns>
public TrieTreeNode AddWord(char ch, POSType pos)
{
    double unspecifiedFrequency = double.NaN;
    const bool wordEnded = true;
    TrieTreeNode node = AddWord(ch, unspecifiedFrequency, wordEnded, pos);
    return node;
}
/// <summary>
/// Sets the bits of the given POS value in <c>_posvalue</c> (bitwise OR).
/// </summary>
/// <param name="pos">POS value whose integer bits are merged in.</param>
public void AddPOS(POSType pos)
{
    int bits = (int)pos;
    _posvalue |= bits;
}
/// <summary>
/// Clears the bits of the given POS value in <c>_posvalue</c> (bitwise AND with the complement).
/// </summary>
/// <param name="pos">POS value whose integer bits are removed.</param>
public void RemovePOS(POSType pos)
{
    int keepMask = ~(int)pos;
    _posvalue &= keepMask;
}
/// <summary>
/// Handles the query button: parses the dictionary server address, combines the checked
/// POS types into a bit mask, runs the selected match against the server and shows the
/// result (or a not-found / invalid-address message) in the rich text box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnQuery_Click(object sender, EventArgs e)
{
    Regex regex = new Regex("dict://([\\d\\.]+):(\\d{3,5})", RegexOptions.Compiled);
    Match match = regex.Match(tbDictAddress.Text.Trim());

    // Validate before use: indexing matches[0] on a non-matching address, IPAddress.Parse on
    // a malformed host, or an out-of-range port would otherwise throw inside the UI handler.
    IPAddress ipAddress;
    int port;
    if (!match.Success
        || !IPAddress.TryParse(match.Groups[1].Value, out ipAddress)
        || !Int32.TryParse(match.Groups[2].Value, out port)
        || port > IPEndPoint.MaxPort)
    {
        richTextBox1.ForeColor = System.Drawing.Color.Red;
        richTextBox1.Text = "词典地址格式无效";
        return;
    }

    EndPoint serverAddress = new IPEndPoint(ipAddress, port);

    // OR together the enum values whose description matches a checked list item.
    int posValue = 0;
    var values = Enum.GetValues(typeof(POSType));
    foreach (var selectedObject in checkedListBox1.CheckedItems)
    {
        string value = (string)selectedObject;
        foreach (var v in values)
        {
            if (((System.Enum)v).Description() == value)
            {
                POSType pos = (POSType)v;
                posValue |= ((int)pos);
                break;
            }
        }
    }

    using (DictionaryServiceClient dsc = new DictionaryServiceClient())
    {
        dsc.Connect(serverAddress);

        string query = tbWord.Text.Trim();
        TrieTreeResult result = null;
        if (radioButton1.Checked)
        {
            result = dsc.MaximumMatch(query, posValue);
        }
        else if (radioButton2.Checked)
        {
            result = dsc.ReverseMaximumMatch(query, posValue);
        }
        else
        {
            result = dsc.ExactMatch(query, posValue);
        }

        if (result == null)
        {
            richTextBox1.ForeColor = System.Drawing.Color.Red;
            richTextBox1.Text = "未找到合适词";
            return;
        }

        // Remember where each appended label starts so it can be colored afterwards.
        string resultText = result.Word;
        int originalLength1 = 0, originalLength2 = 0;
        if (result.Frequency != 0)
        {
            originalLength1 = resultText.Length;
            resultText += ", 频率:" + result.Frequency;
        }
        if (result.POS > 0)
        {
            originalLength2 = resultText.Length;
            resultText += ", 类型:" + ConvertPOSTypesToString(ConvertValueToPOS(result.POS));
        }

        richTextBox1.Text = resultText;
        richTextBox1.ForeColor = System.Drawing.Color.Black;

        // Select the four characters after the comma of each label and turn them blue.
        if (originalLength1 > 0)
        {
            richTextBox1.Select(originalLength1 + 1, 4);
            richTextBox1.SelectionColor = System.Drawing.Color.Blue;
        }
        if (originalLength2 > 0)
        {
            richTextBox1.Select(originalLength2 + 1, 4);
            richTextBox1.SelectionColor = System.Drawing.Color.Blue;
        }
    }
}
/// <summary>
/// Adds a single-character word without an explicit frequency by forwarding to the
/// four-argument overload with <see cref="double.NaN"/> and the word-ended flag set to true.
/// </summary>
/// <param name="ch">Character to add.</param>
/// <param name="pos">POS value passed through unchanged.</param>
/// <returns>The node returned by the four-argument overload.</returns>
public TrieTreeNode AddWord(char ch, POSType pos)
{
    double noFrequency = double.NaN;
    const bool endsWord = true;
    return AddWord(ch, noFrequency, endsWord, pos);
}