/// <summary>
/// Builds a <see cref="WordDictionaryFile"/> from a UTF-8 text file of '|'-separated entries.
/// </summary>
/// <remarks>
/// A line is used only when splitting it on '|' yields exactly three fields: the word
/// (trimmed), the part-of-speech value and the frequency. Any other line is skipped silently.
/// </remarks>
/// <param name="fileName">Path of the text dictionary file to read.</param>
/// <returns>A dictionary file whose <c>Dicts</c> list holds one entry per accepted line, in file order.</returns>
private WordDictionaryFile LoadFromTextFile(String fileName)
{
    WordDictionaryFile result = new WordDictionaryFile { Dicts = new List<IDataNode>() };
    char[] separators = { '|' };

    using (StreamReader reader = new StreamReader(fileName, Encoding.UTF8))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string[] fields = line.Split(separators);
            if (fields.Length != 3)
            {
                // Not a "word|pos|frequency" entry; ignore it.
                continue;
            }

            string word = fields[0].Trim();

            // The first two characters of the POS field are dropped before hex parsing
            // (presumably a "0x" prefix - confirm against the code that writes this file).
            string posField = fields[1];
            string posHex = posField.Substring(2, posField.Length - 2);
            POSType pos = (POSType)int.Parse(posHex, System.Globalization.NumberStyles.HexNumber);

            // NOTE(review): double.Parse(string) uses the current culture; confirm that the
            // writer of this file formats the frequency the same way.
            double frequency = double.Parse(fields[2]);

            result.Dicts.Add(new WordAttribute(word, pos, frequency));
        }
    }

    return result;
}
/// <summary>
/// Initializes a <c>PositionLength</c> from a position, a length and a word attribute,
/// with <c>Level</c> set to 0.
/// </summary>
/// <param name="position">Value stored in <c>Position</c>.</param>
/// <param name="length">Value stored in <c>Length</c>.</param>
/// <param name="wordAttr">Value stored in <c>WordAttr</c>.</param>
public PositionLength(int position, int length, WordAttribute wordAttr)
{
    Position = position;
    Length = length;
    WordAttr = wordAttr;

    // Every instance created through this constructor starts at level 0.
    Level = 0;
}
/// <summary>
/// Reads the file at <c>setting.Uri</c> and turns every line into a <see cref="WordAttribute"/>.
/// </summary>
/// <remarks>
/// Each line is split on single spaces. When that yields exactly three parts they are taken as
/// word, frequency and numeric part-of-speech value; otherwise only the first part is used as
/// the word and the remaining members keep whatever the parameterless constructor assigned.
/// </remarks>
/// <returns>One node per line of the file, in file order.</returns>
public List<IDataNode> Load()
{
    List<IDataNode> result = new List<IDataNode>();
    char[] separators = { ' ' };

    using (StreamReader reader = new StreamReader(setting.Uri))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string[] fields = line.Split(separators);

            WordAttribute entry = new WordAttribute();
            entry.Word = fields[0];

            if (fields.Length == 3)
            {
                // NOTE(review): Double.Parse(string) uses the current culture; confirm the
                // file is produced with a matching number format.
                entry.Frequency = Double.Parse(fields[1]);
                entry.POS = (BluePrint.SegmentFramework.POSType)Convert.ToInt32(fields[2]);
            }

            result.Add(entry);
        }
    }

    return result;
}
/// <summary>
/// Loads a binary dictionary file: a 32-byte version header followed by length-prefixed records.
/// </summary>
/// <remarks>
/// The header is decoded as UTF-8 and cut at the first NUL character. Each record is a 4-byte
/// length followed by that many bytes: the UTF-8 word, then a 4-byte part-of-speech value, then
/// an 8-byte frequency. Integers and doubles are decoded with <see cref="BitConverter"/>, i.e.
/// in the byte order of the running platform.
/// </remarks>
/// <param name="fileName">Path of the binary dictionary file to read.</param>
/// <param name="verNumStr">
/// Receives the first match of <c>verRegex</c> in the header text, or <c>null</c> when nothing matches.
/// </param>
/// <returns>A dictionary file whose <c>Dicts</c> list holds one entry per record, in file order.</returns>
/// <exception cref="EndOfStreamException">The file ends in the middle of a record.</exception>
/// <exception cref="InvalidDataException">
/// A record length is smaller than the fixed part-of-speech and frequency fields it must contain.
/// </exception>
private WordDictionaryFile LoadFromBinFile(String fileName, out string verNumStr)
{
    const int HeaderSize = 32;
    const int FixedFieldsSize = sizeof(int) + sizeof(double);

    WordDictionaryFile dictFile = new WordDictionaryFile();
    dictFile.Dicts = new List<IDataNode>();

    // The using blocks release the file handle even when a read or a parse step throws.
    using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read))
    using (BinaryReader reader = new BinaryReader(fs))
    {
        // ReadBytes keeps reading until the requested count or end of file. A header shorter
        // than 32 bytes is tolerated, exactly as the zero-padded buffer was before.
        byte[] version = reader.ReadBytes(HeaderSize);
        string ver = Encoding.UTF8.GetString(version, 0, version.Length);

        int zeroPosition = ver.IndexOf('\0');
        if (zeroPosition >= 0)
        {
            ver = ver.Substring(0, zeroPosition);
        }

        var matches = verRegex.Matches(ver);
        verNumStr = matches.Count > 0 ? matches[0].Value : null;

        while (fs.Position < fs.Length)
        {
            byte[] lengthBytes = reader.ReadBytes(sizeof(int));
            if (lengthBytes.Length != sizeof(int))
            {
                throw new EndOfStreamException(
                    "Dictionary file '" + fileName + "' ends inside a record length prefix.");
            }

            int length = BitConverter.ToInt32(lengthBytes, 0);
            if (length < FixedFieldsSize)
            {
                throw new InvalidDataException(
                    "Dictionary file '" + fileName + "' contains an invalid record length: " + length + ".");
            }

            // Check before allocating so a corrupt length cannot trigger a huge allocation.
            if (length > fs.Length - fs.Position)
            {
                throw new EndOfStreamException(
                    "Dictionary file '" + fileName + "' ends inside a record body.");
            }

            byte[] buf = reader.ReadBytes(length);
            int wordByteCount = length - FixedFieldsSize;

            // Use the reference returned by Intern; merely calling it does not replace the
            // entry's own copy when an equal string is already in the pool.
            string word = string.Intern(Encoding.UTF8.GetString(buf, 0, wordByteCount));
            POSType pos = (POSType)BitConverter.ToInt32(buf, wordByteCount);
            double frequency = BitConverter.ToDouble(buf, wordByteCount + sizeof(int));

            dictFile.Dicts.Add(new WordAttribute(word, pos, frequency));
        }
    }

    return dictFile;
}