/// <summary>
/// Imports antonym and synonym links from the "PWREE.da3" XML dictionary.
/// For every <c>单词块</c> entry the headword is read from <c>单词</c> and the related-word
/// text from <c>单词解释块/基本词义/单词项/相关词</c>. Tokens of the form <c>l{word}</c> are passed
/// to <c>addAntonym</c>, tokens of the form <c>L{word}</c> to <c>addSynonym</c>.
/// A trace of the run (headwords, extracted words, "FAILED" markers) is written to "result.txt".
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    XmlDocument doc = new XmlDocument();
    doc.Load("PWREE.da3");

    // The dictionary root is taken to be the second top-level node of the document.
    XmlNode dict = doc.ChildNodes[1];
    XmlNodeList entries = dict.SelectNodes("单词块");

    // Built once up front: the patterns are the same for every entry.
    Regex antonymPattern = new Regex("l{[a-z.A-Z]+}");
    Regex synonymPattern = new Regex("L{[a-z.A-Z]+}");

    // 'using' guarantees the trace file is flushed and closed even if an entry throws.
    using (FileStream fs = new FileStream("result.txt", FileMode.Create))
    using (StreamWriter writer = new StreamWriter(fs))
    {
        WordsssDB.WordsssDBManager manager = new WordsssDB.WordsssDBManager();
        try
        {
            int i = 0;
            foreach (XmlNode dcNode in entries)
            {
                XmlNode wordNode = dcNode.SelectSingleNode("单词");

                // Entries without a headword cannot be linked to anything; skip them
                // instead of failing on a null node.
                if (wordNode != null)
                {
                    string baseWord = wordNode.InnerText;
                    writer.WriteLine(baseWord);

                    // Not every entry carries a related-word node; treat a missing one
                    // as "no related words" rather than dereferencing null.
                    XmlNode xgNode = dcNode.SelectSingleNode("单词解释块/基本词义/单词项/相关词");
                    string relatedText = xgNode != null ? xgNode.InnerText : "";

                    // Section <1>: l{...} tokens, registered as antonyms.
                    MatchCollection antonyms = antonymPattern.Matches(relatedText);
                    if (antonyms.Count != 0)
                    {
                        writer.WriteLine("<1>");
                    }
                    foreach (Match match in antonyms)
                    {
                        // Strip the l{ ... } wrapper to get the bare word.
                        string rep = match.Value.Replace("l{", "").Replace("}", "");
                        writer.WriteLine(" " + rep);
                        if (manager.addAntonym(baseWord, rep) == -1)
                        {
                            writer.WriteLine("FAILED");
                        }
                    }

                    // Section <2>: L{...} tokens, registered as synonyms.
                    MatchCollection synonyms = synonymPattern.Matches(relatedText);
                    if (synonyms.Count != 0)
                    {
                        writer.WriteLine("<2>");
                    }
                    foreach (Match match in synonyms)
                    {
                        // Strip the L{ ... } wrapper to get the bare word.
                        string rep = match.Value.Replace("L{", "").Replace("}", "");
                        writer.WriteLine(" " + rep);
                        if (manager.addSynonym(baseWord, rep) == -1)
                        {
                            writer.WriteLine("FAILED");
                        }
                    }
                }

                // Progress output every 100 entries. The counter is printed after the
                // increment, so the console shows 1, 101, 201, ...
                if (i++ % 100 == 0)
                {
                    Console.WriteLine(i);
                }
            }
        }
        finally
        {
            // Same shutdown call the frequency importer in this file makes when it is done.
            // NOTE(review): presumably releases the database connection - confirm.
            manager.CloseManager();
        }
    }
}
/// <summary>
/// Imports word derivations from the "AHD.xml" dictionary.
/// For each child of the dictionary root (up to a fixed limit) the headword is read from
/// <c>单词</c> and every <c>单词项/单词原型</c> item under <c>单词解释块/继承用法</c> is cleaned up and
/// passed to <c>addDerivation</c> together with the headword.
/// A trace of the run is written to "result.txt"; progress is printed every 1000 entries.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Upper bound on the number of dictionary entries processed in one run.
    const int MaxWords = 100000;

    XmlDocument doc = new XmlDocument();
    doc.Load("AHD.xml");

    // The writer was previously never closed, so buffered output could be lost;
    // 'using' flushes and closes it on every exit path.
    using (FileStream fs = new FileStream("result.txt", FileMode.Create))
    using (StreamWriter writer = new StreamWriter(fs))
    {
        // The dictionary root is taken to be the second top-level node of the document.
        XmlNode dictNode = doc.ChildNodes[1];
        WordsssDB.WordsssDBManager manager = new WordsssDB.WordsssDBManager();
        try
        {
            // Enumerate the children once instead of indexing ChildNodes[i] and
            // re-reading Count on every iteration, which re-walks the sibling list.
            int i = 0;
            foreach (XmlNode ckNode in dictNode.ChildNodes)
            {
                if (i >= MaxWords)
                {
                    break;
                }

                // Headword; stays empty when the entry has no usable 单词 text.
                string wordName = "";
                XmlNode dcNode = ckNode.SelectSingleNode("单词");
                if (dcNode != null && dcNode.FirstChild != null && dcNode.FirstChild.Value != null)
                {
                    wordName = dcNode.FirstChild.Value;
                }
                // Single quotes are doubled before the value is handed to the manager.
                // NOTE(review): looks like manual SQL escaping - confirm the manager
                // builds SQL text and consider parameterized commands there.
                wordName = wordName.Replace("'", "''");

                XmlNode jcNode = ckNode.SelectSingleNode("单词解释块/继承用法");
                if (jcNode != null)
                {
                    XmlNodeList jcList = jcNode.SelectNodes("单词项/单词原型");
                    if (jcList.Count != 0)
                    {
                        writer.WriteLine(wordName + " " + jcList.Count);
                        foreach (XmlNode jc in jcList)
                        {
                            // Items without text content carry nothing to register.
                            if (jc.FirstChild == null || jc.FirstChild.Value == null)
                            {
                                continue;
                            }

                            // Remove the dictionary's quote markup and the " 或" separator,
                            // then apply the same quote doubling as for the headword.
                            string str = jc.FirstChild.Value.Replace("&2{”}", "");
                            str = str.Replace("&2{“}", "");
                            str = str.Replace(" 或", "");
                            str = str.Replace("'", "''");

                            writer.WriteLine(" " + str);
                            if (manager.addDerivation(wordName, str) == -1)
                            {
                                writer.WriteLine("FAILED");
                            }
                        }
                    }
                }

                if (i % 1000 == 0)
                {
                    Console.WriteLine(i);
                }
                i++;
            }
        }
        finally
        {
            // Same shutdown call the frequency importer in this file makes when it is done.
            // NOTE(review): presumably releases the database connection - confirm.
            manager.CloseManager();
        }
    }
}
/// <summary>
/// Imports word frequencies from the tab-separated list "1_1_all_fullalpha.txt".
/// Each line is split on tabs (empty fields dropped); field 0 is the word or "@",
/// field 2 is ":" / "%" / a variant form, and fields 3-5 are parsed as two integers
/// and a double. Accepted words are accumulated in memory, then each one is passed to
/// <c>addFrequency</c> and logged to "out2.txt".
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
/// <exception cref="FormatException">A non-blank line has fewer than six fields or a
/// numeric field cannot be parsed.</exception>
static void Main(string[] args)
{
    // The input is machine-formatted, so numbers are parsed independently of the
    // machine's regional settings.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    Regex startsWithLetter = new Regex("^[a-zA-Z]");

    // One dictionary replaces the former HashSet + Dictionary pair that held the same keys.
    Dictionary<string, Frequency> frequencies = new Dictionary<string, Frequency>();

    // 'using' closes the input (previously never closed) and flushes the output
    // even when a line fails to parse.
    using (FileStream fs = new FileStream("1_1_all_fullalpha.txt", FileMode.Open))
    using (StreamReader reader = new StreamReader(fs))
    using (FileStream outFile = new FileStream("out2.txt", FileMode.Create))
    using (StreamWriter writer = new StreamWriter(outFile))
    {
        WordsssDB.WordsssDBManager manager = new WordsssDB.WordsssDBManager();
        try
        {
            // True while the lines being read belong to a "%" headword with non-zero frequency.
            bool bAddWord = false;

            string strLine;
            while ((strLine = reader.ReadLine()) != null)
            {
                string[] splitLine = strLine.Split(new char[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);

                // A blank line (e.g. at the end of the file) carries no data.
                if (splitLine.Length == 0)
                {
                    continue;
                }
                if (splitLine.Length < 6)
                {
                    throw new FormatException("Expected at least 6 tab-separated fields: " + strLine);
                }

                int frequency = int.Parse(splitLine[3], invariant);
                int frequency2 = int.Parse(splitLine[4], invariant);
                double frequency3 = double.Parse(splitLine[5], invariant);
                string marker = splitLine[2];

                // ":" entry with zero frequency: nothing to record.
                if (frequency == 0 && marker == ":")
                {
                    bAddWord = false;
                    continue;
                }

                if (marker == "%" && frequency != 0)
                {
                    // "%" headword: its own line is skipped, the lines that follow are accepted.
                    bAddWord = true;
                    continue;
                }
                else if (marker == ":")
                {
                    bAddWord = false;
                }
                // NOTE(review): a "%" line with zero frequency leaves bAddWord at its previous
                // value and falls through; the conversion importer in this file resets its
                // flag in that case - confirm which behavior is intended.

                // "@" lines carry the actual word form in field 2.
                string str = splitLine[0] == "@" ? splitLine[2] : splitLine[0];

                if (!startsWithLetter.IsMatch(str))
                {
                    continue;
                }
                if (!bAddWord && marker != ":")
                {
                    continue;
                }

                Frequency existing;
                if (frequencies.TryGetValue(str, out existing))
                {
                    // Repeated word: only the first counter is accumulated.
                    existing.frequency1 += frequency;
                }
                else
                {
                    frequencies.Add(str, new Frequency(frequency, frequency2, frequency3));
                }
            }

            int j = 0;
            Console.WriteLine(frequencies.Count);
            foreach (KeyValuePair<string, Frequency> entry in frequencies)
            {
                string str = entry.Key;
                Frequency freq = entry.Value;

                if (j % 100 == 0)
                {
                    Console.WriteLine(j);
                }

                // Single quotes are doubled before the word is handed to the manager.
                // NOTE(review): looks like manual SQL escaping - confirm.
                string strRep = str.Replace("'", "''");
                if (manager.addFrequency(strRep, freq.frequency1, freq.frequency2, freq.frequency3) == -1)
                {
                    // Show the failing word and wait for a key press before continuing.
                    Console.WriteLine(str);
                    Console.Read();
                }

                writer.WriteLine(str + " " + freq.frequency1 + "," + freq.frequency2 + "," + freq.frequency3);
                j++;
            }

            writer.WriteLine(frequencies.Count);
            writer.WriteLine(j);
        }
        finally
        {
            manager.CloseManager();
        }
    }
}
/// <summary>
/// Imports pronunciations from the "AHD - Copy.xml" dictionary.
/// For each entry in the configured index window the headword is read from <c>单词</c>,
/// passed through <c>processString</c> and resolved with <c>getWordId</c>; entries the
/// manager does not know are skipped. For every <c>单词解释块</c> of a known word, the text of
/// <c>基本词义/单词音标/国际音标</c> (reduced to the content of its first <c>{...}</c> group when
/// present) is stored with <c>updateAHDSound</c> together with the mapped word type.
/// A trace is written to "sound.txt"; progress is printed to the console.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Size and start of the window of dictionary entries processed in one run.
    const int MaxWordCount = 130000;
    const int BeginWord = 0; // original note: "not updated"

    // Built once; captures the text between the first pair of braces.
    Regex bracePattern = new Regex("{([^}]*)}");

    XmlDocument doc = new XmlDocument();
    doc.Load("AHD - Copy.xml");

    // 'using' flushes and closes the trace file on every exit path.
    using (FileStream fs = new FileStream("sound.txt", FileMode.Create))
    using (StreamWriter writer = new StreamWriter(fs))
    {
        WordsssDB.WordsssDBManager manager = new WordsssDB.WordsssDBManager();
        try
        {
            // The dictionary root is taken to be the second top-level node of the document.
            XmlNode dictNode = doc.ChildNodes[1];
            Console.WriteLine(dictNode.ChildNodes.Count);

            // Enumerate the children once instead of indexing ChildNodes[i] and
            // re-reading Count on every iteration, which re-walks the sibling list.
            int end = BeginWord + MaxWordCount;
            int i = -1;
            foreach (XmlNode ckNode in dictNode.ChildNodes)
            {
                i++;
                if (i < BeginWord)
                {
                    continue;
                }
                if (i >= end)
                {
                    break;
                }

                // Entries without headword text cannot be looked up.
                XmlNode dcNode = ckNode.SelectSingleNode("单词");
                if (dcNode == null || dcNode.FirstChild == null)
                {
                    continue;
                }
                string wordName = processString(dcNode.FirstChild.Value);

                if (i % 1000 == 0)
                {
                    Console.WriteLine(i);
                }

                // -1 from getWordId means the word is not known to the manager.
                int wordId = manager.getWordId(wordName);
                if (wordId == -1)
                {
                    continue;
                }

                foreach (XmlNode jxNode in ckNode.SelectNodes("单词解释块"))
                {
                    // Part-of-speech text; empty when the node or its text is missing.
                    XmlNode dxNode = jxNode.SelectSingleNode("基本词义/单词词性");
                    string wordType = (dxNode != null && dxNode.FirstChild != null)
                        ? dxNode.FirstChild.Value
                        : "";

                    // No usable phonetic text: nothing to store for this block.
                    XmlNode ybNode = jxNode.SelectSingleNode("基本词义/单词音标/国际音标");
                    if (ybNode == null || ybNode.FirstChild == null || ybNode.FirstChild.Value == null)
                    {
                        continue;
                    }

                    string rawSound = ybNode.FirstChild.Value;
                    string wordSound = rawSound;

                    // Keep only the text inside the first {...} group, when there is one.
                    Match braceMatch = bracePattern.Match(wordSound);
                    if (braceMatch.Success)
                    {
                        wordSound = braceMatch.Groups[1].Value;
                    }

                    // Single quotes are doubled before the value is handed to the manager.
                    // NOTE(review): looks like manual SQL escaping - confirm.
                    wordSound = wordSound.Replace("'", "''");

                    // The trace records the unmodified phonetic text.
                    writer.WriteLine(wordName + " " + rawSound + " " + getWordType(wordType));
                    manager.updateAHDSound(wordId, getWordType(wordType), wordSound);
                }
            }
        }
        finally
        {
            // Same shutdown call the frequency importer in this file makes when it is done.
            // NOTE(review): presumably releases the database connection - confirm.
            manager.CloseManager();
        }
    }
}
/// <summary>
/// Imports word conversions (headword to variant form) from the tab-separated list
/// "1_1_all_fullalpha.txt".
/// A line whose field 2 is "%" with non-zero frequency starts a headword; the lines
/// that follow contribute their field 2 as variant forms of that headword until a
/// ":" line or a zero-frequency "%" line ends the group. Every headword/variant pair
/// is then passed to <c>addConversion</c> and logged to "result.txt".
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
/// <exception cref="FormatException">A non-blank line has fewer than four fields or the
/// frequency field cannot be parsed.</exception>
static void Main(string[] args)
{
    Regex startsWithLetter = new Regex("^[a-zA-Z]");

    // Every accepted word form, and for each headword the set of its differing variants.
    HashSet<string> wordHash = new HashSet<string>();
    Dictionary<string, HashSet<string>> conversionDic = new Dictionary<string, HashSet<string>>();

    // 'using' closes the input (previously never closed) and flushes the output
    // even when a line fails to parse.
    using (FileStream fs = new FileStream("1_1_all_fullalpha.txt", FileMode.Open))
    using (StreamReader reader = new StreamReader(fs))
    using (FileStream writerFile = new FileStream("result.txt", FileMode.Create))
    using (StreamWriter writer = new StreamWriter(writerFile))
    {
        bool hasConversion = false;
        string currentWord = "";
        int count = 0;
        int i = 0;

        string strLine;
        while ((strLine = reader.ReadLine()) != null)
        {
            string[] strSplit = strLine.Split(new char[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);

            // A blank line (e.g. at the end of the file) carries no data.
            if (strSplit.Length == 0)
            {
                continue;
            }
            if (strSplit.Length < 4)
            {
                throw new FormatException("Expected at least 4 tab-separated fields: " + strLine);
            }

            int frequency = int.Parse(strSplit[3], System.Globalization.CultureInfo.InvariantCulture);

            // Progress output; the counter is printed after the increment (1, 10001, ...).
            if (i++ % 10000 == 0)
            {
                Console.WriteLine(i);
            }

            string marker = strSplit[2];
            if (marker == "%" && frequency != 0)
            {
                if (!startsWithLetter.IsMatch(strSplit[0]))
                {
                    // Rejected headword: end the current group so the variant lines that
                    // follow are not attributed to the previous headword.
                    hasConversion = false;
                    continue;
                }
                hasConversion = true;
                currentWord = strSplit[0];
                wordHash.Add(currentWord);
                continue;
            }
            else if (marker == ":")
            {
                // Word without variants.
                hasConversion = false;
                if (frequency != 0 && startsWithLetter.IsMatch(strSplit[0]))
                {
                    wordHash.Add(strSplit[0]);
                    count++;
                }
                continue;
            }
            else if (marker == "%" && frequency == 0)
            {
                hasConversion = false;
                continue;
            }

            // Variant line of the current headword.
            if (hasConversion)
            {
                count++;
                wordHash.Add(marker);

                // A variant identical to its headword is not a conversion.
                if (currentWord != marker)
                {
                    HashSet<string> forms;
                    if (!conversionDic.TryGetValue(currentWord, out forms))
                    {
                        forms = new HashSet<string>();
                        conversionDic.Add(currentWord, forms);
                    }
                    forms.Add(marker);
                }
            }
        }

        Console.WriteLine(count);
        Console.WriteLine(wordHash.Count);
        Console.WriteLine(conversionDic.Count);

        int countDict = 0;
        WordsssDB.WordsssDBManager manager = new WordsssDB.WordsssDBManager();
        try
        {
            foreach (string strConversion in wordHash)
            {
                HashSet<string> forms;
                if (!conversionDic.TryGetValue(strConversion, out forms))
                {
                    continue;
                }

                countDict++;

                // Single quotes are doubled before values are handed to the manager.
                // NOTE(review): looks like manual SQL escaping - confirm.
                string wordBase = strConversion.Replace("'", "''");
                writer.WriteLine(countDict + " " + wordBase);

                foreach (string word in forms)
                {
                    string wordConversion = word.Replace("'", "''");
                    writer.WriteLine(" " + wordConversion);
                    if (manager.addConversion(wordBase, wordConversion) == -1)
                    {
                        writer.WriteLine("FAILED");
                    }
                }
            }

            writer.WriteLine(countDict);
            writer.WriteLine(conversionDic.Count);
            writer.WriteLine(wordHash.Count);
        }
        finally
        {
            // Same shutdown call the frequency importer in this file makes when it is done.
            // NOTE(review): presumably releases the database connection - confirm.
            manager.CloseManager();
        }
    }
}