예제 #1
0
        /// <summary>
        /// Walks every "单词块" entry of "PWREE.da3", extracts the <c>l{...}</c> and
        /// <c>L{...}</c> tags found in the entry's related-words text, registers them
        /// through <c>addAntonym</c> and <c>addSynonym</c> respectively, and logs the
        /// words (plus a "FAILED" line for each rejected pair) to "result.txt".
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            XmlDocument doc = new XmlDocument();
            doc.Load("PWREE.da3");

            // The second top-level node of the document is used as the dictionary root.
            XmlNode dict = doc.ChildNodes[1];
            XmlNodeList dcList = dict.SelectNodes("单词块");

            // Both patterns are loop-invariant, so they are built once up front.
            Regex antonymPattern = new Regex("l{[a-z.A-Z]+}");
            Regex synonymPattern = new Regex("L{[a-z.A-Z]+}");

            // using guarantees the log is flushed and closed even if an entry throws.
            using (FileStream fs = new FileStream("result.txt", FileMode.Create))
            using (StreamWriter writer = new StreamWriter(fs))
            {
                WordsssDB.WordsssDBManager manager = new WordsssDB.WordsssDBManager();

                int i = 0;
                foreach (XmlNode dcNode in dcList)
                {
                    XmlNode wordNode = dcNode.SelectSingleNode("单词");

                    // Entries without a word node cannot be registered; they are skipped
                    // instead of crashing, but still count towards the progress output.
                    if (wordNode != null)
                    {
                        XmlNode xgNode = dcNode.SelectSingleNode("单词解释块/基本词义/单词项/相关词");

                        // An entry without a related-words node simply has no tags to extract.
                        string relatedText = xgNode != null ? xgNode.InnerText : string.Empty;

                        string base_word = wordNode.InnerText;
                        writer.WriteLine(base_word);

                        MatchCollection matches = antonymPattern.Matches(relatedText);
                        if (matches.Count != 0)
                        {
                            writer.WriteLine("<1>");
                        }
                        foreach (Match word in matches)
                        {
                            // Strip the "l{" prefix and "}" suffix to get the bare word.
                            string rep = word.ToString().Replace("l{", "");
                            rep = rep.Replace("}", "");
                            writer.WriteLine("  " + rep);
                            if (manager.addAntonym(base_word, rep) == -1)
                            {
                                writer.WriteLine("FAILED");
                            }
                        }

                        MatchCollection matches2 = synonymPattern.Matches(relatedText);
                        if (matches2.Count != 0)
                        {
                            writer.WriteLine("<2>");
                        }
                        foreach (Match word in matches2)
                        {
                            // Strip the "L{" prefix and "}" suffix to get the bare word.
                            string rep = word.ToString().Replace("L{", "");
                            rep = rep.Replace("}", "");
                            writer.WriteLine("  " + rep);
                            if (manager.addSynonym(base_word, rep) == -1)
                            {
                                writer.WriteLine("FAILED");
                            }
                        }
                    }

                    // Progress output: the value printed is the counter after the increment.
                    if (i++ % 100 == 0)
                    {
                        Console.WriteLine(i);
                    }
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Scans up to 100000 child nodes of the dictionary root in "AHD.xml". For each
        /// entry that has "单词项/单词原型" nodes under "单词解释块/继承用法", it registers
        /// every cleaned-up form through <c>addDerivation</c> and logs the result to
        /// "result.txt" (a "FAILED" line follows each rejected pair).
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            XmlDocument doc = new XmlDocument();
            doc.Load("AHD.xml");

            // The writer was previously never closed, so buffered output could be lost;
            // using flushes and closes it on every exit path.
            using (FileStream fs = new FileStream("result.txt", FileMode.Create))
            using (StreamWriter writer = new StreamWriter(fs))
            {
                // The second top-level node of the document is used as the dictionary root.
                XmlNode dictNode = doc.ChildNodes[1];

                int MAX_WORD = 100000;

                WordsssDB.WordsssDBManager manager = new WordsssDB.WordsssDBManager();

                for (int i = 0; i < MAX_WORD && i < dictNode.ChildNodes.Count; i++)
                {
                    string word_name = "";
                    XmlNode ckNode = dictNode.ChildNodes[i];
                    XmlNode dcNode = ckNode.SelectSingleNode("单词");

                    // An empty word element has no child node; keep the empty name then.
                    if (dcNode != null && dcNode.FirstChild != null)
                    {
                        word_name = dcNode.FirstChild.Value;
                    }

                    // Single quotes are doubled before the value is handed to the manager
                    // (presumably SQL-literal escaping; confirm against WordsssDBManager).
                    word_name = word_name.Replace("'", "''");

                    XmlNode jcNode = ckNode.SelectSingleNode("单词解释块/继承用法");
                    if (jcNode != null)
                    {
                        XmlNodeList jcList = jcNode.SelectNodes("单词项/单词原型");
                        if (jcList.Count != 0)
                        {
                            writer.WriteLine(word_name + "  " + jcList.Count);
                            foreach (XmlNode jc in jcList)
                            {
                                // Skip empty elements instead of dereferencing a null child.
                                if (jc.FirstChild == null)
                                {
                                    continue;
                                }

                                // Remove the quote markup and the " 或" separator text.
                                string str = jc.FirstChild.Value.Replace("&2{”}", "");
                                str = str.Replace("&2{“}", "");
                                str = str.Replace(" 或", "");
                                str = str.Replace("'", "''");
                                writer.WriteLine("  " + str);
                                if (manager.addDerivation(word_name, str) == -1)
                                {
                                    writer.WriteLine("FAILED");
                                }
                            }
                        }
                    }

                    // Progress output every 1000 entries.
                    if (i % 1000 == 0)
                    {
                        Console.WriteLine(i);
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Parses the tab-separated file "1_1_all_fullalpha.txt", accumulates one
        /// <c>Frequency</c> record per accepted word, stores each record through
        /// <c>addFrequency</c>, and writes a summary line per word to "out2.txt".
        /// </summary>
        /// <remarks>
        /// Column 2 acts as a row marker. A "%" row with a non-zero count switches
        /// collection on for the rows that follow and is itself skipped. A ":" row
        /// switches collection off and, when its count is non-zero, is itself collected.
        /// Rows whose column 0 is "@" take their word from column 2. Lines with fewer
        /// than six fields or non-numeric counts still raise an exception, as before.
        /// </remarks>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // using releases the input and output files on every exit path; the reader
            // was previously never closed at all.
            using (FileStream fs = new FileStream("1_1_all_fullalpha.txt", FileMode.Open))
            using (StreamReader reader = new StreamReader(fs))
            using (FileStream outFile = new FileStream("out2.txt", FileMode.Create))
            using (StreamWriter writer = new StreamWriter(outFile))
            {
                WordsssDB.WordsssDBManager manager = new WordsssDB.WordsssDBManager();
                try
                {
                    bool bAddWord = false;
                    Regex pattern = new Regex("^[a-zA-Z]");

                    // strHash fixes the iteration order used for output; strDict holds the data.
                    HashSet<string> strHash = new HashSet<string>();
                    Dictionary<string, Frequency> strDict = new Dictionary<string, Frequency>();

                    while (!reader.EndOfStream)
                    {
                        string strLine = reader.ReadLine();
                        string[] splitLine = strLine.Split(new char[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);

                        int frequency = int.Parse(splitLine[3]);
                        int frequency2 = int.Parse(splitLine[4]);
                        double frequency3 = double.Parse(splitLine[5]);

                        // A ":" row with a zero count ends the current group and is dropped.
                        if (frequency == 0 && splitLine[2] == ":")
                        {
                            bAddWord = false;
                            continue;
                        }

                        if (splitLine[2] == "%" && frequency != 0)
                        {
                            // Group header: collect the rows that follow, not the header itself.
                            bAddWord = true;
                            continue;
                        }
                        else if (splitLine[2] == ":")
                        {
                            bAddWord = false;
                        }

                        string str = splitLine[0] == "@" ? splitLine[2] : splitLine[0];

                        // Only words starting with an ASCII letter are kept.
                        if (!pattern.IsMatch(str))
                        {
                            continue;
                        }

                        if (bAddWord == false && splitLine[2] != ":")
                        {
                            continue;
                        }

                        if (strHash.Add(str))
                        {
                            strDict.Add(str, new Frequency(frequency, frequency2, frequency3));
                        }
                        else
                        {
                            // Repeated words only accumulate the first count.
                            strDict[str].frequency1 += frequency;
                        }
                    }

                    int j = 0;
                    Console.WriteLine(strHash.Count);
                    foreach (string str in strHash)
                    {
                        if (j % 100 == 0)
                        {
                            Console.WriteLine(j);
                        }

                        Frequency freq = strDict[str];

                        // Single quotes are doubled before the value is handed to the manager
                        // (presumably SQL-literal escaping; confirm against WordsssDBManager).
                        string strRep = str.Replace("'", "''");

                        if (manager.addFrequency(strRep, freq.frequency1, freq.frequency2, freq.frequency3) == -1)
                        {
                            // Report the failing word and wait for a key press before continuing.
                            Console.WriteLine(str);
                            Console.Read();
                        }
                        writer.WriteLine(str + " " + freq.frequency1 + "," + freq.frequency2 + "," + freq.frequency3);
                        j++;
                    }
                    writer.WriteLine(strHash.Count);
                    writer.WriteLine(j);
                }
                finally
                {
                    // Release the manager even when parsing or a database call fails.
                    manager.CloseManager();
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Iterates over the entries of "AHD - Copy.xml". For every word the manager
        /// knows (<c>getWordId</c> != -1), it reads the text of each explanation block's
        /// "基本词义/单词音标/国际音标" node, stores it through <c>updateAHDSound</c>, and
        /// logs the word, raw node text and word type to "sound.txt".
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            int MAX_WORD_COUNT = 130000;
            int BEGIN_WORD = 0; // original note: "not updated"
            XmlDocument doc = new XmlDocument();
            doc.Load("AHD - Copy.xml");

            // Loop-invariant: captures the text between the first "{" and the next "}".
            Regex soundPattern = new Regex("{([^}]*)}");

            // using guarantees the log is flushed and closed even if an entry throws.
            using (FileStream fs = new FileStream("sound.txt", FileMode.Create))
            using (StreamWriter writer = new StreamWriter(fs))
            {
                WordsssDB.WordsssDBManager manager = new WordsssDB.WordsssDBManager();

                // The second top-level node of the document is used as the dictionary root.
                XmlNode dictNode = doc.ChildNodes[1];
                Console.WriteLine(dictNode.ChildNodes.Count);

                for (int i = BEGIN_WORD; i < (BEGIN_WORD + MAX_WORD_COUNT) && i < dictNode.ChildNodes.Count; i++)
                {
                    XmlNode ckNode = dictNode.ChildNodes[i];
                    XmlNode dcNode = ckNode.SelectSingleNode("单词");

                    // Entries without a word (or with an empty word element) are skipped.
                    if (dcNode == null || dcNode.FirstChild == null)
                    {
                        continue;
                    }

                    string word_name = processString(dcNode.FirstChild.Value);

                    if (i % 1000 == 0)
                    {
                        Console.WriteLine(i);
                    }

                    // Only words the manager can resolve to an id are updated.
                    int word_id = manager.getWordId(word_name);
                    if (word_id == -1)
                    {
                        continue;
                    }

                    XmlNodeList jxNodeList = ckNode.SelectNodes("单词解释块");
                    foreach (XmlNode jxNode in jxNodeList)
                    {
                        XmlNode dxNode = jxNode.SelectSingleNode("基本词义/单词词性");
                        string word_type = (dxNode != null && dxNode.FirstChild != null)
                            ? dxNode.FirstChild.Value
                            : "";

                        XmlNode ybNode = jxNode.SelectSingleNode("基本词义/单词音标/国际音标");
                        if (ybNode == null || ybNode.FirstChild == null)
                        {
                            continue;
                        }

                        string rawSound = ybNode.FirstChild.Value;

                        // Use the braced content when present, otherwise the raw text;
                        // a single Match call replaces the former IsMatch + Match pair.
                        Match soundMatch = soundPattern.Match(rawSound);
                        string word_sound = soundMatch.Success ? soundMatch.Groups[1].Value : rawSound;

                        // Single quotes are doubled before the value is handed to the manager
                        // (presumably SQL-literal escaping; confirm against WordsssDBManager).
                        word_sound = word_sound.Replace("'", "''");

                        // The log keeps the raw node text, not the extracted/escaped value.
                        writer.WriteLine(word_name + " " + rawSound + " " + getWordType(word_type));

                        manager.updateAHDSound(word_id, getWordType(word_type), word_sound);
                    }
                }
            }
        }
예제 #5
0
        /// <summary>
        /// Parses the tab-separated file "1_1_all_fullalpha.txt", groups the column-2
        /// forms that follow each "%" header row under that header's word, registers
        /// every (word, form) pair through <c>addConversion</c>, and logs the pairs and
        /// summary counts to "result.txt".
        /// </summary>
        /// <remarks>
        /// Lines with fewer than four fields or a non-numeric column 3 still raise an
        /// exception, as before.
        /// </remarks>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // using releases both files on every exit path; the reader was previously
            // never closed at all.
            using (FileStream fs = new FileStream("1_1_all_fullalpha.txt", FileMode.Open))
            using (StreamReader reader = new StreamReader(fs))
            using (FileStream writerFile = new FileStream("result.txt", FileMode.Create))
            using (StreamWriter writer = new StreamWriter(writerFile))
            {
                bool hasConversion = false;
                Regex pattern = new Regex("^[a-zA-Z]");
                int count = 0;
                int i = 0;
                string current_word = "";
                HashSet<string> wordHash = new HashSet<string>();
                Dictionary<string, HashSet<string>> conversionDic = new Dictionary<string, HashSet<string>>();

                while (!reader.EndOfStream)
                {
                    string strLine = reader.ReadLine();

                    string[] strSplit = strLine.Split(new char[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);
                    int frequency = int.Parse(strSplit[3]);

                    // Progress output: the value printed is the counter after the increment.
                    if (i++ % 10000 == 0)
                    {
                        Console.WriteLine(i);
                    }

                    if (strSplit[2] == "%" && frequency != 0)
                    {
                        // Header row: the rows that follow list this word's forms.
                        // A header not starting with an ASCII letter leaves the state untouched.
                        if (!pattern.IsMatch(strSplit[0]))
                        {
                            continue;
                        }
                        hasConversion = true;
                        current_word = strSplit[0];
                        wordHash.Add(current_word);
                        continue;
                    }
                    else if (strSplit[2] == ":")
                    {
                        // Stand-alone row: recorded as a word, but it has no forms.
                        hasConversion = false;
                        if (frequency != 0 && pattern.IsMatch(strSplit[0]))
                        {
                            wordHash.Add(strSplit[0]);
                            count++;
                        }
                        continue;
                    }
                    else if (strSplit[2] == "%" && frequency == 0)
                    {
                        hasConversion = false;
                        continue;
                    }

                    if (hasConversion)
                    {
                        string form = strSplit[2];
                        count++;
                        wordHash.Add(form);

                        // A form identical to its header word is not stored as a conversion.
                        if (current_word != form)
                        {
                            HashSet<string> forms;
                            if (conversionDic.TryGetValue(current_word, out forms))
                            {
                                forms.Add(form);
                            }
                            else
                            {
                                conversionDic.Add(current_word, new HashSet<string> { form });
                            }
                        }
                    }
                }

                Console.WriteLine(count);
                Console.WriteLine(wordHash.Count);
                Console.WriteLine(conversionDic.Count);
                int countDict = 0;

                WordsssDB.WordsssDBManager manager = new WordsssDB.WordsssDBManager();
                foreach (string strConversion in wordHash)
                {
                    HashSet<string> forms;
                    if (!conversionDic.TryGetValue(strConversion, out forms))
                    {
                        continue;
                    }

                    countDict++;

                    // Single quotes are doubled before the values are handed to the manager
                    // (presumably SQL-literal escaping; confirm against WordsssDBManager).
                    string word_base = strConversion.Replace("'", "''");
                    writer.WriteLine(countDict + " " + word_base);
                    foreach (string word in forms)
                    {
                        string word_conversion = word.Replace("'", "''");
                        writer.WriteLine("  " + word_conversion);
                        if (manager.addConversion(word_base, word_conversion) == -1)
                        {
                            writer.WriteLine("FAILED");
                        }
                    }
                }

                writer.WriteLine(countDict);
                writer.WriteLine(conversionDic.Count);
                writer.WriteLine(wordHash.Count);
            }
        }