예제 #1
0
        /// <summary>
        /// Inserts line breaks into a WordNet-style explanation in two passes:
        /// first around every part-of-speech tag ("v.", "n.", "adj.", "adv."),
        /// then inside every alphabetic word for which
        /// <paramref name="EntireWordCollection"/> reports a positive offset.
        /// </summary>
        /// <param name="explanation">Raw explanation text to break up.</param>
        /// <param name="EntireWordCollection">
        /// Collection queried with <c>FindSimilarity(word)</c>; a result greater than
        /// zero is used as the character offset inside the word at which a line break
        /// is inserted. NOTE(review): presumably this marks where two glued words
        /// meet (e.g. "finishHe") - confirm against WordEntryCollection.
        /// </param>
        /// <returns>The explanation with <see cref="Environment.NewLine"/> inserted.</returns>
        public static string BreakToSentencesWordnet(string explanation, WordEntryCollection EntireWordCollection)
        {
            // Offsets must advance by the real length of the inserted text; a
            // hard-coded 2 is only correct where the newline is "\r\n".
            string newLine = Environment.NewLine;
            string output  = explanation;
            int    shift   = 0;

            // Pass 1: match indices refer to the untouched input, so "shift" tracks
            // how many characters have already been inserted in front of each match.
            foreach (Match tag in Regex.Matches(explanation, @"(v\.)|(n\.)|(adj\.)|(adv\.)"))
            {
                // Insert the trailing break first so the leading index stays valid.
                output = output.Insert(tag.Index + tag.Length + shift, newLine);
                if (tag.Index > 0)
                {
                    // No leading break when the tag opens the text.
                    output = output.Insert(tag.Index + shift, newLine);
                    shift += newLine.Length;
                }
                shift += newLine.Length;
            }

            // Pass 2: matches are taken from the text as it is after pass 1, so the
            // running offset starts from zero again.
            shift = 0;
            foreach (Match word in Regex.Matches(output, @"[a-zA-Z]+"))
            {
                int position = EntireWordCollection.FindSimilarity(word.Value);
                if (position > 0)
                {
                    output = output.Insert(word.Index + position + shift, newLine);
                    shift += newLine.Length;
                }
            }

            return output;
        }
예제 #2
0
        /// <summary>
        /// Click handler for the Run button. Executes the first conversion whose
        /// option is checked in <c>chlbOption</c> (tested in the order below),
        /// reading from <c>fileNamein</c> and, for every branch except the
        /// experiment, writing to <c>fileNameout</c>. A completion message is shown
        /// afterwards.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnRun_Click(object sender, EventArgs e)
        {
            // All readers/writers are wrapped in "using" so the file handles are
            // released even when parsing or saving throws.
            if (chlbOption.GetItemChecked(Convert_GRE_SAT_to_XML))
            {
                // Each line is either "<rank> <word> = <meaning>" or "<word> = <meaning>".
                MyWordList WL         = new MyWordList();
                char[]     Separators = { ' ', '\n' };

                using (StreamReader ifile = new StreamReader(fileNamein))
                {
                    string line;
                    while ((line = ifile.ReadLine()) != null)
                    {
                        string[] strspt = line.Split(Separators, StringSplitOptions.RemoveEmptyEntries);
                        if (strspt.Length == 0)
                        {
                            // Blank line: nothing to parse (indexing strspt[0] would throw).
                            continue;
                        }

                        MyWordInfo w = new MyWordInfo();
                        string     s;
                        if (VRegex.IsUnsign(strspt[0]))
                        {
                            // Leading token is the rank, the word follows it.
                            w.rank       = Convert.ToInt32(strspt[0]);
                            w.infinitive = strspt[1];
                            s            = line.Replace(strspt[0] + " " + strspt[1] + " = ", "");
                        }
                        else
                        {
                            w.rank       = 0;
                            w.infinitive = strspt[0];
                            s            = line.Replace(strspt[0] + " = ", "");
                        }

                        s = VRegex.BreakToSentencesGRESAT(s);

                        Term T = new Term();
                        T.type = "";
                        T.meaning.UpdateMeaning(s);
                        w.TermList = new List <Term>();
                        w.TermList.Add(T);
                        WL.Add(w);
                    }
                }

                XElement EX = WL.ToXElement();
                EX.Save(fileNameout);
            }
            else if (chlbOption.GetItemChecked(Create_Infinitive_Derivative_XML))
            {
                SimpleWordList WL = new SimpleWordList();
                WL.LoadFromWordFormNInf(fileNamein);
                WL.Sort(new CompareSimpleWordByInf());
                WL.MergeDerivative();

                // Drop adjacent entries that share the same infinitive, keeping the first.
                int i = 0;
                while (i < WL.Count - 1)
                {
                    if (String.Equals(WL[i].inf, WL[i + 1].inf))
                    {
                        WL.RemoveAt(i + 1);
                    }
                    else
                    {
                        ++i;
                    }
                }

                XElement XE = WL.ToXElementInfinitiveDerivative();
                XE.Save(fileNameout);
            }
            else if (chlbOption.GetItemChecked(Convert_WordNet_to_XML))
            {
                // fileNamein is expected to be the WordNet text dump (wordnet.txt).
                WordList WL = new WordList();

                using (StreamReader ifile = new StreamReader(fileNamein))
                {
                    WordEntryCollection EntireCollection     = new WordEntryCollection();
                    WordEntryCollection InfinitiveCollection = new WordEntryCollection();

                    // The whole-word collection is sorted and written back in sorted form.
                    EntireCollection.LoadFromFile(EntireWordCollectionFile);
                    EntireCollection.Sort();
                    EntireCollection.SaveToFile(EntireWordCollectionFile);

                    // Loaded, but not currently used to filter entries.
                    InfinitiveCollection.LoadFromFile(InfinitiveWordfile);

                    string line;
                    while ((line = ifile.ReadLine()) != null)
                    {
                        WordInfo w = new WordInfo();
                        Term     T = new Term();
                        string   s = VRegex.getWord(line);
                        if (VRegex.IsValidSingleWordEntry(s))
                        {
                            w.infinitive = s;
                            string explanation = VRegex.GetExpanation(line, s);
                            explanation = VRegex.BreakToSentencesWordnet(explanation, EntireCollection);
                            T.UpdateMeaing(explanation);
                            T.type = "";
                            w.TermList.Add(T);
                            WL.Add(w);
                        }
                    }
                }

                XElement EX = WL.ToXElement();
                EX.Save(fileNameout);
            }
            else if (chlbOption.GetItemChecked(Create_Infinitive_List))
            {
                // Writes the explanation part of every valid single-word entry.
                using (StreamReader ifile = new StreamReader(fileNamein))
                using (StreamWriter ofile = new StreamWriter(fileNameout))
                {
                    string line;
                    while (null != (line = ifile.ReadLine()))
                    {
                        string s = VRegex.getWord(line);
                        if (VRegex.IsValidSingleWordEntry(s))
                        {
                            ofile.WriteLine(VRegex.GetExpanation(line, s));
                        }
                    }
                }
            }
            else if (chlbOption.GetItemChecked(Create_Single_Word_Collection))
            {
                // Writes the head word of every valid single-word entry.
                using (StreamReader ifile = new StreamReader(fileNamein))
                using (StreamWriter ofile = new StreamWriter(fileNameout))
                {
                    string line;
                    while (null != (line = ifile.ReadLine()))
                    {
                        string s = VRegex.getWord(line);
                        if (VRegex.IsValidSingleWordEntry(s))
                        {
                            ofile.WriteLine(s);
                        }
                    }
                }
            }
            else if (chlbOption.GetItemChecked(Experiment))
            {
                // Scratch branch: runs the WordNet sentence breaker on a fixed sample
                // and shows the result; nothing is written to fileNameout.
                WordEntryCollection EntireCollection = new WordEntryCollection();
                EntireCollection.LoadFromFile(EntireWordCollectionFile);

                string sentence = "Britannicaa";
                sentence = VRegex.BreakToSentencesWordnet(sentence, EntireCollection);
                MessageBox.Show(sentence);
            }
            else if (chlbOption.GetItemChecked(Create_MacMillan_XML))
            {
                WordList MacMillan = new WordList();

                using (StreamReader ifile = new StreamReader(fileNamein, Encoding.UTF8))
                {
                    string line;
                    while (null != (line = ifile.ReadLine()))
                    {
                        WordInfo w = new WordInfo();
                        Term     T = new Term();
                        string   s = VRegex.getWord(line);
                        if (VRegex.IsValidSingleWordEntry(s))
                        {
                            w.infinitive = s;
                            string explanation = VRegex.GetExpanation(line, s);
                            explanation = VRegex.BreakToSentencesMacMillan(explanation);
                            T.UpdateMeaing(explanation);
                            T.type = "";
                            w.TermList.Add(T);
                            MacMillan.Add(w);
                        }
                    }
                }

                XElement XE = MacMillan.ToXElement();
                XE.Save(fileNameout);
            }
            else if (chlbOption.GetItemChecked(Create_ConciseEnglish_XML))
            {
                // fileNamein is expected to be the Concise English text dump (concise.txt).
                WordList WL = new WordList();

                using (StreamReader ifile = new StreamReader(fileNamein))
                {
                    WordEntryCollection EntireCollection = new WordEntryCollection();
                    EntireCollection.LoadFromFile(EntireWordCollectionFile);

                    string line;
                    while ((line = ifile.ReadLine()) != null)
                    {
                        WordInfo w = new WordInfo();
                        Term     T = new Term();
                        string   s = VRegex.getWord(line);
                        if (VRegex.IsValidSingleWordEntry(s))
                        {
                            w.infinitive = s;
                            string explanation = VRegex.GetExpanation(line, s);
                            explanation = VRegex.BreakToSentencesConcise(explanation, EntireCollection);
                            T.UpdateMeaing(explanation);
                            T.type = "";
                            w.TermList.Add(T);
                            WL.Add(w);
                        }
                    }
                }

                XElement EX = WL.ToXElement();
                EX.Save(fileNameout);
            }

            MessageBox.Show("Mission completed.");
        }
예제 #3
0
        /// <summary>
        /// Reformats a Concise-dictionary explanation. Text before the first
        /// part-of-speech tag ("v.", "n.", "adj.", "adv.") is discarded; each tag is
        /// then written on its own line, followed by the text up to the next tag as
        /// a numbered list ("1. ", "2. ", ...). A new numbered item is started inside
        /// every alphabetic word for which <paramref name="EntireCollection"/>
        /// reports a positive offset.
        /// </summary>
        /// <param name="explanation">Raw explanation text; must not be null.</param>
        /// <param name="EntireCollection">
        /// Collection queried with <c>FindSimilarity(word)</c>; a result greater than
        /// zero is used as the character offset inside the word where the next item
        /// starts. NOTE(review): presumably the seam between two glued words -
        /// confirm against WordEntryCollection.
        /// </param>
        /// <returns>The formatted text; every section ends with a line break.</returns>
        public static string BreakToSentencesConcise(string explanation, WordEntryCollection EntireCollection)
        {
            string BreakLine = @"(v\.)|(n\.)|(adj\.)|(adv\.)";
            string word      = @"[a-zA-Z]+";
            string output    = "";
            string single;
            int    Inc;
            int    position;

            // Drop everything in front of the first tag.
            Match mat = Regex.Match(explanation, BreakLine);
            if (mat.Success)
            {
                explanation = explanation.Remove(0, mat.Index);
            }

            // "explanation" is set to null once the last section has been consumed.
            // An empty input therefore still yields a single "1. " line.
            while (explanation != null)
            {
                mat = Regex.Match(explanation, BreakLine);
                if (mat.Success)
                {
                    output += mat.Value + Environment.NewLine;
                    // Strip only this tag. Replacing the tag text everywhere would
                    // also delete later sections that carry the same tag.
                    explanation = explanation.Remove(mat.Index, mat.Length);
                }

                // The section body runs up to the next tag, or to the end of the text.
                mat = Regex.Match(explanation, BreakLine);
                if (mat.Success)
                {
                    single      = explanation.Remove(mat.Index, explanation.Length - mat.Index);
                    explanation = explanation.Remove(0, mat.Index);
                }
                else
                {
                    single      = explanation;
                    explanation = null;
                }

                Inc = 0;
                int index = 1;
                single = index.ToString() + ". " + single;

                // Match indices refer to "single" as it is here (prefix included);
                // Inc tracks how many characters have been inserted since.
                foreach (Match m1 in Regex.Matches(single, word))
                {
                    if ((position = EntireCollection.FindSimilarity(m1.Value)) > 0)
                    {
                        ++index;
                        string marker = Environment.NewLine + index.ToString() + ". ";
                        single = single.Insert(m1.Index + position + Inc, marker);
                        // The marker grows with the digit count of index and with the
                        // platform newline, so advance by its real length.
                        Inc += marker.Length;
                    }
                }

                output += single + Environment.NewLine;
            }

            return output;
        }