Пример #1
0
 /// <summary>
 /// Dumps every n-gram dictionary (character, uni-, bi- and tri-gram) to its own CSV file.
 /// The files are written in that order, using fixed file names.
 /// </summary>
 public void WriteToFile()
 {
     // Output file names, one per dictionary.
     const string charFile = "Char.csv";
     const string uniFile  = "Uni.csv";
     const string biFile   = "Bi.csv";
     const string triFile  = "Tri.csv";

     Char_gram.ToCSV(charFile);
     Uni_gram.ToCSV(uniFile);
     Bi_gram.ToCSV(biFile);
     Tri_gram.ToCSV(triFile);
 }
Пример #2
0
        /// <summary>
        /// Reads a text file line by line and records what it finds in the n-gram dictionaries:
        /// every character returned by Token_Char goes into the character dictionary, and the
        /// stemmed terms of each tokenized line go into the uni-, bi- and tri-gram dictionaries.
        /// N-grams are built within a single line; they never span a line break.
        /// </summary>
        /// <param name="path">The path to the file that is being currently worked</param>
        public void IndexFile(string path)
        {
            // The using statement releases the file handle even if tokenizing or stemming throws.
            using (StreamReader sr = new StreamReader(path))
            {
                string orgline;

                // ReadLine returns null once the end of the stream is reached.
                while ((orgline = sr.ReadLine()) != null)
                {
                    // Character grams are taken from the raw, space-separated pieces of the line.
                    foreach (string piece in orgline.Split(' '))
                    {
                        foreach (char c in Token_Char(piece))
                        {
                            Char_gram.Add(c.ToString());
                        }
                    }

                    // Word grams are taken from the tokenized line. Each surviving term is stemmed
                    // exactly once here and the stem is reused by all three n-gram passes below.
                    // NOTE(review): assumes Stemmer.Stem is deterministic for a given term.
                    List<string> stems = new List<string>();
                    foreach (string raw in Token_String(orgline).Split(' '))
                    {
                        // Drop entries that are empty or whitespace-only after splitting.
                        string term = raw.Trim();
                        if (term.Length > 0)
                        {
                            stems.Add(Stemmer.Stem(term));
                        }
                    }

                    // Single terms.
                    foreach (string stem in stems)
                    {
                        Uni_gram.Add(stem);
                    }

                    // Adjacent pairs; the loop bound makes this a no-op for fewer than two terms.
                    for (int i = 0; i + 1 < stems.Count; i++)
                    {
                        Bi_gram.Add(stems[i] + " " + stems[i + 1]);
                    }

                    // Adjacent triples; the loop bound makes this a no-op for fewer than three terms.
                    for (int i = 0; i + 2 < stems.Count; i++)
                    {
                        Tri_gram.Add(stems[i] + " " + stems[i + 1] + " " + stems[i + 2]);
                    }
                }
            }
        }