示例#1
0
        // want to keep 0-9, a-z, A-Z, '.', '?', '!', ';', '''
        // [^a-zA-Z0-9\.\?\!;' ]
        /// <summary>
        /// Trains the model on a static corpus file.
        /// </summary>
        /// <remarks>
        /// Every phrase returned by <c>RegexLogic.GetPhrasesFromFile</c> is lower-cased and,
        /// unless the lower-cased form is contained in <c>exceptionList</c>, stripped of the
        /// characters <c>. ? ! ; ~</c>. The resulting word is handed to <c>ObserveEvent</c>
        /// together with the running chain. Phrases that reduce to an empty word are skipped.
        /// </remarks>
        /// <param name="fileName">Corpus file, passed as-is to <c>RegexLogic.GetPhrasesFromFile</c>.</param>
        public void TrainModel(string fileName)
        {
            Queue <string> chain = new Queue <string>();

            string[] phrases = RegexLogic.GetPhrasesFromFile(fileName);
            foreach (string phrase in phrases)
            {
                string word = phrase.ToLower();
                if (!exceptionList.Contains(word))
                {
                    // word is already lower-case, so no second ToLower() is needed here.
                    word = Regex.Replace(word, "[\\.\\?\\!;~]", "");
                }
                if (word == "")
                {
                    // Nothing left after stripping (empty or punctuation-only phrase):
                    // skip it and keep training instead of abandoning the rest of the file.
                    continue;
                }

                // NOTE(review): phrase keeps its original casing while word is lower-cased,
                // so this is also true for any phrase containing an upper-case letter, not
                // only when punctuation was stripped - confirm that this is intended.
                bool terminator = (phrase != word);
                ObserveEvent(chain, word);
                if (terminator)
                {
                    chain.Dequeue();
                    chain = ChainPush(chain);
                }
            }
            Console.WriteLine("Trained on file " + fileName);
        }
示例#2
0
        /// <summary>
        /// Evaluates the model against the test file and returns the summed word
        /// evaluations divided by <c>events</c>.
        /// </summary>
        /// <remarks>
        /// Each phrase read from <c>testFilePath</c> is converted with <c>GetWordFromPhrase</c>.
        /// An empty word increments <c>fake</c> and is skipped; every other word is scored by
        /// <c>EvaluateWord</c> against a copy of <c>evidence</c> and then passed to
        /// <c>UpdateTestState</c>. The result is logged through <c>Debugger</c>.
        /// </remarks>
        /// <returns>Sum of the <c>EvaluateWord</c> results divided by <c>events</c>.</returns>
        public double TestModelValuation()
        {
            // Last backslash-separated segment of the path identifies the test in the debugger.
            string testName = testFilePath.Split('\\').Last();
            Debugger.StartTest(model, testName);

            double total = 0;
            foreach (string phrase in RegexLogic.GetPhrasesFromFile(testFilePath))
            {
                string word = GetWordFromPhrase(phrase);
                if (word != "")
                {
                    // Score against a copy so the evaluation cannot disturb the live evidence.
                    total += EvaluateWord(new Queue <string>(evidence.ToArray()), word);
                    UpdateTestState(word, phrase);
                }
                else
                {
                    fake++;
                }
            }

            double modelScore = total / (double)events;
            Debugger.Log(String.Format("{0}: {1}", testFilePath, modelScore));
            Debugger.FinishTest(model, testName);
            Console.WriteLine();
            return modelScore;
        }
示例#3
0
 /// <summary>
 /// Runs the model's word prediction over the test file and reports how many
 /// predictions matched the actual word.
 /// </summary>
 /// <remarks>
 /// Each phrase read from <c>testFilePath</c> is converted with <c>GetWordFromPhrase</c>.
 /// An empty word increments <c>fake</c> and is skipped; otherwise <c>PredictWord</c> is
 /// called with a copy of <c>evidence</c>, a match increments <c>correctPredictions</c>
 /// and is logged, and <c>UpdateTestState</c> is called.
 /// </remarks>
 /// <returns>A tuple of (<c>correctPredictions</c>, <c>events</c>).</returns>
 public Tuple <int, int> TestModelPrediction()
 {
     // Last backslash-separated segment of the path identifies the test in the debugger.
     string testName = testFilePath.Split('\\').Last();
     Debugger.StartTest(model, testName);

     foreach (string phrase in RegexLogic.GetPhrasesFromFile(testFilePath))
     {
         string word = GetWordFromPhrase(phrase);
         if (word != "")
         {
             // Predict from a copy so the prediction cannot disturb the live evidence.
             Queue <string> evidenceCopy = new Queue <string>(evidence.ToArray());
             if (PredictWord(evidenceCopy, word) == word)
             {
                 correctPredictions++;
                 Debugger.LogMatch(model, testName, word);
             }
             UpdateTestState(word, phrase);
         }
         else
         {
             fake++;
         }
     }

     string summary = String.Format("{0}:\n\tevents: {1}\n\tcorrect: {2}\n\tfake: {3}", testFilePath, events, correctPredictions, fake);
     Debugger.Log(summary);
     Debugger.FinishTest(model, testName);
     Console.WriteLine();
     return new Tuple <int, int>(correctPredictions, events);
 }
示例#4
0
        /// <summary>
        /// Prints the file name, then logs how many phrases of the file are still
        /// non-empty after the characters <c>. ? ! ; ~</c> have been removed.
        /// </summary>
        /// <param name="file">File passed to <c>RegexLogic.GetPhrasesFromFile</c>.</param>
        private static void CountFile(string file)
        {
            Console.WriteLine(file);

            int wordCount = 0;
            foreach (string phrase in RegexLogic.GetPhrasesFromFile(file))
            {
                // A phrase made up solely of the stripped characters does not count as a word.
                string stripped = Regex.Replace(phrase, "[\\.\\?\\!;~]", "");
                if (stripped != "")
                {
                    wordCount++;
                }
            }
            Debugger.Log(String.Format("{0}: {1} words", file, wordCount));
        }