public void Setup()
        {
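            // Toy training corpus: `uncap` holds lowercased word/tag pairs and
            // `cap` (below) the capitalized occurrences of the same tokens. Note
            // that "will" is annotated both as a noun (NN) and as a modal (MD),
            // which is exactly the kind of ambiguity the HMM tagger must resolve.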
            List <Tokenizer.WordTag> uncap = new List <Tokenizer.WordTag>()
            {
                new Tokenizer.WordTag("cristopher", "NN"),
                new Tokenizer.WordTag("nolan", "NN"),
                new Tokenizer.WordTag("can", "MD"),
                new Tokenizer.WordTag("hire", "VB"),
                new Tokenizer.WordTag("will", "NN"),
                new Tokenizer.WordTag(".", "."),
                new Tokenizer.WordTag("tip", "NN"),
                new Tokenizer.WordTag("will", "MD"),
                new Tokenizer.WordTag("hire", "VB"),
                new Tokenizer.WordTag("cristopher", "NN"),
                new Tokenizer.WordTag(".", "."),
                new Tokenizer.WordTag("will", "MD"),
                new Tokenizer.WordTag("nolan", "NN"),
                new Tokenizer.WordTag("tip", "VB"),
                new Tokenizer.WordTag("cristopher", "NN"),
                new Tokenizer.WordTag("?", "."),
                new Tokenizer.WordTag("cristopher", "NN"),
                new Tokenizer.WordTag("will", "MD"),
                new Tokenizer.WordTag("pay", "VB"),
                new Tokenizer.WordTag("tip", "NN"),
                new Tokenizer.WordTag(".", "."),
            };

            List <Tokenizer.WordTag> cap = new List <Tokenizer.WordTag>()
            {
                new Tokenizer.WordTag("Cristopher", "NN"),
                new Tokenizer.WordTag("Nolan", "NN"),
                new Tokenizer.WordTag("Will", "NN"),
                new Tokenizer.WordTag("Tip", "NN"),
                new Tokenizer.WordTag("Cristopher", "NN"),
                new Tokenizer.WordTag("Will", "MD"),
                new Tokenizer.WordTag("Nolan", "NN"),
                new Tokenizer.WordTag("Cristopher", "NN"),
                new Tokenizer.WordTag("Cristopher", "NN"),
                new Tokenizer.WordTag("Tip", "NN"),
            };


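            // Train the model on the toy corpus, then pre-compute trigram
            // probabilities for the test tokens; `tagger` and `testw` are
            // presumably fields of the enclosing test fixture.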
            tagger = new PartOfSpeechModel();
            tagger.CreateHiddenMarkovModel(uncap, cap);
            tagger.CalculateHiddenMarkovModelProbabilitiesForTestCorpus(testw, model: "trigram");
        }
 public PartsOfSpeech ToDomainModel(PartOfSpeechModel apiModel)
 {
     // Guard first so the happy path stays flat, then map field-by-field.
     if (apiModel == null)
     {
         throw new NullReferenceException("Cannot map empty object");
     }

     return new PartsOfSpeech()
     {
         Id         = apiModel.Id,
         ShortName  = apiModel.ShortName,
         Name       = apiModel.Name,
         LanguageId = apiModel.LanguageId
     };
 }
        public void Setup()
        {
            List <Tokenizer.WordTag> uncap = new List <Tokenizer.WordTag>()
            {
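                // Small corpus of hyphenated and affixed tokens ("-ly", "nano-"),
                // presumably exercising the tagger's prefix/suffix emission models.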
                new Tokenizer.WordTag("nano-tech", "NN"),
                new Tokenizer.WordTag("nano-tech2", "VB"),
                new Tokenizer.WordTag("lovely", "JJ"),
                new Tokenizer.WordTag("tested", "VB"),
                new Tokenizer.WordTag("semingly", "NN"),
                new Tokenizer.WordTag("testly", "RB"),
            };

            List <Tokenizer.WordTag> cap = new List <Tokenizer.WordTag>()
            {
                new Tokenizer.WordTag("Northeasterly", "NN"),
                new Tokenizer.WordTag("Epoch", "NN"),
                new Tokenizer.WordTag("Epilog", "NN"),
            };

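            // smoothingCoef: 1 presumably applies add-one (Laplace) smoothing to
            // the frequency counts, so unseen transitions keep non-zero mass.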
            tagger = new PartOfSpeechModel();
            tagger.CreateHiddenMarkovModel(uncap, cap, smoothingCoef: 1);
            tagger.CalculateHiddenMarkovModelProbabilitiesForTestCorpus(testw, model: "bigram");
        }
        static void Main(string[] args)
        {
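            // Resolve the repository root, presumably three levels up from the
            // build output directory (bin\<Configuration>\<TargetFramework>).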
            string path = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.Parent.FullName + "\\";

#if (RULE_70_30)
            Console.WriteLine("You chose Rule 70% - training, 30% - testing for the data-set!");
            const string BrownfolderTrain = "dataset\\70_30\\train", BrownfolderTest = "dataset\\70_30\\test";

            #region Load Train Files & pre-process data
            var text         = LoadAndReadFolderFiles(BrownfolderTrain);
            var oldWords     = Tokenizer.SeparateTagFromWord(Tokenizer.TokenizePennTreebank(text));
            var words        = SpeechPartClassifier.GetNewHierarchicTags(oldWords);
            var capWords     = TextPreprocessing.PreProcessingPipeline(words, toLowerOption: false, keepOnlyCapitalizedWords: true);
            var uncapWords   = TextPreprocessing.PreProcessingPipeline(words, toLowerOption: true, keepOnlyCapitalizedWords: false);
            #endregion

            #region Load Test Files & pre-process data
            var textTest     = LoadAndReadFolderFiles(BrownfolderTest);
            var oldWordsTest = Tokenizer.SeparateTagFromWord(Tokenizer.TokenizePennTreebank(textTest));
            var wordsTest    = SpeechPartClassifier.GetNewHierarchicTags(oldWordsTest);
            wordsTest = TextPreprocessing.PreProcessingPipeline(wordsTest);
            wordsTest = TextPreprocessing.Cleaning.EliminateDuplicateSequenceOfEndOfSentenceTags(wordsTest);
            #endregion

            Console.WriteLine("Done with loading and creating tokens for train & test files!");

            #region Part of Speech Model Training
            PartOfSpeechModel tagger = new PartOfSpeechModel();

            Stopwatch sw = new Stopwatch();
            sw.Start();
            tagger.CreateHiddenMarkovModel(uncapWords, capWords, smoothingCoef: 1);
            tagger.CalculateHiddenMarkovModelProbabilitiesForTestCorpus(wordsTest, model: "trigram");
            sw.Stop();
            #endregion

            #region Debug for Emissions & Transitions matrix & write trained files
            //foreach (var model in tagger.EmissionFreq)
            //{
            //    Console.WriteLine(model.Word);
            //    foreach (var item in model.TagFreq)
            //    {
            //        Console.WriteLine("     " + item.Key + " -> " + item.Value);
            //    }
            //}
            //foreach (var item in tagger.UnigramFreq)
            //    Console.WriteLine(item.Key + " -> " + item.Value);
            //foreach (var item in tagger.BigramTransition)
            //    Console.WriteLine(item.Key + " -> " + item.Value);
            //foreach (var item in tagger.TrigramTransition)
            //    Console.WriteLine(item.Key + " -> " + item.Value);

            //WriteToTxtFile("Models", "emissionWithCapital.json", JsonConvert.SerializeObject(tagger.CapitalEmissionFreq));
            //WriteToTxtFile("Models", "emission.json", JsonConvert.SerializeObject(tagger.EmissionFreq));
            //WriteToTxtFile("Models", "unigram.json", JsonConvert.SerializeObject(tagger.UnigramFreq));
            //WriteToTxtFile("Models", "bigram.json", JsonConvert.SerializeObject(tagger.BigramTransition));
            //WriteToTxtFile("Models", "trigram.json", JsonConvert.SerializeObject(tagger.TrigramTransition));
            //WriteToTxtFile("Models", "nonCapitalizedPrefix.json", JsonConvert.SerializeObject(tagger.PrefixEmissionProbabilities));
            //WriteToTxtFile("Models", "capitalizedPrefix.json", JsonConvert.SerializeObject(tagger.PrefixCapitalizedWordEmissionProbabilities));
            //WriteToTxtFile("Models", "nonCapitalizedSuffix.json", JsonConvert.SerializeObject(tagger.SuffixEmissionProbabilities));
            //WriteToTxtFile("Models", "capitalizedSuffix.json", JsonConvert.SerializeObject(tagger.SuffixCapitalizedWordEmissionProbabilities));
            //Console.WriteLine("Done writing models on filesystem!");
            #endregion

            Console.WriteLine("Done with training POS MODEL & calculating probabilities! Time: " + sw.ElapsedMilliseconds + " ms");
            Console.WriteLine("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");

            #region Decoding Viterbi Model
            Decoder decoder = new Decoder();

            sw.Reset(); sw.Start();
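            // mode: "backward" runs only the backward Viterbi pass here, and
            // beam: 0 presumably disables beam pruning (exhaustive search).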
            decoder.ViterbiDecoding(tagger, wordsTest, modelForward: "trigram", modelBackward: "trigram", mode: "backward", beam: 0);
            sw.Stop();
            #endregion

            Console.WriteLine("Done with DECODING VITERBI MODEL! Time: " + sw.ElapsedMilliseconds + " ms");

            #region Old method to guess probabilities
            //decoder.UnknownWords = new HashSet<string>();
            //decoder.PredictedTags = new List<string>();
            //foreach (var tw in wordsTest)
            //{
            //    HMMTagger.EmissionModel modelMax;
            //    modelMax = tagger.WordTagsEmissionFrequence.Find(x => x.Word == tw.word);

            //    if (modelMax != null)
            //    {
            //        string maxTag = modelMax.TagFreq.OrderByDescending(x => x.Value).FirstOrDefault().Key;

            //        // case default-tag NN ONLY
            //        //decoder.PredictedTags.Add("NN");

            //        // case maxTag
            //        decoder.PredictedTags.Add(maxTag);
            //    }
            //    else
            //    {
            //        const string deftag = "NN";
            //        decoder.PredictedTags.Add(deftag); // NULL / NN
            //        decoder.UnknownWords.Add(tw.word);
            //    }
            //}
            #endregion

            #region Debug for Emissions & Transitions
            //foreach (var item in decoder.EmissionProbabilities)
            //{
            //    Console.WriteLine(item.Word);
            //    foreach (var item2 in item.TagFreq)
            //        Console.WriteLine("\t" + item2.Key + " -> " + item2.Value);
            //}
            //foreach (var item in decoder.UnigramProbabilities)
            //    Console.WriteLine("UNI: " + item.Key + "->" + item.Value);
            //foreach (var item in decoder.BigramTransitionProbabilities)
            //    Console.WriteLine("BI: " + item.Key + " -> " + item.Value);
            //foreach (var item in decoder.TrigramTransitionProbabilities)
            //    Console.WriteLine("TRI: " + item.Key + " -> " + item.Value);

            //foreach (var item in decoder.ViterbiGraph)
            //{
            //    foreach (var item2 in item)
            //        Console.Write(item2.CurrentTag + ":" + item2.value + "    ");
            //    Console.WriteLine();
            //}

            //Console.WriteLine("Predicted tags: ");
            //foreach (var item in decoder.PredictedTags)
            //    Console.Write(item + " ");

            Console.WriteLine("testwords: " + wordsTest.Count + " , predwords: " + decoder.PredictedTags.Count);
            #endregion

            Console.WriteLine("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");

            #region Evaluations & results
            Evaluation eval = new Evaluation();
            eval.CreateSupervizedEvaluationsMatrix(wordsTest, decoder.PredictedTags, decoder.UnknownWords, fbeta: 1);
            //using (System.IO.StreamWriter file = new System.IO.StreamWriter(path + "statistics\\" + "bdt.csv"))
            //{
            //    file.WriteLine("TAG,ACCURACY,PRECISION,RECALL(TPR),SPECIFICITY(TNR),F1-SCORE");
            //    var fullMatrix = eval.PrintClassificationResultsMatrix();
            //    for (int i = 0; i < eval.GetFullMatrixLineLength(); i++)
            //    {
            //        for (int j = 0; j < eval.GetFullMatrixColLength(); j++)
            //            file.Write(fullMatrix[i][j] + ",");
            //        file.WriteLine();
            //    }
            //}
            Console.WriteLine("TAG ACCURACY PRECISION RECALL(TPR) SPECIFICITY(TNR) F1-SCORE");
            var fullMatrix = eval.PrintClassificationResultsMatrix();
            for (int i = 0; i < eval.GetFullMatrixLineLength(); i++)
            {
                for (int j = 0; j < eval.GetFullMatrixColLength(); j++)
                {
                    Console.Write(fullMatrix[i][j] + " ");
                }
                Console.WriteLine();
            }

            Console.WriteLine("\nAccuracy for known words: " + eval.GetNaiveAccuracy(wordsTest, decoder.PredictedTags, decoder.UnknownWords, evalMode: "k"));
            Console.WriteLine("Accuracy for unknown words: " + eval.GetNaiveAccuracy(wordsTest, decoder.PredictedTags, decoder.UnknownWords, evalMode: "u"));
            Console.WriteLine("Accuracy on both: " + eval.GetNaiveAccuracy(wordsTest, decoder.PredictedTags, decoder.UnknownWords, evalMode: "k+u"));
            #endregion

            Console.WriteLine("+");

            #region Count known&unknown words
            int unkwordscount = 0, knownwordscount = 0;
            foreach (var item in wordsTest)
            {
                if (decoder.UnknownWords.Contains(item.word))
                {
                    unkwordscount++;
                }
                else
                {
                    knownwordscount++;
                }
            }

            Console.WriteLine("Unknown words (count): " + unkwordscount + " | Procentage (%): " + (float)unkwordscount / wordsTest.Count);
            Console.WriteLine("Known words (count): " + knownwordscount + " | Procentage (%): " + (float)knownwordscount / wordsTest.Count);
            Console.WriteLine("Total words (count): " + wordsTest.Count);
            #endregion


            //using (System.IO.StreamWriter file = new System.IO.StreamWriter(path + "statistics\\" + "unknown_words.csv"))
            //{
            //    file.WriteLine("Unknown Words");
            //    foreach(var item in decoder.UnknownWords)
            //    {
            //        file.WriteLine("\"" + item + "\"");
            //    }
            //}

            #region Suffix & Prefix hitrate
            //List<string> suffixStr = new List<string>();
            //List<string> prefixStr = new List<string>();
            //List<Tuple<int, int>> suffixHR = new List<Tuple<int, int>>();
            //List<Tuple<int, int>> prefixHR = new List<Tuple<int, int>>();

            //foreach (var item in tagger.SuffixEmissionProbabilities)
            //{
            //    suffixStr.Add(item.Word);
            //    suffixHR.Add(new Tuple<int, int>(0, 0));
            //}
            //foreach (var item in tagger.PrefixEmissionProbabilities)
            //{
            //    prefixStr.Add(item.Word);
            //    prefixHR.Add(new Tuple<int, int>(0, 0));
            //}

            //for (int i = 0; i < wordsTest.Count; i++)
            //{
            //    if (!decoder.UnknownWords.Contains(wordsTest[i].word)) continue;
            //    for (int j = 0; j < suffixStr.Count; j++)
            //    {
            //        if (wordsTest[i].word.EndsWith(suffixStr[j]))
            //        {
            //            int hitr = suffixHR[j].Item1;
            //            int allr = suffixHR[j].Item2 + 1;
            //            if (wordsTest[i].tag == decoder.PredictedTags[i])
            //                suffixHR[j] = new Tuple<int, int>(hitr + 1, allr);
            //            else suffixHR[j] = new Tuple<int, int>(hitr, allr);
            //            break;
            //        }
            //    }

            //    for (int j = 0; j < prefixStr.Count; j++)
            //    {
            //        if (wordsTest[i].word.ToLower().StartsWith(prefixStr[j]))
            //        {
            //            int hitr = prefixHR[j].Item1;
            //            int allr = prefixHR[j].Item2 + 1;
            //            if (wordsTest[i].tag == decoder.PredictedTags[i])
            //                prefixHR[j] = new Tuple<int, int>(hitr + 1, allr);
            //            else prefixHR[j] = new Tuple<int, int>(hitr, allr);
            //            break;
            //        }
            //    }
            //}

            //Console.WriteLine("Prefixes: ");
            //for (int i = 0; i < prefixStr.Count; i++)
            //{
            //    Console.WriteLine(prefixStr[i] + ": (" + prefixHR[i].Item1 + ", " + prefixHR[i].Item2 + ") -> " + (float)prefixHR[i].Item1 / prefixHR[i].Item2);
            //}

            //Console.WriteLine("\nSuffixes: ");
            //for (int i = 0; i < suffixStr.Count; i++)
            //{
            //    Console.WriteLine(suffixStr[i] + ": (" + suffixHR[i].Item1 + ", " + suffixHR[i].Item2 + ") -> " + (float)suffixHR[i].Item1 / suffixHR[i].Item2);
            //}
            #endregion

            #region Save predictions tags to excel
            //using (System.IO.StreamWriter file = new System.IO.StreamWriter(path + "statistics\\" + "trigram_bidirectional.csv"))
            //{
            //    file.WriteLine("Word,Real Tag,Prediction Tag,Is in Train T/F,Predicted T/F");
            //    for (int i = 0; i < wordsTest.Count; i++)
            //    {
            //        bool isInTrain = true, predictedB = false;
            //        if (decoder.UnknownWords.Contains(wordsTest[i].word))
            //            isInTrain = false;
            //        if (wordsTest[i].tag == decoder.PredictedTags[i])
            //            predictedB = true;
            //        file.WriteLine("\"" + wordsTest[i].word + "\"," + wordsTest[i].tag + "," + decoder.PredictedTags[i] + "," + isInTrain + "," + predictedB);
            //    }
            //}
            #endregion
#elif (CROSS_VALIDATION)
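            // k-fold cross-validation: each iteration trains on FOLDS-1 partitions
            // and evaluates on the held-out fold.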
            const int    FOLDS   = 4;
            const bool   SHUFFLE = true;
            const string CVPATH  = "dataset\\crossvalidation";
            Console.WriteLine("You chose Cross-Validation for the data-set! Folds: " + FOLDS + ", Shuffle-option: " + SHUFFLE);

            string BrownFolderPath = path + CVPATH;

            #region Part of Speech Tag Frequence Count
            //var tx = LoadAndReadFolderFiles("dataset\\crossvalidation");
            //var ow = Tokenizer.SeparateTagFromWord(Tokenizer.WordTokenizeCorpus(tx));
            //var nw = SpeechPartClassification.GetNewHierarchicTags(ow);
            //var res = SpeechPartClassification.SpeechPartFrequence(nw);
            //foreach (var item in res)
            //    Console.WriteLine(item.Key + ": " + item.Value);
            #endregion

            List <float> knownacc      = new List <float>();
            List <float> unknownacc    = new List <float>();
            List <float> totalacc      = new List <float>();
            List <float> procentageunk = new List <float>();


            CrossValidation cv = new CrossValidation(filePath: BrownFolderPath, fold: FOLDS, shuffle: SHUFFLE); // with randomness
            Console.WriteLine("Done with loading dataset & splitting them into folds!\n");
            for (int foldNumber = 0; foldNumber < FOLDS; foldNumber++)
            {
                #region Load Train Files & pre-process data
                var text       = cv.TrainFile[foldNumber];
                var oldWords   = Tokenizer.SeparateTagFromWord(Tokenizer.TokenizePennTreebank(text));
                var words      = SpeechPartClassifier.GetNewHierarchicTags(oldWords);
                var capWords   = TextPreprocessing.PreProcessingPipeline(words, toLowerOption: false, keepOnlyCapitalizedWords: true);
                var uncapWords = TextPreprocessing.PreProcessingPipeline(words, toLowerOption: true, keepOnlyCapitalizedWords: false);
                #endregion

                #region Load Test Files & pre-process data
                var textTest     = cv.TestFile[foldNumber];
                var oldWordsTest = Tokenizer.SeparateTagFromWord(Tokenizer.TokenizePennTreebank(textTest));
                var wordsTest    = SpeechPartClassifier.GetNewHierarchicTags(oldWordsTest);
                wordsTest = TextPreprocessing.PreProcessingPipeline(wordsTest);
                wordsTest = TextPreprocessing.Cleaning.EliminateDuplicateSequenceOfEndOfSentenceTags(wordsTest);
                #endregion

                Console.WriteLine("Done with loading and creating tokens for train & test files!");

                #region Hidden Markov Model Training
                PartOfSpeechModel tagger = new PartOfSpeechModel();

                Stopwatch sw = new Stopwatch();

                sw.Start();
                tagger.CreateHiddenMarkovModel(uncapWords, capWords);

                tagger.CalculateHiddenMarkovModelProbabilitiesForTestCorpus(wordsTest, model: "trigram");

                sw.Stop();
                Console.WriteLine("Done with training POS MODEL & calculating probabilities! Time: " + sw.ElapsedMilliseconds + " ms");
                //Console.WriteLine("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
                #endregion


                #region Decoding Viterbi Model
                Decoder decoder = new Decoder();

                sw.Reset(); sw.Start();
                decoder.ViterbiDecoding(tagger, wordsTest, modelForward: "trigram", modelBackward: "trigram", mode: "f+b");
                sw.Stop();

                Console.WriteLine("Done with DECODING VITERBI MODEL! Time: " + sw.ElapsedMilliseconds + " ms");
                //Console.WriteLine("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
                #endregion

                #region Evaluations & results
                Evaluation eval = new Evaluation();
                //eval.CreateSupervizedEvaluationsMatrix(wordsTest, decoder.PredictedTags, decoder.UnknownWords, fbeta: 1);
                //Console.WriteLine("TAG\t\tACCURACY\t\tPRECISION\t\tRECALL(TPR)\t\tF1-SCORE\t\tSPECIFICITY(TNR)");
                //var fullMatrix = eval.PrintClassificationResultsMatrix();
                //for (int i = 0; i < eval.GetFullMatrixLineLength(); i++)
                //{
                //    for (int j = 0; j < eval.GetFullMatrixColLength(); j++)
                //        Console.Write(fullMatrix[i][j] + "\t\t");
                //    Console.WriteLine();
                //}

                var ka = eval.GetNaiveAccuracy(wordsTest, decoder.PredictedTags, decoder.UnknownWords, evalMode: "k");
                knownacc.Add(ka);
                var unkw = eval.GetNaiveAccuracy(wordsTest, decoder.PredictedTags, decoder.UnknownWords, evalMode: "u");
                unknownacc.Add(unkw);
                var tot = eval.GetNaiveAccuracy(wordsTest, decoder.PredictedTags, decoder.UnknownWords, evalMode: "k+u");
                totalacc.Add(tot);


                Console.WriteLine("\nAccuracy for known words: " + ka);
                Console.WriteLine("Accuracy for unknown words: " + unkw);
                Console.WriteLine("Accuracy on both: " + tot);
                #endregion

                Console.WriteLine("+");

                #region Count known&unknown words
                int unkwordscount = 0, knownwordscount = 0;
                foreach (var item in wordsTest)
                {
                    if (decoder.UnknownWords.Contains(item.word))
                    {
                        unkwordscount++;
                    }
                    else
                    {
                        knownwordscount++;
                    }
                }

                var proc = (float)unkwordscount / wordsTest.Count;
                procentageunk.Add(proc);

                Console.WriteLine("Unknown words (count): " + unkwordscount + " | Procentage (%): " + proc);
                Console.WriteLine("Known words (count): " + knownwordscount + " | Procentage (%): " + (float)knownwordscount / wordsTest.Count);
                Console.WriteLine("Total words (count): " + wordsTest.Count);
                #endregion

                Console.WriteLine("\n\n[FOLD " + (foldNumber + 1) + "/" + FOLDS + " DONE!]\n\n");
            }

            var known   = (float)Math.Round(knownacc.Average() * 100, 3);
            var unk     = (float)Math.Round(unknownacc.Average() * 100, 3);
            var total   = (float)Math.Round(totalacc.Average() * 100, 3);
            var procunk = (float)Math.Round(procentageunk.Average() * 100, 3);

            Console.WriteLine("Procentage (%): " + procunk);
            Console.WriteLine("Accuracy for all unknown words: " + unk);
            Console.WriteLine("\nAccuracy for all known words: " + known);
            Console.WriteLine("Accuracy on all total: " + total);
#elif (DEMO_APP)
            #region Load & convert to model
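            // Load the JSON models written out during training (see the commented
            // WriteToTxtFile calls above) and deserialize them back into the
            // tagger's frequency tables and affix emission models.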
            string modelsPath = path + "models\\";

            string unigram              = File.ReadAllText(modelsPath + "unigram.json");
            string bigram               = File.ReadAllText(modelsPath + "bigram.json");
            string trigram              = File.ReadAllText(modelsPath + "trigram.json");
            string capitalizedPrefix    = File.ReadAllText(modelsPath + "capitalizedPrefix.json");
            string nonCapitalizedPrefix = File.ReadAllText(modelsPath + "nonCapitalizedPrefix.json");
            string capitalizedSuffix    = File.ReadAllText(modelsPath + "capitalizedSuffix.json");
            string nonCapitalizedSuffix = File.ReadAllText(modelsPath + "nonCapitalizedSuffix.json");
            string emission             = File.ReadAllText(modelsPath + "emission.json");
            string emissionWithCapital  = File.ReadAllText(modelsPath + "emissionWithCapital.json");

            var unigramFreq              = JsonConvert.DeserializeObject <Dictionary <string, int> >(unigram);
            var bigramNonConverted       = JsonConvert.DeserializeObject <Dictionary <string, int> >(bigram);
            var trigramNonConverted      = JsonConvert.DeserializeObject <Dictionary <string, int> >(trigram);
            var capitalizedPrefixProb    = JsonConvert.DeserializeObject <List <PartOfSpeechModel.EmissionProbabilisticModel> >(capitalizedPrefix);
            var nonCapitalizedPrefixProb = JsonConvert.DeserializeObject <List <PartOfSpeechModel.EmissionProbabilisticModel> >(nonCapitalizedPrefix);
            var capitalizedSuffixProb    = JsonConvert.DeserializeObject <List <PartOfSpeechModel.EmissionProbabilisticModel> >(capitalizedSuffix);
            var nonCapitalizedSuffixProb = JsonConvert.DeserializeObject <List <PartOfSpeechModel.EmissionProbabilisticModel> >(nonCapitalizedSuffix);
            var emissionFreq             = JsonConvert.DeserializeObject <List <PartOfSpeechModel.EmissionModel> >(emission);
            var emissionWithCapitalFreq  = JsonConvert.DeserializeObject <List <PartOfSpeechModel.EmissionModel> >(emissionWithCapital);

            Dictionary <Tuple <string, string>, int>         bigramFreq  = new Dictionary <Tuple <string, string>, int>();
            Dictionary <Tuple <string, string, string>, int> trigramFreq = new Dictionary <Tuple <string, string, string>, int>();

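            // The n-gram JSON keys are stringified tuples such as "(NN, VB)";
            // strip the parentheses and spaces, then rebuild typed tuple keys.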
            foreach (var item in bigramNonConverted)
            {
                string[] split         = item.Key.Split(',');
                var      charsToRemove = new string[] { "(", ")", " " };
                foreach (var c in charsToRemove)
                {
                    split[0] = split[0].Replace(c, string.Empty);
                    split[1] = split[1].Replace(c, string.Empty);
                }
                bigramFreq.Add(new Tuple <string, string>(split[0], split[1]), item.Value);
            }

            foreach (var item in trigramNonConverted)
            {
                string[] split         = item.Key.Split(',');
                var      charsToRemove = new string[] { "(", ")", " " };
                foreach (var c in charsToRemove)
                {
                    split[0] = split[0].Replace(c, string.Empty);
                    split[1] = split[1].Replace(c, string.Empty);
                    split[2] = split[2].Replace(c, string.Empty);
                }
                trigramFreq.Add(new Tuple <string, string, string>(split[0], split[1], split[2]), item.Value);
            }
            #endregion

            PartOfSpeechModel model = new PartOfSpeechModel(emissionFreq, emissionWithCapitalFreq, unigramFreq, bigramFreq, trigramFreq,
                                                            nonCapitalizedSuffixProb, nonCapitalizedPrefixProb, capitalizedSuffixProb, capitalizedPrefixProb);
            NLP.Decoder decoder = new NLP.Decoder();

            string        input = null;
            List <string> preprocessedInput;

            while (true)
            {
                do
                {
                    if (string.IsNullOrWhiteSpace(input))
                    {
                        input = read();
                    }
                    preprocessedInput = Tokenizer.TokenizeSentenceWords(input);
                    input             = null;
                } while (preprocessedInput.Count == 0 || preprocessedInput[0] == string.Empty);

                preprocessedInput = TextPreprocessing.PreProcessingPipeline(preprocessedInput);
                model.CalculateHiddenMarkovModelProbabilitiesForTestCorpus(preprocessedInput, model: "trigram");

                List <Tokenizer.WordTag> inputTest = new List <Tokenizer.WordTag>();
                foreach (var item in preprocessedInput)
                {
                    if (item == "." || item == "!" || item == "?")
                    {
                        inputTest.Add(new Tokenizer.WordTag(item, "."));
                    }
                    else
                    {
                        inputTest.Add(new Tokenizer.WordTag(item, ""));
                    }
                }
                if (inputTest[inputTest.Count - 1].tag != ".") // safety check: ensure the sentence ends with an end-of-sentence token
                {
                    inputTest.Add(new Tokenizer.WordTag(".", "."));
                }

                decoder.ViterbiDecoding(model, inputTest, modelForward: "trigram", modelBackward: "trigram", mode: "f+b");

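                // Build a histogram of the predicted tags, then convert the counts
                // to percentages for the header display.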
                Dictionary <string, int> histogram = new Dictionary <string, int>();
                Dictionary <int, double> freqHisto = new Dictionary <int, double>();
                foreach (var item in decoder.PredictedTags)
                {
                    if (histogram.ContainsKey(item))
                    {
                        histogram[item] += 1;
                    }
                    else
                    {
                        histogram.Add(item, 1);
                    }
                }
                int sum = histogram.Sum(x => x.Value);
                foreach (var item in histogram)
                {
                    int    index = getIndexForConversion(item.Key);
                    double val   = Math.Round(((double)item.Value / sum) * 100.0d, 1);
                    freqHisto.Add(index, val);
                }

                header(freqHisto);
                Console.ResetColor();
                Console.Write("Tagged Sentence: ");

                for (int i = 0; i < decoder.PredictedTags.Count; i++)
                {
                    int index = getIndexForConversion(decoder.PredictedTags[i]);
                    Console.ForegroundColor = frColor[index];
                    Console.BackgroundColor = bkColor[index];
                    Console.Write(" " + inputTest[i].word + " ");
                    emptySpace();
                }

                Console.WriteLine();
                Console.WriteLine();
                Console.Write("Enter your sentence here: ");
                input = Console.ReadLine();
                Console.Clear();
            }
#endif
        }