/// <summary>
        /// Classifies a single file: reads it into a word-count dictionary and asks
        /// CalculateCategory which category of the data set fits it best.
        /// </summary>
        /// <param name="filename">Path of the file to classify; handed to DataReader.ReturnWordCountFromFile.</param>
        /// <param name="dataSet">Data set the document is scored against.</param>
        /// <returns>The result text produced by CalculateCategory.</returns>
        public string Analyze(String filename, DataSet dataSet)
        {
            // Word -> count table for the file, as produced by the reader.
            Dictionary<String, int> wordCounts = DataReader.ReturnWordCountFromFile(filename);

            // The classifier both selects the category and formats the result text.
            return this.CalculateCategory(wordCounts, dataSet);
        }
        /// <summary>
        /// Precomputes, for every category, the add-one smoothed likelihood of every vocabulary
        /// word and stores the per-category tables in categoryWithWWL. Computing the tables once
        /// up front avoids redoing the work for each analyzed file.
        /// </summary>
        /// <param name="dataset">
        /// Source of the vocabulary, the per-category word counts (getDataSet) and the
        /// per-category totals (CategoryWordcount).
        /// </param>
        public void calcCWWWL(DataSet dataset)
        {
            Dictionary<string, int> vocabulary = dataset.Vocabulary;
            foreach (KeyValuePair<string, Dictionary<string, int>> categoryWords in dataset.getDataSet)
            {
                // The smoothing denominator is the same for every word of a category, so compute it once.
                // (CategoryWordcount is presumably the total number of words seen in the category.)
                double denominator = dataset.CategoryWordcount[categoryWords.Key] + vocabulary.Count;
                Dictionary<string, double> wordWithLikelyhood = new Dictionary<string, double>(vocabulary.Count);

                foreach (KeyValuePair<string, int> w in vocabulary)
                {
                    // TryGetValue leaves the count at 0 for words this category never contained,
                    // which yields the same 1 / denominator value as before with a single lookup.
                    int occurrences;
                    categoryWords.Value.TryGetValue(w.Key, out occurrences);
                    wordWithLikelyhood[w.Key] = (occurrences + 1.0) / denominator;
                }
                categoryWithWWL[categoryWords.Key] = wordWithLikelyhood;
            }
        }
        /// <summary>
        /// Classifies the last (up to) 300 files of every category directory found under the
        /// 20_newsgroups folder and prints, per category and in total, how many files were
        /// assigned to the category whose directory they live in.
        /// </summary>
        /// <param name="dataAnalyzer">Analyzer whose Analyze method classifies each file.</param>
        /// <param name="data">Data set passed through to Analyze.</param>
        public static void AnalyzeAll(DataAnalyzer dataAnalyzer, DataSet data)
        {
            // How many files, counted from the end of each directory listing, are evaluated.
            const int documentsPerCategory = 300;

            float correct = 0.0f;
            float wrong = 0.0f;
            // NOTE(review): the relative part is appended without a leading separator, so the
            // resolved location depends on how the OS normalises "<cwd>..\". Left unchanged
            // because correcting it would move the lookup to a different directory - confirm.
            string[] categories = Directory.GetDirectories(Directory.GetCurrentDirectory() + "..\\..\\..\\20_newsgroups");

            foreach (string category in categories)
            {
                // The directory name (last path segment) is the category label.
                string categoryName = Path.GetFileName(category);
                int analyzedCorrectly = 0;
                int analyzedWrong = 0;
                string[] files = Directory.GetFiles(category);

                // Clamp the start index: a directory with fewer files than the sample size
                // would otherwise produce a negative index and throw.
                int firstIndex = Math.Max(0, files.Length - documentsPerCategory);
                for (int q = firstIndex; q < files.Length; q++)
                {
                    string analyzed = dataAnalyzer.Analyze(files[q], data);
                    // Analyze returns a sentence that embeds the chosen category name.
                    if (analyzed.Contains(categoryName))
                        analyzedCorrectly++;
                    else
                        analyzedWrong++;
                }
                correct += analyzedCorrectly;
                wrong += analyzedWrong;
                Console.WriteLine("Out of " + (analyzedWrong + analyzedCorrectly) + " documents in " + categoryName + ".");
                Console.WriteLine(analyzedCorrectly + " were correctly classified.");
                Console.WriteLine(analyzedWrong + " were wrongly classified.");
            }

            float total = correct + wrong;
            // Report 0% instead of NaN when nothing was evaluated.
            float percent = total > 0.0f ? (correct / total) * 100.0f : 0.0f;
            Console.WriteLine("Correctly classified: " + correct + " of " + total + " - " + percent + "%");
        }
        /// <summary>
        /// Scores the candidate document against every category and returns the best match.
        /// The score of a category is the logarithm of its prior P(H) plus the sum of the
        /// logarithms of the per-word terms, using the likelihood tables stored in categoryWithWWL.
        /// </summary>
        /// <param name="candidateDocument">Word -> occurrence count of the document to classify.</param>
        /// <param name="dataset">Data set supplying the categories and the document counts used for the prior.</param>
        /// <returns>
        /// Text of the form "Category: NAME. Likelyhood: SCORE." where SCORE is the winning
        /// log-score. NAME is empty and SCORE is 0 when the data set has no categories.
        /// </returns>
        private String CalculateCategory(Dictionary<string, int> candidateDocument, DataSet dataset)
        {
            Dictionary<string, int> categoryWithDocumentCount = dataset.CategoryDocumentCount;

            string max_group = "";
            double max_p = 0;
            // Explicit flag instead of treating max_p == 0 as "unset": 0 is a legitimate score.
            bool haveCandidate = false;

            foreach (KeyValuePair<string, Dictionary<string, int>> categoryWords in dataset.getDataSet)
            {
                // Prior P(H): share of all documents that belong to this category.
                double pH = (double)categoryWithDocumentCount[categoryWords.Key] / (double)dataset.documentCount;
                Dictionary<string, double> wordWithLikelyhood = categoryWithWWL[categoryWords.Key];

                // Work in log space throughout: the prior is added as log P(H). Multiplying the
                // (negative) log-sum by P(H) would shrink it towards zero and favour small priors.
                double p = Math.Log(pH);
                foreach (KeyValuePair<string, int> wordPair in candidateDocument)
                {
                    // Words without an entry in the precomputed table are ignored.
                    double likelihood;
                    if (wordWithLikelyhood.TryGetValue(wordPair.Key, out likelihood))
                    {
                        // NOTE(review): the count sits inside the logarithm, so it only adds a
                        // category-independent constant; count * Math.Log(likelihood) would be the
                        // usual multinomial form - confirm which one is intended.
                        p += Math.Log(wordPair.Value * likelihood);
                    }
                }

                if (!haveCandidate || p > max_p)
                {
                    haveCandidate = true;
                    max_p = p;
                    max_group = categoryWords.Key;
                }
            }

            return "Category: " + max_group + ". Likelyhood: " + max_p + ".";
        }
        /// <summary>
        /// Console entry point: builds the data set, precomputes the word likelihood tables and
        /// then runs an interactive command loop ("exit", "analyze path", "analyzeAll", "help").
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            DateTime start = DateTime.Now;

            Console.WriteLine("Creating dataset from test files.");

            DataSet data = new DataSet();

            DateTime end = DateTime.Now;
            TimeSpan duration = end - start;

            Console.WriteLine("Dataset created in " + String.Format("{0:0.00}", duration.TotalSeconds) + " seconds.");
            Console.WriteLine("Calculating P(w|h) for each word in each category using given dataset.");
            DataAnalyzer dataAnalyzer = new DataAnalyzer();

            // Calculates the word-with-likelihood dictionary once up front.
            // Much faster than recomputing it for each document.
            // Must be recalculated to reflect changes if the training set is altered.
            dataAnalyzer.calcCWWWL(data);

            Console.Clear();
            Console.WriteLine("Dataset is ready.");
            Console.WriteLine("Enter command for execution - \"help\" for help.");

            bool done = false;
            while (!done)
            {
                Console.Write("> ");
                string command = Console.ReadLine();
                if (command == null)
                {
                    // ReadLine returns null at end of input (e.g. closed or redirected stdin);
                    // there is nothing more to read, so leave the loop instead of crashing.
                    break;
                }

                // Split into the command word and the remainder so that a file path
                // containing spaces reaches "analyze" intact.
                string[] commandSplit = command.Trim().Split(new char[] { ' ' }, 2);
                string argument = commandSplit.Length > 1 ? commandSplit[1].Trim() : "";

                // Invariant lowering keeps command matching independent of the current culture.
                switch (commandSplit[0].ToLowerInvariant())
                {
                    case "exit":
                        {
                            done = true;
                            break;
                        }
                    case "analyzeall":
                        {
                            Console.Clear();
                            AnalyzeAll(dataAnalyzer, data);
                            break;
                        }
                    case "analyze":
                        {
                            Console.Clear();
                            if (argument.Length == 0)
                            {
                                Console.WriteLine("Enter a path as well. Syntax \"analyze filepath\".");
                                break;
                            }

                            try
                            {
                                Console.WriteLine(dataAnalyzer.Analyze(argument, data));
                            }
                            catch (Exception)
                            {
                                // Best effort: any failure while reading or classifying the file is
                                // reported to the user and the command loop keeps running.
                                Console.WriteLine("Unable to analyze " + argument + ". Check that file is located at this path.");
                            }
                            break;
                        }
                    case "help":
                        {
                            Console.Clear();
                            Console.WriteLine("Commands:");
                            Console.WriteLine("Exit - exits the program.");
                            Console.WriteLine("analyze filePath - attempts to categorize file located at given filepath.");
                            Console.WriteLine("analyzeAll - attempts to categorize all files in training set.");
                            break;
                        }
                    default:
                        {
                            Console.Clear();
                            Console.WriteLine("Unable to recognize command - write help for list of commands.");
                            break;
                        }
                }
            }
        }