/// <summary>
/// Classifies a book by majority vote among its nearest neighbours: the books returned by
/// <c>getNClosestPoints</c> each vote for their own <c>Genre</c>, and the genre with the most
/// votes is returned.
/// </summary>
/// <param name="bookTested">The book whose genre should be guessed.</param>
/// <param name="books">The candidate books to search for neighbours.</param>
/// <param name="n">How many neighbours to request from <c>getNClosestPoints</c>.</param>
/// <returns>
/// The genre with the highest vote count, or an empty string when no neighbour was found.
/// On a tie the genre that the dictionary enumerates first wins.
/// </returns>
public static string findMostLikelyGenre(Data.BookData bookTested, List<Data.BookData> books, int n)
{
    Data.BookData[] closestBooks = getNClosestPoints(bookTested, books, n);

    // Tally one vote per neighbour, keyed by genre.
    Dictionary<string, int> votesByGenre = new Dictionary<string, int>();
    foreach (Data.BookData neighbour in closestBooks)
    {
        // getNClosestPoints leaves unused slots empty when it finds fewer than n
        // neighbours; skip them instead of dereferencing null.
        if (object.ReferenceEquals(neighbour, null))
        {
            continue;
        }

        int votesSoFar;
        if (!votesByGenre.TryGetValue(neighbour.Genre, out votesSoFar))
        {
            votesSoFar = 0;
        }
        votesByGenre[neighbour.Genre] = votesSoFar + 1;
    }

    // Pick the genre with the most votes; strict '>' keeps the first one seen on ties.
    string bestGuess = "";
    int highest = -1;
    foreach (KeyValuePair<string, int> tally in votesByGenre)
    {
        if (tally.Value > highest)
        {
            highest = tally.Value;
            bestGuess = tally.Key;
        }
    }

    return bestGuess;
}
/// <summary>
/// Picks the genre that best matches a book. For every genre, the results of
/// <c>OneSampleZTest</c> for each word in <c>arrayOfBestWords</c> are multiplied together,
/// and the genre with the largest product is returned.
/// </summary>
/// <param name="bookTested">The book to classify; its <c>WordCounts</c> supply the per-word occurrence.</param>
/// <param name="wordOddsPerGenre">Per-genre word statistics, keyed by genre name.</param>
/// <returns>
/// The key of the genre with the largest product, or an empty string when
/// <paramref name="wordOddsPerGenre"/> is empty. When several genres share the largest
/// product, the one enumerated last wins.
/// </returns>
public static string getClassOfBookBayes(Data.BookData bookTested, Dictionary<string, WordOddsBayes> wordOddsPerGenre)
{
    double[] oddsPerOutcome = new double[wordOddsPerGenre.Count];

    // foreach is the convenient way to walk a dictionary, so the array index
    // for the current genre is tracked by hand.
    int counter = 0;
    string bestGuess = "";

    foreach (KeyValuePair<string, WordOddsBayes> genre in wordOddsPerGenre)
    {
        // Start at 1 because the per-word results are accumulated by multiplication.
        // NOTE(review): a long word list may underflow this product to 0 for every
        // genre; summing logarithms would avoid that - confirm against OneSampleZTest's range.
        oddsPerOutcome[counter] = 1;

        for (int i = 0; i < arrayOfBestWords.Length; i++)
        {
            string word = arrayOfBestWords[i];

            // Words that never appear in the book count as zero occurrences.
            double occuranceInBook = 0;
            if (bookTested.WordCounts.ContainsKey(word))
            {
                occuranceInBook = bookTested.WordCounts[word];
            }

            oddsPerOutcome[counter] = oddsPerOutcome[counter] * genre.Value.OneSampleZTest(occuranceInBook, word);
        }

        // The current genre becomes the best guess only if NO earlier genre scored
        // strictly higher. The decision is made after the whole scan; deciding inside
        // the loop would accept the genre after comparing against just the first one.
        bool beatenByEarlierGenre = false;
        for (int earlier = 0; earlier < counter; earlier++)
        {
            if (oddsPerOutcome[earlier] > oddsPerOutcome[counter])
            {
                beatenByEarlierGenre = true;
                break;
            }
        }

        if (!beatenByEarlierGenre)
        {
            bestGuess = genre.Key;
        }

        counter++;
    }

    return bestGuess;
}
/// <summary>
/// Finds the books in <paramref name="books"/> that are closest to
/// <paramref name="bookTested"/> as measured by <c>getEucDistance</c>.
/// </summary>
/// <param name="bookTested">
/// The reference book. Any entry of <paramref name="books"/> that compares equal to it
/// with <c>!=</c> (i.e. the same object) is skipped so a book is never its own neighbour.
/// </param>
/// <param name="books">The candidate books.</param>
/// <param name="n">
/// The number of closest points wanted. The original author's advice is to always use an
/// odd value (presumably so that a majority vote over the result is less likely to tie).
/// </param>
/// <returns>
/// An array of length <paramref name="n"/> in no particular order. Slots stay <c>null</c>
/// when fewer than <paramref name="n"/> candidates are available.
/// </returns>
public static Data.BookData[] getNClosestPoints(Data.BookData bookTested, List<Data.BookData> books, int n)
{
    double[] distances = new double[n];
    Data.BookData[] nClosestBooks = new Data.BookData[n];

    // Nothing can be stored in a zero-length result, and the replacement logic
    // below would index slot 0 of an empty array.
    if (n == 0)
    {
        return nClosestBooks;
    }

    // Every slot starts out "infinitely far away" so that any real distance,
    // however large, is accepted while the array is still filling up.
    for (int slot = 0; slot < distances.Length; slot++)
    {
        distances[slot] = double.PositiveInfinity;
    }

    foreach (Data.BookData bookDataPoint in books)
    {
        // The tested book itself would trivially be its own nearest neighbour.
        if (bookDataPoint != bookTested)
        {
            double distanceBetweenBooks = getEucDistance(bookTested, bookDataPoint);
            // Alternative metric:
            // double distanceBetweenBooks = getManhattenDistance(bookTested, bookDataPoint);

            // Locate the slot currently holding the furthest neighbour; that is
            // the one a closer candidate should evict.
            int indexOfHighestDistance = 0;
            for (int slot = 0; slot < distances.Length; slot++)
            {
                if (distances[indexOfHighestDistance] < distances[slot])
                {
                    indexOfHighestDistance = slot;
                }
            }

            // Replace the furthest neighbour when the candidate is strictly closer.
            if (distances[indexOfHighestDistance] > distanceBetweenBooks)
            {
                distances[indexOfHighestDistance] = distanceBetweenBooks;
                nClosestBooks[indexOfHighestDistance] = bookDataPoint;
            }
        }
    }

    return nClosestBooks;
}