Exemplo n.º 1
0
        /// <summary>
        /// Predicts the genre of <paramref name="bookTested"/> by majority vote among the
        /// books returned by getNClosestPoints.
        /// </summary>
        /// <param name="bookTested">The book to classify.</param>
        /// <param name="books">Candidate neighbours; passed straight through to getNClosestPoints.</param>
        /// <param name="n">Number of neighbours requested from getNClosestPoints.</param>
        /// <returns>
        /// The genre with the most votes, or an empty string when no neighbour was found.
        /// On a tie the genre that the dictionary enumerates first is kept, because only a
        /// strictly higher count replaces the current leader.
        /// </returns>
        static public string findMostLikelyGenre(Data.BookData bookTested, List <Data.BookData> books, int n)
        {
            Data.BookData[]             closestBooks = getNClosestPoints(bookTested, books, n);
            Dictionary <string, double> votesByGenre = new Dictionary <string, double>();

            foreach (Data.BookData neighbour in closestBooks)
            {
                //getNClosestPoints leaves slots unfilled when there are fewer than n other books,
                //so skip them instead of dereferencing null
                if (object.ReferenceEquals(neighbour, null))
                {
                    continue;
                }

                //TryGetValue leaves votes at 0 for a genre we have not seen yet
                double votes;
                votesByGenre.TryGetValue(neighbour.Genre, out votes);
                votesByGenre[neighbour.Genre] = votes + 1;
            }

            string bestGuess = "";
            double highest   = -1;

            foreach (KeyValuePair <string, double> tally in votesByGenre)
            {
                if (tally.Value > highest)
                {
                    highest   = tally.Value;
                    bestGuess = tally.Key;
                }
            }

            return bestGuess;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Picks the genre whose word statistics best match <paramref name="bookTested"/>.
        /// For each genre, the OneSampleZTest results of every word in arrayOfBestWords are
        /// multiplied together; the genre with the largest product is returned.
        /// </summary>
        /// <param name="bookTested">Book to classify; its WordCounts supply the per-word occurrence (0 when a word is absent).</param>
        /// <param name="wordOddsPerGenre">Per-genre statistics, keyed by genre name.</param>
        /// <returns>
        /// The key of the best-scoring genre, or an empty string when
        /// <paramref name="wordOddsPerGenre"/> is empty. When two genres score equally the one
        /// enumerated later wins.
        /// </returns>
        static public string getClassOfBookBayes(Data.BookData bookTested, Dictionary <string, WordOddsBayes> wordOddsPerGenre)
        {
            double[] oddsPerOutcome = new double[wordOddsPerGenre.Count];
            //we use the foreach because it's easier to iterate through a dictionary with that, but we still need an index per genre
            int    counter   = 0;
            string bestGuess = "";

            foreach (KeyValuePair <string, WordOddsBayes> Genre in wordOddsPerGenre)
            {
                //start at 1 because every word's result is multiplied into the running product
                //NOTE(review): if OneSampleZTest returns values below 1, a long word list could underflow this product to 0 - confirm
                double odds = 1;

                for (int i = 0; i < arrayOfBestWords.Length; i++)
                {
                    string word            = arrayOfBestWords[i];
                    double occuranceInBook = 0;
                    if (bookTested.WordCounts.ContainsKey(word))
                    {
                        occuranceInBook = bookTested.WordCounts[word];
                    }

                    odds = odds * Genre.Value.OneSampleZTest(occuranceInBook, word);
                }
                oddsPerOutcome[counter] = odds;

                //the current genre takes the lead only when no earlier genre scored strictly higher
                bool shouldStateCurrentGenreAsBest = true;
                for (int altCount = 0; altCount < counter; altCount++)
                {
                    if (oddsPerOutcome[altCount] > odds)
                    {
                        shouldStateCurrentGenreAsBest = false;
                        break;
                    }
                }

                //decide only after ALL earlier genres were checked; deciding inside the loop would
                //crown this genre as soon as it beat the first one, even if a later one outscored it
                if (shouldStateCurrentGenreAsBest)
                {
                    bestGuess = Genre.Key;
                }
                counter++;
            }

            return bestGuess;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Collects the <paramref name="n"/> books from <paramref name="books"/> that have the
        /// smallest getEucDistance to <paramref name="bookTested"/>.
        /// </summary>
        /// <param name="bookTested">Reference book; entries of <paramref name="books"/> that compare equal to it with != are skipped.</param>
        /// <param name="books">Candidate books to search.</param>
        /// <param name="n">Number of closest points you want; always set it to something odd.</param>
        /// <returns>
        /// An array of length <paramref name="n"/> in no particular order. Slots that were never
        /// filled (fewer than <paramref name="n"/> candidates) keep their default value, so
        /// callers must be prepared for empty slots.
        /// </returns>
        static public Data.BookData[] getNClosestPoints(Data.BookData bookTested, List <Data.BookData> books, int n)
        {
            double[]        distances     = new double[n];
            Data.BookData[] nClosestBooks = new Data.BookData[n];

            //nothing to collect; returning here also avoids indexing into the empty distances array below
            if (n == 0)
            {
                return nClosestBooks;
            }

            //every slot starts out "infinitely far away" so any book with a finite distance can claim it
            for (int i = 0; i < distances.Length; i++)
            {
                distances[i] = double.PositiveInfinity;
            }

            foreach (Data.BookData bookDataPoint in books)
            {
                //if the book data point is the tested book itself we don't want to test it
                if (bookDataPoint != bookTested)
                {
                    double distanceBetweenBooks = getEucDistance(bookTested, bookDataPoint);
                    //this is for manhattan distance
                    //double distanceBetweenBooks = getManhattenDistance(bookTested, bookDataPoint);

                    //a simple loop to find the highest distance so far. This is the one to test against and replace
                    int indexOfHighestDistance = 0;
                    for (int i = 1; i < distances.Length; i++)
                    {
                        if (distances[indexOfHighestDistance] < distances[i])
                        {
                            indexOfHighestDistance = i;
                        }
                    }

                    if (distances[indexOfHighestDistance] > distanceBetweenBooks)
                    {
                        //the furthest kept point is further out than the new book, so the new book takes its slot
                        distances[indexOfHighestDistance]     = distanceBetweenBooks;
                        nClosestBooks[indexOfHighestDistance] = bookDataPoint;
                    }
                }
            }

            return nClosestBooks;
        }