Esempio n. 1
0
        public void ExecuteTest()
        {
            // Two tokenized example sentences used as toy documents.
            string[][] tokens =
            {
                new string[] { "今日", "は", "いい", "天気", "です"   },
                new string[] { "明日", "も", "いい", "天気", "でしょう" }
            };

            // Plain term-count bag-of-words. Setting MaximumOccurance to 1
            // would instead produce binary (0/1) vectors.
            var vectorizer = new BagOfWords()
            {
                MaximumOccurance = int.MaxValue
            };

            // Learn the vocabulary (in a real model, only from the training set).
            vectorizer.Learn(tokens);

            // Use the learned vocabulary to extract fixed-length feature
            // vectors for each sentence individually...
            double[] vector1 = vectorizer.Transform(tokens[0]);
            double[] vector2 = vectorizer.Transform(tokens[1]);

            // ...or transform every sentence in a single call.
            double[][] allVectors = vectorizer.Transform(tokens);

            // Both vectors have the same length, so they can be fed to any
            // classifier. Here a logistic regression learns to tell the
            // first sentence from the second.
            var irls = new IterativeReweightedLeastSquares<LogisticRegression>()
            {
                Tolerance      = 1e-4, // convergence threshold
                Iterations     = 100,  // maximum number of iterations to perform
                Regularization = 0
            };

            // Fit the regression on the two feature vectors.
            LogisticRegression regression = irls.Learn(new[] { vector1, vector2 }, new[] { false, true });

            // Predict with the trained classifier:
            bool predictedFirst  = regression.Decide(vector1); // Should be false
            bool predictedSecond = regression.Decide(vector2); // Should be true

            Console.WriteLine(predictedFirst);
            Console.WriteLine(predictedSecond);
        }
Esempio n. 2
0
        //LogisticRegression reg;


        /// <summary>
        /// Builds the classifier from a host file: learns a binary bag-of-words
        /// over the file's tokens and trains a simple neural-network classifier.
        /// </summary>
        /// <param name="fileName">Path of the host file containing the training data.</param>
        /// <param name="countLayers">Layer count forwarded to SimpleClassifierNN — presumably hidden layers; confirm against that class.</param>
        /// <param name="countEpoch">Training epoch count forwarded to SimpleClassifierNN.</param>
        public LogClassifier(string fileName, int countLayers, int countEpoch)
        {
            // Binary bag-of-words: feature vectors contain only 0's and 1's.
            Codebook = new BagOfWords()
            {
                MaximumOccurance = 1
            };

            int samples    = 0;
            var dictionary = Utilities.ReadHostFile(fileName, ref samples);

            Samples = samples;

            // Train only when the file yielded both inputs (Item1) and labels (Item2).
            if (dictionary.Item1.Length != 0 && dictionary.Item2.Length != 0)
            {
                Codebook.Learn(dictionary.Item1);
                double[][] inputs = Codebook.Transform(dictionary.Item1);
                int        count  = inputs.Length; // Length, not Enumerable.Count(): arrays know their size

                double[][] outputs = Utilities.BoolToDouble(dictionary.Item2);
                classifier = new SimpleClassifierNN(inputs, outputs, count, countLayers, countEpoch);

                var trainingResult = classifier.Train(inputs, outputs);
                Error        = trainingResult.Item1;
                TrainingTime = trainingResult.Item2;
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Extracts the most frequent words of a text as keywords.
        /// </summary>
        /// <param name="body">Tokenized text to analyze.</param>
        /// <param name="count">Maximum number of keywords to return.</param>
        /// <param name="minOccurance">Only words occurring strictly more often than this are kept.</param>
        /// <returns>Up to <paramref name="count"/> keywords, most frequent first.</returns>
        public static List <string> GetKeywords(string[] body, int count, int minOccurance = 0)
        {
            // Count word occurrences, capped at 500 per word.
            var bow = new BagOfWords()
            {
                MaximumOccurance = 500
            };

            bow.Learn(body);
            int[] codedBody = new int[body.Length];
            bow.Transform(body, codedBody);

            var codebook = bow.CodeToString;

            // Each position in codedBody is a word code with its occurrence count.
            // Keep the codes above the threshold, most frequent first, and map
            // them back to words — no intermediate dictionary needed.
            return codedBody.Select((value, code) => new { value, code })
                            .Where(x => x.value > minOccurance)
                            .OrderByDescending(x => x.value)
                            .Take(count)
                            .Select(x => codebook[x.code])
                            .ToList();
        }
Esempio n. 4
0
        private double[] ConvertInputs(string[] input, string[] subject, string[] text)
        {
            // Vectorize each field with its dedicated bag-of-words model,
            // then concatenate everything into one feature vector.
            return AccordHelpers.CombineInput(
                _inputBagOfWords.Transform(input),
                _subjectBagOfWords.Transform(subject),
                _textBagOfWords.Transform(text));
        }
Esempio n. 5
0
        public SentimentResult Predict(string sentiment)
        {
            // Tokenize and vectorize the raw text with the trained bag-of-words.
            var tokens   = _preprocessor.Process(sentiment);
            var features = _bagOfWords.Transform(tokens).ToInt32();

            var scores        = _bayes.Scores(features);
            var probabilities = _bayes.Probabilities(features);

            // Class 0 is negative, anything else is positive.
            return new SentimentResult
            {
                Polarity            = _bayes.Decide(features) == 0 ? Polarity.Negative : Polarity.Positive,
                NegativeScore       = scores[0],
                PositiveScore       = scores[1],
                NegativeProbability = probabilities[0],
                PositiveProbability = probabilities[1]
            };
        }
Esempio n. 6
0
 private double[][] CreateTextBagOfWords(string[][] inputs)
 {
     // Binary (presence/absence) bag-of-words: counts are capped at 1.
     var bagOfWords = new BagOfWords()
     {
         MaximumOccurance = 1
     };

     _textBagOfWords = bagOfWords;

     // Learn the vocabulary, then vectorize the same inputs with it.
     bagOfWords.Learn(inputs);
     return bagOfWords.Transform(inputs);
 }
Esempio n. 7
0
        /// <summary>
        /// Formats a list of Tweets to Bag-of-Words format
        /// </summary>
        /// <param name="tweets"> A list of tweets </param>
        /// <returns>  Formatted Tweets in Bag of Words format </returns>
        /// <summary>
        /// Formats a list of Tweets to Bag-of-Words format
        /// </summary>
        /// <param name="tweets"> A list of tweets </param>
        /// <returns> Formatted Tweets in Bag of Words format </returns>
        double[][] FormatTweets(List <Tweet> tweets)
        {
            // Collect the raw text of every tweet.
            List <string> _tweets = tweets.Select(t => t.Text).ToList();

            // Custom tokenizer (a plain whitespace tokenizer via
            // tweets.ToArray().Tokenize() would also work here).
            string[][] tokens = tp.Tokenizer(_tweets);

            // Vectorize with the already-trained bag-of-words model.
            return bagOfWords.Transform(tokens);
        }
        //Make this class a singleton so that it is not retrained for every class it is used by
        private TextAnalyzer()
        {
            // Naive Bayes trainer; a little regularization keeps the Normal
            // estimates from degenerating on rarely-seen words.
            var teacher = new NaiveBayesLearning <NormalDistribution, NormalOptions>()
            {
                Options = { InnerOption = { Regularization = 1e-6 } }
            };

            // Read in the training data and stop words.
            string liberalTrainingPath      = System.Web.Hosting.HostingEnvironment.MapPath(@"~/Data/liberal_training.txt");
            string conservativeTrainingPath = System.Web.Hosting.HostingEnvironment.MapPath(@"~/Data/conservative_training.txt");
            string stopWordsPath            = System.Web.Hosting.HostingEnvironment.MapPath(@"~/Data/stop_words.txt");

            string[] liberalSamples      = File.ReadAllLines(liberalTrainingPath);
            string[] conservativeSamples = File.ReadAllLines(conservativeTrainingPath);
            stopWords = File.ReadAllLines(stopWordsPath);

            // Concat the samples into one array (reading each file separately
            // first lets us know how many samples belong to each class).
            string[] samples = liberalSamples.Concat(conservativeSamples).ToArray();

            // Break the text up into individual words.
            string[][] words = samples.Tokenize();

            // Without training data the classifier cannot work; fail fast.
            if (words.Length == 0)
            {
                throw new InvalidOperationException("No training data for TextAnalyzer");
            }

            // Remove common English words.
            words = TrimStopWords(words);

            // Create a bag of words using the tokenized sample data.
            bagOfWords = new BagOfWords();
            bagOfWords.Learn(words);

            // Label each sample by its source file: 0 = liberal, 1 = conservative.
            int[] outputs = new int[samples.Length];
            for (int i = 0; i < samples.Length; i++)
            {
                outputs[i] = i < liberalSamples.Length ? 0 : 1;
            }

            // Train the classifier.
            double[][] inputs = bagOfWords.Transform(words);
            nbClassifier = teacher.Learn(inputs, outputs);
        }
Esempio n. 9
0
        /// <summary>
        /// Compares the first two edits made by technician "CSM05" using
        /// cosine similarity over binary bag-of-words vectors.
        /// </summary>
        static void Main(string[] args)
        {
            // The entity context owns a database connection; dispose it deterministically.
            using (DiferenciasEntities db = new DiferenciasEntities())
            {
                var query = db.Edicion.Where(x => x.Tecnico.Equals("CSM05"));

                string[]   edits = query.Select(x => x.CadenaInicial).ToArray();
                string[][] words = edits.Tokenize();

                // Guard: the comparison below needs at least two documents;
                // without it, words[1] would throw IndexOutOfRangeException.
                if (words.Length < 2)
                {
                    Console.WriteLine("Not enough documents to compare.");
                    return;
                }

                // Binary bag-of-words: the resulting vectors contain only 0's and 1's.
                var codebook = new BagOfWords()
                {
                    MaximumOccurance = 1
                };

                codebook.Learn(words);

                double[] bow1   = codebook.Transform(words[0]);
                double[] bow2   = codebook.Transform(words[1]);
                Cosine   cosine = new Cosine();

                Console.WriteLine(cosine.Similarity(bow1, bow2));
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Analyzes an array of text using a Naive Bayes classifier and returns estimated political leaning
        /// </summary>
        /// <param name="texts">Array of texts to classify</param>
        /// <returns>Average of all political leanings for the texts array. 0.0 is conservative, 1.0 is liberal, 0.5 is moderate</returns>
        /// <remarks>
        /// NOTE(review): the summary above maps 0.0 to conservative, but whether the
        /// classifier's label 0 means conservative depends on how training outputs
        /// were assigned — confirm against the training code.
        /// </remarks>
        public float Analyze(string[] texts)
        {
            if (texts.Length == 0)
            {
                return DEFAULT_RANK;
            }

            // Same pipeline as training: tokenize, drop stop words, vectorize.
            string[][] tokenized = TrimStopWords(texts.Tokenize());
            double[][] features  = bagOfWords.Transform(tokenized);

            // Classify every text; average the labels when any came back.
            int[] decisions = nbClassifier.Decide(features);

            return decisions.Length == 0 ? DEFAULT_RANK : (float)decisions.Average();
        }
Esempio n. 11
0
        public TrainingResult Train()
        {
            // Each pipeline stage is timed via Start/StopMeasure on the result.
            var trainingResult = new TrainingResult();

            // Load the labeled sentiment dataset from the Excel worksheet.
            trainingResult.StartMeasure(TrainingResult.RecordType.LoadDataset);
            var       excel = new ExcelReader(Helpers.DatasetPath);
            DataTable sheet = excel.GetWorksheet("Training");

            int[]    labels = sheet.ToVector <int>("Label");
            string[] texts  = sheet.ToVector <string>("Sentiment");
            trainingResult.StopMeasure();

            // Split every sentence into tokens.
            trainingResult.StartMeasure(TrainingResult.RecordType.Tokenization);
            string[][] tokens = texts.Select(t => _preprocessor.Process(t)).ToArray();
            trainingResult.StopMeasure();

            // Learn the vocabulary from the tokenized corpus.
            trainingResult.StartMeasure(TrainingResult.RecordType.BagOfWordsLearning);
            _bagOfWords = new BagOfWords();
            _bagOfWords.Learn(tokens);
            trainingResult.StopMeasure();

            // Turn token sequences into fixed-length integer feature vectors.
            trainingResult.StartMeasure(TrainingResult.RecordType.Featurization);
            int[][] features = _bagOfWords.Transform(tokens).ToInt32();
            trainingResult.StopMeasure();

            // Fit the Naive Bayes classifier on the featurized data.
            trainingResult.StartMeasure(TrainingResult.RecordType.NaiveBayesLearning);
            var teacher = new NaiveBayesLearning();

            _bayes = teacher.Learn(features, labels);
            trainingResult.StopMeasure();

            return trainingResult;
        }
        // Demo handler: builds both a plain BoW and a TF-IDF codebook over six
        // encyclopedia paragraphs, trains a multiclass linear SVM on the BoW
        // features, then uses the TF-IDF scores to find which document scores
        // highest for the term "Ottomans". Results (error, irWhichDocument)
        // are only inspected in the debugger — nothing is displayed.
        private void Button_Click(object sender, RoutedEventArgs e)
        {
            string[] texts =
            {
                @"The concept of grouping students together in a centralized location for learning has existed since Classical antiquity. Formal schools have existed at least since ancient Greece (see Academy), ancient Rome (see Education in Ancient Rome) ancient India (see Gurukul), and ancient China (see History of education in China). The Byzantine Empire had an established schooling system beginning at the primary level. According to Traditions and Encounters, the founding of the primary education system began in 425 AD and ... military personnel usually had at least a primary education .... The sometimes efficient and often large government of the Empire meant that educated citizens were a must. Although Byzantium lost much of the grandeur of Roman culture and extravagance in the process of surviving, the Empire emphasized efficiency in its war manuals. The Byzantine education system continued until the empire's collapse in 1453 AD.[4]",
                @"In Western Europe a considerable number of cathedral schools were founded during the Early Middle Ages in order to teach future clergy and administrators, with the oldest still existing, and continuously operated, cathedral schools being The King's School, Canterbury (established 597 CE), King's School, Rochester (established 604 CE), St Peter's School, York (established 627 CE) and Thetford Grammar School (established 631 CE). Beginning in the 5th century CE monastic schools were also established throughout Western Europe, teaching both religious and secular subjects.",
                @"Islam was another culture that developed a school system in the modern sense of the word. Emphasis was put on knowledge, which required a systematic way of teaching and spreading knowledge, and purpose-built structures. At first, mosques combined both religious performance and learning activities, but by the 9th century, the madrassa was introduced, a school that was built independently from the mosque, such as al-Qarawiyyin, founded in 859 CE. They were also the first to make the Madrassa system a public domain under the control of the Caliph.",
                @"Under the Ottomans, the towns of Bursa and Edirne became the main centers of learning. The Ottoman system of Külliye, a building complex containing a mosque, a hospital, madrassa, and public kitchen and dining areas, revolutionized the education system, making learning accessible to a wider public through its free meals, health care and sometimes free accommodation.",
                @"In Europe, universities emerged during the 12th century; here, scholasticism was an important tool, and the academicians were called schoolmen. During the Middle Ages and much of the Early Modern period, the main purpose of schools (as opposed to universities) was to teach the Latin language. This led to the term grammar school, which in the United States informally refers to a primary school, but in the United Kingdom means a school that selects entrants based on ability or aptitude. Following this, the school curriculum has gradually broadened to include literacy in the vernacular language as well as technical, artistic, scientific and practical subjects.",
                @"Obligatory school attendance became common in parts of Europe during the 18th century. In Denmark-Norway, this was introduced as early as in 1739-1741, the primary end being to increase the literacy of the almue, i.e. the regular people.[5] Many of the earlier public schools in the United States and elsewhere were one-room schools where a single teacher taught seven grades of boys and girls in the same classroom. Beginning in the 1920s, one-room schools were consolidated into multiple classroom facilities with transportation increasingly provided by kid hacks and school buses."
            };

            string[][] words = texts.Tokenize();

            // Plain bag-of-words codebook, learned directly from the constructor.
            var Bow = new BagOfWords(words);

            // Create a new TF-IDF with options:
            var codebook = new TFIDF()
            {
                Tf  = TermFrequency.Log,
                Idf = InverseDocumentFrequency.Default,
            };

            // Compute the codebook (note: this would have to be done only for the training set)
            codebook.Learn(words);

            // Now, we can use the learned codebook to extract fixed-length
            // representations of the different texts (paragraphs) above:

            // Extract a TF-IDF feature vector for every document:
            List <double[]> lstDocumentsScores = new List <double[]>();

            for (int i = 0; i < texts.Length; i++)
            {
                lstDocumentsScores.Add(codebook.Transform(words[i]));
            }

            var teacher = new MulticlassSupportVectorLearning <Linear>()
            {
                // using LIBLINEAR's L2-loss SVC dual for each SVM
                Learner = (p) => new LinearDualCoordinateDescent()
                {
                    Loss = Loss.L2
                }
            };

            // The SVM is trained on the plain BoW counts, not the TF-IDF scores.
            double[][] features = Bow.Transform(words);

            teacher.ParallelOptions.MaxDegreeOfParallelism = 1; // (Remove, comment, or change this line to enable full parallelism)

            // Learn a machine
            var machine = teacher.Learn(features, new int[] { 0, 0, 1, 1, 0, 0 });

            int[] predicted = machine.Decide(features);

            // Training error on the same hand-assigned labels.
            double error = new ZeroOneLoss(new int[] { 0, 0, 1, 1, 0, 0 }).Loss(predicted);

            // Extract a feature vector from the text 2:
            //example
            // double[] bow2 = codebook.Transform(words[1]);

            // Look up the code of a search term; presumably the tokenizer
            // lower-cases words, hence the ToLower() — confirm against Tokenize().
            var indexSerachedTerm = Bow.StringToCode["Ottomans".ToLower()];

            double dblMaxScore     = double.MinValue;
            int    irWhichDocument = int.MinValue;

            // Find the document whose TF-IDF score for the term is highest.
            for (int i = 0; i < texts.Length; i++)
            {
                if (lstDocumentsScores[i][indexSerachedTerm] > dblMaxScore)
                {
                    irWhichDocument = i;
                    dblMaxScore     = lstDocumentsScores[i][indexSerachedTerm];
                }
            }
        }
Esempio n. 13
0
        // Console demo: runs a Stanford NER (CRF) classifier over a sample
        // sentence, extracts the tagged entity spans, builds a bag-of-words
        // over them, then loops reading questions from the console and
        // classifying each one with a pre-trained LibSVM model ("BAYAR" vs "PERGI").
        static void Main()
        {
            // Path to the folder with classifiers models
            var jarRoot = @"\Users\devir\OneDrive\Documents\Visual Studio 2015\Projects\ner";
            var classifiersDirecrory = jarRoot + @"\classifiers";

            // Loading 3 class classifier model
            var classifier = CRFClassifier.getClassifierNoExceptions(
                classifiersDirecrory + @"\english.muc.7class.distsim.crf.ser.gz");

            var s1 = " She got up this morning at 9:00 am and went to a shop to spend five dollars to buy a 50% off toothbrush.";


            var s2 = "Tell the latest on olympics from the New York.";

            Console.WriteLine("{0}\n", classifier.classifyToCharacterOffsets(s1));
            Console.WriteLine("{0}\n", classifier.classifyWithInlineXML(s1));

            // Extract the NER tags one by one from the inline-XML output.
            string result = classifier.classifyWithInlineXML(s1);
            String substr1 = "TIME";
            String substr2 = "LOCATION";
            String substr3 = "PERSON";
            String substr4 = "ORGANIZATION";
            String substr5 = "MONEY";
            String substr6 = "Percent";
            String substr7 = "Date";
            string total1, total2, total3, total4, total5, total6, total7;

            //if (result.Contains(substr1))
            //{
            //    string[] hasiltime = GetStringInBetween("<TIME>", "</TIME>", result, false, false);
            //    string output_time = hasiltime[0];
            //    string next_time = hasiltime[1];
            //    total1 = output_time;
            //   // Console.WriteLine(output_time);
            //}
            //if (result.Contains(substr2))
            //{
            //    string[] hasillocation = GetStringInBetween("<LOCATION>", "</LOCATION>", result, false, false);
            //    string output_location = hasillocation[0];
            //    string next_loc = hasillocation[1];
            //    //Console.WriteLine(output_location);
            //    total2 = output_location;
            //}
            //if (result.Contains(substr3))
            //{
            //    string[] hasilperson = GetStringInBetween("<PERSON>", "</PERSON>", result, false, false);
            //    string output_person = hasilperson[0];
            //    string next_person = hasilperson[1];
            //    //Console.WriteLine(hasilperson);
            //    total3 = output_person;
            //}
            //if (result.Contains(substr4))
            //{
            //    string[] hasilORGANIZATION = GetStringInBetween("<ORGANIZATION>", "</ORGANIZATION>", result, false, false);
            //    string output_ORGANIZATION = hasilORGANIZATION[0];
            //    string next_ORGANIZATION = hasilORGANIZATION[1];
            //    //Console.WriteLine(output_ORGANIZATION);
            //    total4 = output_ORGANIZATION;
            //}
            //if (result.Contains(substr5))
            //{
            //    string[] hasilMONEY = GetStringInBetween("<MONEY>", "</MONEY>", result, false, false);
            //    string output_MONEY = hasilMONEY[0];
            //    string next_MONEY = hasilMONEY[1];
            //    // Console.WriteLine(output_MONEY);
            //    total5 = output_MONEY;
            //}
            //if (result.Contains(substr6))
            //{
            //    string[] hasilPercent = GetStringInBetween("<Percent>", "</Percent>", result, false, false);
            //    string output_Percent = hasilPercent[0];
            //    string next_Percent = hasilPercent[1];
            //    //Console.WriteLine(output_Percent);
            //    total6 = output_Percent;
            //}
            //if (result.Contains(substr7))
            //{
            //    string[] hasilDate = GetStringInBetween("<Date>", "</Date>", result, false, false);
            //    string output_Date = hasilDate[0];
            //    string next_Date = hasilDate[1];
            //    //Console.WriteLine(output_Date);
            //    total7 = output_Date;

            //}

            // NOTE(review): unlike the commented-out version above, these calls are
            // unguarded — if a tag is absent from the NER output, GetStringInBetween's
            // result may not contain two elements; confirm its behavior on a miss.
            string[] hasiltime   = GetStringInBetween("<TIME>", "</TIME>", result, false, false);
            string   output_time = hasiltime[0];
            string   next_time   = hasiltime[1];

            total1 = output_time;
            //Console.WriteLine(output_time);

            string[] hasillocation   = GetStringInBetween("<LOCATION>", "</LOCATION>", result, false, false);
            string   output_location = hasillocation[0];
            string   next_loc        = hasillocation[1];

            //Console.WriteLine(output_location);
            total2 = output_location;

            string[] hasilperson   = GetStringInBetween("<PERSON>", "</PERSON>", result, false, false);
            string   output_person = hasilperson[0];
            string   next_person   = hasilperson[1];

            //Console.WriteLine(hasilperson);
            total3 = output_person;

            string[] hasilORGANIZATION   = GetStringInBetween("<ORGANIZATION>", "</ORGANIZATION>", result, false, false);
            string   output_ORGANIZATION = hasilORGANIZATION[0];
            string   next_ORGANIZATION   = hasilORGANIZATION[1];

            //Console.WriteLine(output_ORGANIZATION);
            total4 = output_ORGANIZATION;

            string[] hasilMONEY   = GetStringInBetween("<MONEY>", "</MONEY>", result, false, false);
            string   output_MONEY = hasilMONEY[0];
            string   next_MONEY   = hasilMONEY[1];

            // Console.WriteLine(output_MONEY);
            total5 = output_MONEY;

            string[] hasilPercent   = GetStringInBetween("<Percent>", "</Percent>", result, false, false);
            string   output_Percent = hasilPercent[0];
            string   next_Percent   = hasilPercent[1];

            //Console.WriteLine(output_Percent);
            total6 = output_Percent;

            string[] hasilDate   = GetStringInBetween("<Date>", "</Date>", result, false, false);
            string   output_Date = hasilDate[0];
            string   next_Date   = hasilDate[1];

            //Console.WriteLine(output_Date);
            total7 = output_Date;


            // Bag-of-words over the extracted entity strings (one "document" per tag).
            string semua = total1 + ";" + total2 + ";" + total3 + ";" + total4 + ";" + total5 + ";" + total6 + ";" + total7 + ";";

            Console.WriteLine(semua);
            string[] gabungan = { total1, total2, total3, total4, total5, total6, total7 };

            foreach (var a in gabungan)
            {
                Console.WriteLine(a);
            }
            string[][] words = gabungan.Tokenize();
            //var codebook = new TFIDF()
            //{
            //    Tf = TermFrequency.Log,
            //    Idf = InverseDocumentFrequency.Default
            //};
            var codebook = new BagOfWords()
            {
                MaximumOccurance = 1 // the resulting vector will have only 0's and 1's
            };

            codebook.Learn(words);
            double[]   bow1            = codebook.Transform(words[0]);
            double[]   bow2            = codebook.Transform(words[1]);
            double[]   bow3            = codebook.Transform(words[2]);
            double[]   bow4            = codebook.Transform(words[3]);
            double[]   bow5            = codebook.Transform(words[4]);
            double[]   bow6            = codebook.Transform(words[5]);
            double[]   bow7            = codebook.Transform(words[6]);
            double[][] keseluruhanBOW1 = { bow1, bow2, bow3, bow4, bow5, bow6, bow7 };

            // Interactive loop: classify typed questions until the process is killed
            // (quitNow is never set to true, so this loop runs indefinitely).
            bool quitNow = false;

            while (!quitNow)
            {
                string val;
                Console.Write("Enter question: ");
                val = Console.ReadLine();
                string[] textss =
                {
                    val,
                };



                // NOTE(review): a fresh BagOfWords is learned from the single typed
                // question, so its feature indices need not line up with the
                // vocabulary the saved SVM was trained on — verify this is intended.
                string[][] wordss = textss.Tokenize();
                //var codebook2 = new TFIDF()
                //{
                //    Tf = TermFrequency.Log,
                //    Idf = InverseDocumentFrequency.Default
                //};
                var codebook2 = new BagOfWords()
                {
                    MaximumOccurance = 1 // the resulting vector will have only 0's and 1's
                };
                codebook2.Learn(wordss);
                double[] c1   = codebook2.Transform(wordss[0]);
                string   path = @"C:\Users\devir\OneDrive\Documents\Visual Studio 2015\Projects\ner";
                //var load_svm_model = Serializer.Load<MulticlassClassifierBase>(Path.Combine(path, "pelatihanSVMbayardanpergi.bin"));


                //LibSvmModel modela = LibSvmModel.Load(Path.Combine(path, "pelatihanSVMbayardanpergi.bint"));
                //int jawaban = load_svm_model.Decide( c1); // answer will be 2.
                // Now, we can use the model class to create the equivalent Accord.NET SVM:

                //Console.WriteLine(jawaban);
                LibSvmModel model = LibSvmModel.Load(Path.Combine(path, "pelatihanSVMbayardanpergi.txt"));

                // Now, we can use the model class to create the equivalent Accord.NET SVM:
                SupportVectorMachine svm = model.CreateMachine();

                // Binary decision: false => "BAYAR" (pay), true => "PERGI" (go).
                bool predicted = svm.Decide(c1);

                // var machine = teacher.Learn(inputs, outputs);

                if (predicted == false)
                {
                    Console.WriteLine("BAYAR");
                }
                ;
                if (predicted == true)
                {
                    Console.WriteLine("PERGI");
                }
                ;
                Console.ReadLine();
            }

            // In order to convert any 2d array to jagged one
            // let's use a generic implementation
        }
Esempio n. 14
0
        /// <summary>
        /// Trains a Naive Bayes model that separates program-generated text from
        /// text not generated by a program, then classifies a sample sentence and
        /// prints the decision plus both class probabilities.
        /// </summary>
        public void ClassifyText()
        {
            readFile();

            // One document per class: [0] program-generated, [1] not program-generated.
            string[] documents = { sbGeneratedByProgram.ToString(), sbNotGeneratedByProgram.ToString() };

            string[][] tokens = documents.Tokenize();

            // Create a new Bag-of-Words for the texts
            var bow = new BagOfWords()
            {
                MaximumOccurance = 1 // the resulting vector will have only 0's and 1's
            };

            bow.Learn(tokens);

            // Create inputs and outputs for training
            int[][] inputs =
            {
                bow.Transform(tokens[0]).ToInt32(),
                bow.Transform(tokens[1]).ToInt32()
            };

            int[] outputs =
            {
                0, // Program Generated
                1  // Not Program Generated
            };

            // Create the naïve bayes model; Laplace smoothing avoids zero
            // probabilities for words that appear in only one class.
            var learner = new NaiveBayesLearning();

            learner.Options.InnerOption.UseLaplaceRule = true;

            var nb = learner.Learn(inputs, outputs);

            // Classify an unseen sentence with the trained model.
            string[] text   = "Yup! Two years without a car! Downtown living is morning too I got that this morning too too If the wi. Here's hoping!".Tokenize();
            int[]    input  = bow.Transform(text).ToInt32();
            int      answer = nb.Decide(input);

            Console.WriteLine(answer);

            // Compute the probabilities once instead of twice.
            double[] probabilities = nb.Probabilities(input);
            Console.WriteLine(probabilities[0] + " " + probabilities[1]);
        }
Esempio n. 15
0
        public void learn_test()
        {
            #region doc_learn
            // The Bag-Of-Words model turns variable-length token sequences
            // (for example, tokenized paragraphs of text) into fixed-length
            // numeric feature vectors:


            string[] paragraphs =
            {
                @"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas molestie malesuada 
                  nisi et placerat. Curabitur blandit porttitor suscipit. Nunc facilisis ultrices felis,
                  vitae luctus arcu semper in. Fusce ut felis ipsum. Sed faucibus tortor ut felis placerat
                  euismod. Vestibulum pharetra velit et dolor ornare quis malesuada leo aliquam. Aenean 
                  lobortis, tortor iaculis vestibulum dictum, tellus nisi vestibulum libero, ultricies 
                  pretium nisi ante in neque. Integer et massa lectus. Aenean ut sem quam. Mauris at nisl 
                  augue, volutpat tempus nisl. Suspendisse luctus convallis metus, vitae pretium risus 
                  pretium vitae. Duis tristique euismod aliquam",

                @"Sed consectetur nisl et diam mattis varius. Aliquam ornare tincidunt arcu eget adipiscing. 
                  Etiam quis augue lectus, vel sollicitudin lorem. Fusce lacinia, leo non porttitor adipiscing, 
                  mauris purus lobortis ipsum, id scelerisque erat neque eget nunc. Suspendisse potenti. Etiam 
                  non urna non libero pulvinar consequat ac vitae turpis. Nam urna eros, laoreet id sagittis eu,
                  posuere in sapien. Phasellus semper convallis faucibus. Nulla fermentum faucibus tellus in 
                  rutrum. Maecenas quis risus augue, eu gravida massa."
            };

            string[][] tokenized = paragraphs.Tokenize();

            // Capping occurrences at 1 makes every output vector binary (0/1):
            var bagOfWords = new BagOfWords
            {
                MaximumOccurance = 1
            };

            // Learn the vocabulary (in a real application, only the
            // training set would be used for this step):
            bagOfWords.Learn(tokenized);


            // With the vocabulary fixed, each paragraph maps to a
            // feature vector of the same, finite length:

            // Feature vector for the first paragraph:
            double[] features1 = bagOfWords.Transform(tokenized[0]);

            // Feature vector for the second paragraph:
            double[] features2 = bagOfWords.Transform(tokenized[1]);

            // (Transform also accepts the whole jagged array at once, i.e.
            // double[][] all = bagOfWords.Transform(tokenized);)


            // Since features1 and features2 have identical lengths, they can
            // be fed to any classifier or machine learning method. Below, a
            // Logistic Regression is trained to tell the two paragraphs apart.

            // Configure the logistic regression learner:
            var teacher = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                Tolerance      = 1e-4, // convergence threshold
                Iterations     = 100,  // maximum number of iterations to perform
                Regularization = 0
            };

            // Train the classifier on the two paragraphs:
            LogisticRegression regression = teacher.Learn(
                new[] { features1, features2 }, new[] { false, true });

            // Finally, query the trained classifier:
            bool predicted1 = regression.Decide(features1); // Should be false
            bool predicted2 = regression.Decide(features2); // Should be true
            #endregion

            Assert.AreEqual(features1.Length, 99);
            Assert.AreEqual(features2.Length, 99);

            Assert.AreEqual(features1.Sum(), 67);
            Assert.AreEqual(features2.Sum(), 63);

            Assert.IsFalse(predicted1);
            Assert.IsTrue(predicted2);
        }
Esempio n. 16
0
        /// <summary>
        /// Trains and evaluates four independent Naive Bayes classifiers — one per
        /// 30-tweet fold — then prints each fold's accuracy, the overall accuracy,
        /// and a combined 3x3 confusion matrix (rows = expected class,
        /// columns = predicted class).
        /// NOTE(review): the original summary claimed "90 tweets for training",
        /// but each fold trains on a 30-row slice (e.g. rows 0-29); confirm the
        /// intended split against the ReadInputEx/ReadInputIn implementations.
        /// </summary>
        /// <param name="inputFile">Tweets</param>
        /// <param name="outputFile">Labels</param>
        public void TestNaiveBayes(string inputFile, string outputFile)
        {
            const int folds      = 4;
            const int foldSize   = 30;
            const int numClasses = 3;

            double[] accuracies   = new double[folds];
            int      totalCorrect = 0;
            int      totalTested  = 0;

            // Confusion matrix accumulated across all folds.
            int[][] confusionMatrix = new int[numClasses][];
            for (int i = 0; i < numClasses; i++)
            {
                confusionMatrix[i] = new int[numClasses];
            }

            // One complete train/evaluate pipeline per fold (replaces the four
            // copy-pasted bow/teacher/model/answer blocks of the original).
            for (int fold = 0; fold < folds; fold++)
            {
                int start = fold * foldSize;

                // Training slice: rows start .. start+29 (the Ex readers take
                // inclusive bounds — presumably; matches the original calls).
                string[][] trainTokens = ReadInputEx(inputFile, start, start + foldSize - 1);
                int[]      trainLabels = ReadOutputEx(outputFile, start, start + foldSize - 1);

                // Binary bag-of-words: feature vectors contain only 0's and 1's.
                var bow = new BagOfWords
                {
                    MaximumOccurance = 1
                };
                bow.Learn(trainTokens);

                double[][] trainInputs = bow.Transform(trainTokens);

                var teacher = new NaiveBayesLearning <NormalDistribution>();
                teacher.Options.InnerOption = new NormalOptions
                {
                    Regularization = 1e-6 // to avoid zero variances
                };

                var bayes = teacher.Learn(trainInputs, trainLabels);

                // Test slice: the In readers take an exclusive upper bound
                // (again matching the original calls — verify against callee).
                double[][] testInputs = bow.Transform(ReadInputIn(inputFile, start, start + foldSize));
                int[]      testLabels = ReadOutputIn(outputFile, start, start + foldSize);

                int[] predictions = bayes.Decide(testInputs);

                // Score this fold in isolation. The original iterated all four
                // folds with a single loop bounded by testOutputs1.Length,
                // which breaks if the folds ever differ in size.
                int correct = 0;
                for (int i = 0; i < testLabels.Length; i++)
                {
                    confusionMatrix[testLabels[i]][predictions[i]]++;
                    if (predictions[i] == testLabels[i])
                    {
                        correct++;
                    }
                }

                accuracies[fold] = (double)correct / testLabels.Length;
                totalCorrect    += correct;
                totalTested     += testLabels.Length;
            }

            // Same output format as before: four accuracies on one line,
            // then the overall accuracy across every tested tweet.
            Console.WriteLine(string.Join(" ", accuracies));
            Console.WriteLine((double)totalCorrect / totalTested);


            Console.WriteLine();
            for (int i = 0; i < numClasses; i++)
            {
                for (int j = 0; j < numClasses; j++)
                {
                    Console.Write(confusionMatrix[i][j] + " ");
                }
                Console.WriteLine();
            }
        }