Example #1
1
        /// <summary>
        /// Trains four language tags from short word lists and checks the score
        /// reported for each tag when a mixed-language sentence is classified.
        /// </summary>
        public void TestLanguageDiscover()
        {
            // The expected scores are computed doubles; compare within a small
            // tolerance rather than demanding bit-for-bit equality.
            const double tolerance = 1e-9;

            var t = new BayesSimpleTextClassifier();
            t.Train("french", "le la les du un une je il elle de en");
            t.Train("german", "der die das ein eine");
            t.Train("spanish", "el uno una las de la en");
            t.Train("english", "the it she he they them are were to");
            t.Train("english", "the rain in spain falls mainly on the plain");

            var res = t.Classify("uno das je de la elle in");

            Assert.AreEqual(4, res.Count);
            Assert.AreEqual(0.9999, res["english"], tolerance);
            Assert.AreEqual(0.9999, res["german"], tolerance);
            Assert.AreEqual(0.67285006523593538, res["french"], tolerance);
            Assert.AreEqual(0.58077905232271598d, res["spanish"], tolerance);
        }
Example #2
0
        /// <summary>
        /// Classifies the same sentence twice: once with a default-constructed classifier
        /// and once with a classifier built from <c>SimpleTextTokenizer</c> and
        /// <c>RobinsonFisherCombiner</c>, checking the per-tag scores in each case.
        /// </summary>
        public void TestRobinsonFisherCombiner()
        {
            // The expected scores are computed doubles; compare within a small
            // tolerance rather than demanding bit-for-bit equality.
            const double tolerance = 1e-9;

            var t = new BayesSimpleTextClassifier();

            t.Train("Alimentação", "Ipiranga AMPM");
            t.Train("Alimentação", "Restaurante Bobs");
            t.Train("Combustível", "Posto Ipiranga");

            var res = t.Classify("Restaurante Ipiranga");

            Assert.AreEqual(2, res.Count);
            Assert.AreEqual(0.84415961583962162, res["Alimentação"], tolerance);
            Assert.AreEqual(0.33333333333333326, res["Combustível"], tolerance);

            // Same scenario with an explicitly supplied tokenizer and combiner.
            t = new BayesSimpleTextClassifier(new SimpleTextTokenizer(), new RobinsonFisherCombiner());
            t.Train("Alimentação", "IPIRANGA AMPM");
            t.Train("Alimentação", "Restaurante Bobs");
            t.Train("Combustível", "Posto Ipiranga");

            res = t.Classify("Restaurante Ipiranga");

            Assert.AreEqual(2, res.Count);
            Assert.AreEqual(0.99481185089082513, res["Alimentação"], tolerance);
            Assert.AreEqual(0.38128034540863015, res["Combustível"], tolerance);
        }
Example #3
0
        /// <summary>
        /// Trains "dog" and "cat" tags from a handful of sentences, then checks which tag
        /// comes first for a series of probe sentences and the scores for one final sentence.
        /// </summary>
        public void TestCatsAndDogs()
        {
            // Ignore list handed to the tokenizer.
            var stopWords = new List<string>();
            stopWords.Add("the");
            stopWords.Add("my");
            stopWords.Add("i");
            stopWords.Add("dont");

            var tokenizer = new SimpleTextTokenizer(true, stopWords);
            var classifier = new BayesSimpleTextClassifier(tokenizer);

            // { tag, sentence } pairs, trained in exactly this order.
            var trainingSet = new[]
            {
                new[] { "dog", "Dogs are awesome, cats too. I love my dog" },
                new[] { "cat", "Cats are more preferred by software developers. I never could stand cats. I have a dog" },
                new[] { "dog", "My dog's name is Willy. He likes to play with my wife's cat all day long. I love dogs" },
                new[] { "cat", "Cats are difficult animals, unlike dogs, really annoying, I hate them all" },
                new[] { "dog", "So which one should you choose? A dog, definitely." },
                new[] { "cat", "The favorite food for cats is bird meat, although mice are good, but birds are a delicacy" },
                new[] { "dog", "A dog will eat anything, including birds or whatever meat" },
                new[] { "cat", "My cat's favorite place to purr is on my keyboard" },
                new[] { "dog", "My dog's favorite place to take a leak is the tree in front of our house" }
            };
            foreach (var sample in trainingSet)
            {
                classifier.Train(sample[0], sample[1]);
            }

            // { expected first tag, probe sentence } pairs, asserted in exactly this order.
            var probes = new[]
            {
                new[] { "cat", "This test is about cats." },
                new[] { "cat", "I hate ..." },
                new[] { "cat", "The most annoying animal on earth." },
                new[] { "cat", "My precious, my favorite!" },
                new[] { "cat", "Get off my keyboard!" },
                new[] { "cat", "Kill that bird!" },
                new[] { "dog", "This test is about dogs." },
                new[] { "dog", "Cats or Dogs?" },
                new[] { "dog", "What pet will I love more?" },
                new[] { "cat", "Willy, where the heck are you?" },
                new[] { "dog", "Why is the front door of our house open?" }
            };
            foreach (var probe in probes)
            {
                Assert.AreEqual(probe[0], classifier.Classify(probe[1]).First().Key);
            }

            var scores = classifier.Classify("The preferred company of software developers.");

            Assert.AreEqual(2, scores.Count);
            Assert.AreEqual(0.9999, scores["cat"]);
            Assert.AreEqual(0.9999, scores["dog"]);
        }
Example #4
0
        /// <summary>
        /// Cat/dog classification scenario: trains two tags from sample sentences, verifies
        /// the first tag returned for each probe sentence, then checks the scores of a final
        /// sentence that yields both tags.
        /// </summary>
        public void TestCatsAndDogs()
        {
            var ignored = new List<string>();
            foreach (var word in new[] { "the", "my", "i", "dont" })
            {
                ignored.Add(word);
            }

            var bayes = new BayesSimpleTextClassifier(new SimpleTextTokenizer(true, ignored));

            // Parallel arrays: trainTags[k] is the tag for trainTexts[k]; order is preserved.
            var trainTags = new[] { "dog", "cat", "dog", "cat", "dog", "cat", "dog", "cat", "dog" };
            var trainTexts = new[]
            {
                "Dogs are awesome, cats too. I love my dog",
                "Cats are more preferred by software developers. I never could stand cats. I have a dog",
                "My dog's name is Willy. He likes to play with my wife's cat all day long. I love dogs",
                "Cats are difficult animals, unlike dogs, really annoying, I hate them all",
                "So which one should you choose? A dog, definitely.",
                "The favorite food for cats is bird meat, although mice are good, but birds are a delicacy",
                "A dog will eat anything, including birds or whatever meat",
                "My cat's favorite place to purr is on my keyboard",
                "My dog's favorite place to take a leak is the tree in front of our house"
            };
            for (var k = 0; k < trainTexts.Length; k++)
            {
                bayes.Train(trainTags[k], trainTexts[k]);
            }

            // Parallel arrays: expectedTags[k] is the first tag expected for probeTexts[k].
            var expectedTags = new[] { "cat", "cat", "cat", "cat", "cat", "cat", "dog", "dog", "dog", "cat", "dog" };
            var probeTexts = new[]
            {
                "This test is about cats.",
                "I hate ...",
                "The most annoying animal on earth.",
                "My precious, my favorite!",
                "Get off my keyboard!",
                "Kill that bird!",
                "This test is about dogs.",
                "Cats or Dogs?",
                "What pet will I love more?",
                "Willy, where the heck are you?",
                "Why is the front door of our house open?"
            };
            for (var k = 0; k < probeTexts.Length; k++)
            {
                var top = bayes.Classify(probeTexts[k]).First();
                Assert.AreEqual(expectedTags[k], top.Key);
            }

            var finalScores = bayes.Classify("The preferred company of software developers.");
            Assert.AreEqual(2, finalScores.Count);
            Assert.AreEqual(0.9999, finalScores["cat"]);
            Assert.AreEqual(0.9999, finalScores["dog"]);
        }
Example #5
0
        /// <summary>
        /// Trains one word under each of two tags and checks that a sentence containing
        /// only the "span" training word produces a single result for that tag.
        /// </summary>
        public void TestSpanHam()
        {
            var classifier = new BayesSimpleTextClassifier();
            classifier.Train("span", "bad");
            classifier.Train("ham", "good");

            var scores = classifier.Classify("this is a bad sentence");

            // Only the tag trained with "bad" is expected to be reported.
            Assert.AreEqual(1, scores.Count);
            Assert.AreEqual(0.9999, scores["span"]);
        }
Example #6
0
        /// <summary>
        /// Trains two tags with the same text and checks that <c>TagIds()</c> lists both
        /// ids in the order "teste", "teste1".
        /// </summary>
        public void TestTagIds()
        {
            const string sample = "Afonso França";
            var classifier = new BayesSimpleTextClassifier();
            classifier.Train("teste", sample);
            classifier.Train("teste1", sample);

            var tagIds = classifier.TagIds().ToList();

            Assert.AreEqual(2, tagIds.Count);
            Assert.AreEqual("teste", tagIds[0]);
            Assert.AreEqual("teste1", tagIds[1]);
        }
Example #7
0
        /// <summary>
        /// Checks that a sentence matching two tags reports both, and that after
        /// <c>MergeTags("mal", "bom")</c> the same sentence reports only "bom".
        /// </summary>
        public void TestMergeTags()
        {
            const string sentence = "gordo magro";

            var classifier = new BayesSimpleTextClassifier();
            classifier.Train("bom", "gordo");
            classifier.Train("mal", "magro");

            // Before the merge both tags are reported.
            var beforeMerge = classifier.Classify(sentence);
            Assert.AreEqual(2, beforeMerge.Count);
            Assert.AreEqual(0.9999, beforeMerge["bom"]);
            Assert.AreEqual(0.9999, beforeMerge["mal"]);

            classifier.MergeTags("mal", "bom");

            // After the merge only "bom" remains.
            var afterMerge = classifier.Classify(sentence);
            Assert.AreEqual(1, afterMerge.Count);
            Assert.AreEqual(0.9999, afterMerge["bom"]);
        }
Example #8
0
        /// <summary>
        /// Trains four language tags from short word lists and checks the score
        /// reported for each tag when a mixed-language sentence is classified.
        /// </summary>
        public void TestLanguageDiscover()
        {
            // Scores are computed doubles, so they are compared within a small
            // tolerance instead of with exact equality.
            const double tolerance = 1e-9;

            var t = new BayesSimpleTextClassifier();

            t.Train("french", "le la les du un une je il elle de en");
            t.Train("german", "der die das ein eine");
            t.Train("spanish", "el uno una las de la en");
            t.Train("english", "the it she he they them are were to");
            t.Train("english", "the rain in spain falls mainly on the plain");

            var res = t.Classify("uno das je de la elle in");

            Assert.AreEqual(4, res.Count);
            Assert.AreEqual(0.9999, res["english"], tolerance);
            Assert.AreEqual(0.9999, res["german"], tolerance);
            Assert.AreEqual(0.67285006523593538, res["french"], tolerance);
            Assert.AreEqual(0.58077905232271598d, res["spanish"], tolerance);
        }
Example #9
0
 /// <summary>
 /// Checks that <c>ChangeTagId</c> makes a trained tag retrievable under the new id
 /// and no longer retrievable under the old one.
 /// </summary>
 public void TestChangeTag()
 {
     const string oldId = "teste";
     const string newId = "teste2";

     var classifier = new BayesSimpleTextClassifier();
     classifier.Train(oldId, "Bla");

     // The new id must not exist before the rename.
     Assert.IsNull(classifier.GetTagById(newId));

     classifier.ChangeTagId(oldId, newId);

     Assert.IsNull(classifier.GetTagById(oldId));
     Assert.IsNotNull(classifier.GetTagById(newId));
 }
Example #10
0
        /// <summary>
        /// Checks that a trained tag can be looked up by id and that the lookup
        /// returns null once <c>RemoveTag</c> has been called for it.
        /// </summary>
        public void TestRemoveTag()
        {
            const string tagId = "teste";
            var classifier = new BayesSimpleTextClassifier();
            classifier.Train(tagId, "Bla");

            var beforeRemoval = classifier.GetTagById(tagId);
            Assert.IsNotNull(beforeRemoval);

            classifier.RemoveTag(tagId);

            var afterRemoval = classifier.GetTagById(tagId);
            Assert.IsNull(afterRemoval);
        }
Example #11
0
        /// <summary>
        /// Verifies tag merging: two tags are reported for the probe text before
        /// <c>MergeTags("mal", "bom")</c>, and only "bom" afterwards.
        /// </summary>
        public void TestMergeTags()
        {
            const string probe = "gordo magro";
            var bayes = new BayesSimpleTextClassifier();
            bayes.Train("bom", "gordo");
            bayes.Train("mal", "magro");

            var scores = bayes.Classify(probe);

            Assert.AreEqual(2, scores.Count);
            foreach (var tag in new[] { "bom", "mal" })
            {
                Assert.AreEqual(0.9999, scores[tag]);
            }

            bayes.MergeTags("mal", "bom");
            scores = bayes.Classify(probe);

            Assert.AreEqual(1, scores.Count);
            Assert.AreEqual(0.9999, scores["bom"]);
        }
Example #12
0
        // GET: Degems/Search
        /// <summary>
        /// Trains a Bayes classifier with "good"/"bad" training rows, scores every car by the
        /// averaged "good" minus "bad" classification of its comments, and renders the
        /// "Details" view for the car with the highest score.
        /// </summary>
        /// <returns>The "Details" view for the best-scoring car (a new <c>Degem</c> if there are no cars).</returns>
        public ActionResult algo()
        {
            var bayesCLS = new BayesSimpleTextClassifier();   // Naive Bayes object https://github.com/afonsof/BayesSharp
            var degems   = db.Degems.Include(d => d.Brand);   // all cars

            // NOTE(review): both queries select g.title, which the filter has just fixed to
            // "good"/"bad", so the training text is the label itself. Confirm whether another
            // column of TrainingDatas was meant to be selected here.
            List<string> goodCar = db.TrainingDatas.Where(g => g.title.Equals("good")).Select(g => g.title).ToList();
            List<string> badCar  = db.TrainingDatas.Where(g => g.title.Equals("bad")).Select(g => g.title).ToList();

            foreach (var good in goodCar) // train the "good" tag
            {
                bayesCLS.Train("good", good);
            }
            foreach (var bad in badCar)   // train the "bad" tag
            {
                bayesCLS.Train("bad", bad);
            }

            var maxScore    = -1.0;        // best score seen so far
            var favoriteCar = new Degem(); // best car seen so far

            foreach (var car in degems)
            {
                var good = 0.0;
                var bad  = 0.0;

                // Loop-invariant: used to average the per-comment scores.
                var commentCount = car.Comments.Count();

                foreach (var p in car.Comments)
                {
                    var result = bayesCLS.Classify(p.ContentInfo);

                    // A tag is only present in the result when the classifier reported it.
                    double score;
                    if (result.TryGetValue("good", out score))
                    {
                        good += score / commentCount;
                    }
                    if (result.TryGetValue("bad", out score))
                    {
                        bad += score / commentCount;
                    }
                }

                if (good - bad > maxScore)
                {
                    maxScore    = good - bad;
                    favoriteCar = car;
                }
            }

            return View("Details", favoriteCar);
        }
Example #13
0
        /// <summary>
        /// Trains a tag with two words, untrains one of them, and checks that
        /// classifying the untrained word yields no results.
        /// </summary>
        public void TestUntrain()
        {
            const string tagId = "teste";
            var classifier = new BayesSimpleTextClassifier();
            classifier.Train(tagId, "Afonso França");
            classifier.Untrain(tagId, "França");

            var scores = classifier.Classify("França");

            Assert.AreEqual(0, scores.Count);
        }
Example #14
0
        /// <summary>
        /// Saves a trained classifier to <c>bayes.json</c> next to the executing assembly,
        /// loads it into a fresh classifier, and checks both classify the same text identically.
        /// </summary>
        public void TestSaveAndLoad()
        {
            // Uri.LocalPath is the unescaped file-system path; AbsolutePath is percent-encoded
            // (e.g. "%20" for a space) and is not a valid file path for such directories.
            var assemblyPath = new System.Uri(Assembly.GetExecutingAssembly().CodeBase).LocalPath;
            var path         = Path.Combine(Path.GetDirectoryName(assemblyPath), "bayes.json");
            var t            = new BayesSimpleTextClassifier();

            t.Train("teste", "Afonso França");
            t.Save(path);
            var output = t.Classify("Afonso França");

            Assert.AreEqual(1, output.Count);
            Assert.AreEqual(0.9999, output["teste"]);

            // A fresh classifier that only knows what was persisted to disk.
            var t1 = new BayesSimpleTextClassifier();

            t1.Load(path);
            output = t1.Classify("Afonso França");

            Assert.AreEqual(1, output.Count);
            Assert.AreEqual(0.9999, output["teste"]);
        }
        /// <summary>
        /// Reads <c>Content/training_data.csv</c> from the application directory and trains
        /// <c>c</c> with every row, using the first comma-separated field as the text and the
        /// second as the label.
        /// </summary>
        public BlogController()
        {
            string appPath  = HttpRuntime.AppDomainAppPath;                          // Application path
            string filePath = Path.Combine(appPath, "Content", "training_data.csv"); // Training data

            // Dispose the reader even if reading or training throws.
            using (var reader = new StreamReader(filePath))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    var values = line.Split(',');

                    // Blank lines and rows without a label column cannot be used for training.
                    if (values.Length < 2)
                    {
                        continue;
                    }

                    c.Train(values[1], values[0]); // label (y), text (x)
                }
            }
        }
Example #16
0
        /// <summary>
        /// Verifies that <c>TagIds()</c> returns the ids of the trained tags,
        /// "teste" first and "teste1" second.
        /// </summary>
        public void TestTagIds()
        {
            var expectedIds = new[] { "teste", "teste1" };

            var bayes = new BayesSimpleTextClassifier();
            foreach (var id in expectedIds)
            {
                bayes.Train(id, "Afonso França");
            }

            var actualIds = bayes.TagIds().ToList();

            Assert.AreEqual(2, actualIds.Count);
            Assert.AreEqual("teste", actualIds[0]);
            Assert.AreEqual("teste1", actualIds[1]);
        }
Example #17
0
        /// <summary>
        /// Verifies that a word removed with <c>Untrain</c> no longer produces
        /// any classification result.
        /// </summary>
        public void TestUntrain()
        {
            const string removedWord = "França";

            var bayes = new BayesSimpleTextClassifier();
            bayes.Train("teste", "Afonso França");
            bayes.Untrain("teste", removedWord);

            var result = bayes.Classify(removedWord);

            // Nothing is expected to match the untrained word.
            Assert.AreEqual(0, result.Count);
        }
Example #18
0
        /// <summary>
        /// Round-trips a trained classifier through <c>Save</c>/<c>Load</c> using a
        /// <c>bayes.json</c> file next to the executing assembly and checks that the original
        /// and the reloaded classifier score the same text identically.
        /// </summary>
        public void TestSaveAndLoad()
        {
            // LocalPath gives the unescaped file-system path. AbsolutePath would keep URI
            // escapes such as "%20" and point at a non-existent directory when the path
            // contains spaces.
            var assemblyFile = new System.Uri(Assembly.GetExecutingAssembly().CodeBase).LocalPath;
            var path = Path.Combine(Path.GetDirectoryName(assemblyFile), "bayes.json");

            var t = new BayesSimpleTextClassifier();
            t.Train("teste", "Afonso França");
            t.Save(path);
            var output = t.Classify("Afonso França");
            Assert.AreEqual(1, output.Count);
            Assert.AreEqual(0.9999, output["teste"]);

            // Second classifier is populated only from the saved file.
            var t1 = new BayesSimpleTextClassifier();
            t1.Load(path);
            output = t1.Classify("Afonso França");

            Assert.AreEqual(1, output.Count);
            Assert.AreEqual(0.9999, output["teste"]);
        }
Example #19
0
        /// <summary>
        /// Two-tag scenario: a sentence containing only the word trained under "span"
        /// must yield exactly one result, for that tag.
        /// </summary>
        public void TestSpanHam()
        {
            var bayes = new BayesSimpleTextClassifier();
            bayes.Train("span", "bad");
            bayes.Train("ham", "good");

            const string sentence = "this is a bad sentence";
            var result = bayes.Classify(sentence);

            Assert.AreEqual(1, result.Count);
            Assert.AreEqual(0.9999, result["span"]);
        }
Example #20
0
        /// <summary>
        /// Trains a Bayes text classifier from the files under <c>class\</c> (one tag per file,
        /// named after the file), classifies every line of <c>test\tests.txt</c>, and writes the
        /// predicted labels plus per-tag and overall precision/recall/F figures to
        /// <c>test\results.txt</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            TXL txl = new TXL("stopwords.txt");
            BayesSimpleTextClassifier classifier = new BayesSimpleTextClassifier();
            List<DGLop>  DG           = new List<DGLop>();
            List<string> label_before = new List<string>(); // expected label of each test line
            List<string> label_after  = new List<string>(); // predicted label of each test line
            List<string> testlist     = new List<string>(); // test text with the label removed

            // Training: every line of a file is a sample for the tag named after that file.
            DirectoryInfo mydir = new DirectoryInfo(@"class\");

            foreach (FileInfo file in mydir.GetFiles())
            {
                DGLop tempDG = new DGLop();
                tempDG.name = Path.GetFileNameWithoutExtension(file.Name);
                DG.Add(tempDG);

                using (StreamReader sr = new StreamReader(file.FullName))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        classifier.Train(tempDG.name, txl.tienXuLy(line));
                    }
                }
            }

            // Both streams are disposed even when classification or scoring throws.
            using (StreamReader testinput = new StreamReader(@"test\tests.txt"))
            using (StreamWriter resultoutput = new StreamWriter(@"test\results.txt"))
            {
                string str;

                while ((str = testinput.ReadLine()) != null)
                {
                    // The expected label is the last space-separated token. Cut it off by
                    // position so other occurrences of the same word in the text are kept.
                    int    cut      = str.LastIndexOf(' ');
                    string expected = cut >= 0 ? str.Substring(cut + 1) : str;
                    string text     = cut >= 0 ? str.Substring(0, cut) : str;

                    label_before.Add(expected);
                    testlist.Add(text);

                    // NOTE(review): training lines go through txl.tienXuLy but test lines do
                    // not; confirm whether the same preprocessing should be applied here.
                    Dictionary<string, double> score = classifier.Classify(text);

                    // Classify can return no tags at all; record an empty label in that case
                    // instead of letting First() throw.
                    label_after.Add(score.Count > 0 ? score.First().Key : string.Empty);

                    resultoutput.WriteLine(text + " " + label_after.Last());
                }

                for (int i = 0; i < DG.Count; ++i)
                {
                    for (int j = 0; j < label_after.Count; ++j)
                    {
                        if (label_before[j].Equals(DG[i].name))
                        {
                            ++DG[i].before_count;
                        }

                        if (label_after[j].Equals(DG[i].name))
                        {
                            ++DG[i].after_count;
                            if (label_after[j].Equals(label_before[j]))
                            {
                                ++DG[i].acc_count;
                            }
                        }
                    }

                    // Zero denominators (tag never predicted / never expected) score 0, not NaN.
                    DG[i].p = SafeRatio(DG[i].acc_count, DG[i].after_count);
                    DG[i].r = SafeRatio(DG[i].acc_count, DG[i].before_count);
                    DG[i].f = SafeRatio(2 * DG[i].p * DG[i].r, DG[i].p + DG[i].r);
                }

                int    C_count = DG.Count;
                double p_macro = 0, r_macro = 0, f_macro = 0, f_micro = 0;

                for (int i = 0; i < C_count; ++i)
                {
                    p_macro += DG[i].p;
                    r_macro += DG[i].r;
                    f_micro += DG[i].acc_count;
                }
                p_macro = SafeRatio(p_macro, C_count);
                r_macro = SafeRatio(r_macro, C_count);
                f_macro = SafeRatio(2 * p_macro * r_macro, p_macro + r_macro);
                f_micro = SafeRatio(f_micro, testlist.Count);

                for (int i = 0; i < DG.Count; ++i)
                {
                    resultoutput.WriteLine("P {0}: {1}", DG[i].name, DG[i].p * 100);
                    resultoutput.WriteLine("R {0}: {1}", DG[i].name, DG[i].r * 100);
                    resultoutput.WriteLine("F {0}: {1}", DG[i].name, DG[i].f * 100);
                }

                resultoutput.WriteLine("F_macro: {0}", f_macro * 100);
                resultoutput.WriteLine("F_micro: {0}", f_micro * 100);
            }
        }

        // Divides numerator by denominator, returning 0 when the denominator is not positive.
        private static double SafeRatio(double numerator, double denominator)
        {
            return denominator > 0 ? numerator / denominator : 0.0;
        }
Example #21
0
        /// <summary>
        /// Scores the same sentence with a default-constructed classifier and with one built
        /// from <c>SimpleTextTokenizer</c> plus <c>RobinsonFisherCombiner</c>, checking the
        /// per-tag scores of each.
        /// </summary>
        public void TestRobinsonFisherCombiner()
        {
            // Scores are computed doubles, so they are compared within a small
            // tolerance instead of with exact equality.
            const double tolerance = 1e-9;

            var t = new BayesSimpleTextClassifier();
            t.Train("Alimentação", "Ipiranga AMPM");
            t.Train("Alimentação", "Restaurante Bobs");
            t.Train("Combustível", "Posto Ipiranga");

            var res = t.Classify("Restaurante Ipiranga");
            Assert.AreEqual(2, res.Count);
            Assert.AreEqual(0.84415961583962162, res["Alimentação"], tolerance);
            Assert.AreEqual(0.33333333333333326, res["Combustível"], tolerance);

            // Same scenario with an explicitly supplied tokenizer and combiner.
            t = new BayesSimpleTextClassifier(new SimpleTextTokenizer(), new RobinsonFisherCombiner());
            t.Train("Alimentação", "IPIRANGA AMPM");
            t.Train("Alimentação", "Restaurante Bobs");
            t.Train("Combustível", "Posto Ipiranga");

            res = t.Classify("Restaurante Ipiranga");
            Assert.AreEqual(2, res.Count);
            Assert.AreEqual(0.99481185089082513, res["Alimentação"], tolerance);
            Assert.AreEqual(0.38128034540863015, res["Combustível"], tolerance);
        }