/// <summary>
/// Trains the classifier with short word lists for four languages and verifies the
/// score reported for each language when classifying a sentence that mixes them.
/// </summary>
public void TestLanguageDiscover()
{
    var classifier = new BayesSimpleTextClassifier();

    // "english" is trained twice on purpose: both samples feed the same tag.
    classifier.Train("french", "le la les du un une je il elle de en");
    classifier.Train("german", "der die das ein eine");
    classifier.Train("spanish", "el uno una las de la en");
    classifier.Train("english", "the it she he they them are were to");
    classifier.Train("english", "the rain in spain falls mainly on the plain");

    var scores = classifier.Classify("uno das je de la elle in");

    // Every trained language shares at least one token with the input.
    Assert.AreEqual(4, scores.Count);
    Assert.AreEqual(0.9999, scores["english"]);
    Assert.AreEqual(0.9999, scores["german"]);
    Assert.AreEqual(0.67285006523593538, scores["french"]);
    Assert.AreEqual(0.58077905232271598d, scores["spanish"]);
}
/// <summary>
/// Classifies the same input twice: first with a default-constructed classifier, then
/// with one built from a <c>SimpleTextTokenizer</c> and a <c>RobinsonFisherCombiner</c>,
/// and checks the scores produced in each configuration.
/// </summary>
public void TestRobinsonFisherCombiner()
{
    // Pass 1: default-constructed classifier.
    var classifier = new BayesSimpleTextClassifier();
    classifier.Train("Alimentação", "Ipiranga AMPM");
    classifier.Train("Alimentação", "Restaurante Bobs");
    classifier.Train("Combustível", "Posto Ipiranga");

    var scores = classifier.Classify("Restaurante Ipiranga");

    Assert.AreEqual(2, scores.Count());
    Assert.AreEqual(0.84415961583962162, scores["Alimentação"]);
    Assert.AreEqual(0.33333333333333326, scores["Combustível"]);

    // Pass 2: explicit tokenizer plus the Robinson-Fisher combiner.
    // Note the first training sample is upper-cased here ("IPIRANGA").
    classifier = new BayesSimpleTextClassifier(new SimpleTextTokenizer(), new RobinsonFisherCombiner());
    classifier.Train("Alimentação", "IPIRANGA AMPM");
    classifier.Train("Alimentação", "Restaurante Bobs");
    classifier.Train("Combustível", "Posto Ipiranga");

    scores = classifier.Classify("Restaurante Ipiranga");

    Assert.AreEqual(2, scores.Count());
    Assert.AreEqual(0.99481185089082513, scores["Alimentação"]);
    Assert.AreEqual(0.38128034540863015, scores["Combustível"]);
}
/// <summary>
/// Trains a "cat"/"dog" classifier on nine sentences (with a small ignore list passed to
/// the tokenizer) and checks which tag comes first for a series of unseen sentences.
/// </summary>
public void TestCatsAndDogs()
{
    var ignoredWords = new List<string> { "the", "my", "i", "dont" };
    var classifier = new BayesSimpleTextClassifier(new SimpleTextTokenizer(true, ignoredWords));

    // Training samples, alternating between the two tags.
    classifier.Train("dog", "Dogs are awesome, cats too. I love my dog");
    classifier.Train("cat", "Cats are more preferred by software developers. I never could stand cats. I have a dog");
    classifier.Train("dog", "My dog's name is Willy. He likes to play with my wife's cat all day long. I love dogs");
    classifier.Train("cat", "Cats are difficult animals, unlike dogs, really annoying, I hate them all");
    classifier.Train("dog", "So which one should you choose? A dog, definitely.");
    classifier.Train("cat", "The favorite food for cats is bird meat, although mice are good, but birds are a delicacy");
    classifier.Train("dog", "A dog will eat anything, including birds or whatever meat");
    classifier.Train("cat", "My cat's favorite place to purr is on my keyboard");
    classifier.Train("dog", "My dog's favorite place to take a leak is the tree in front of our house");

    // Sentences whose first reported tag must be "cat".
    Assert.AreEqual("cat", classifier.Classify("This test is about cats.").First().Key);
    Assert.AreEqual("cat", classifier.Classify("I hate ...").First().Key);
    Assert.AreEqual("cat", classifier.Classify("The most annoying animal on earth.").First().Key);
    Assert.AreEqual("cat", classifier.Classify("My precious, my favorite!").First().Key);
    Assert.AreEqual("cat", classifier.Classify("Get off my keyboard!").First().Key);
    Assert.AreEqual("cat", classifier.Classify("Kill that bird!").First().Key);

    // Sentences whose first reported tag must be "dog" (with one "cat" in between).
    Assert.AreEqual("dog", classifier.Classify("This test is about dogs.").First().Key);
    Assert.AreEqual("dog", classifier.Classify("Cats or Dogs?").First().Key);
    Assert.AreEqual("dog", classifier.Classify("What pet will I love more?").First().Key);
    Assert.AreEqual("cat", classifier.Classify("Willy, where the heck are you?").First().Key);
    Assert.AreEqual("dog", classifier.Classify("Why is the front door of our house open?").First().Key);

    // Full score table for one sentence.
    var scores = classifier.Classify("The preferred company of software developers.");
    Assert.AreEqual(2, scores.Count);
    Assert.AreEqual(0.9999, scores["cat"]);
    Assert.AreEqual(0.9999, scores["dog"]);
}
/// <summary>
/// Trains a "cat"/"dog" classifier and verifies the top-ranked tag for several unseen
/// sentences, then checks the complete score table for one final sentence.
/// </summary>
public void TestCatsAndDogs()
{
    var ignoreList = new List<string> { "the", "my", "i", "dont" };
    var cls = new BayesSimpleTextClassifier(new SimpleTextTokenizer(true, ignoreList));

    // { tag, training text } pairs, fed to the classifier in this exact order.
    var trainingSet = new[]
    {
        new[] { "dog", "Dogs are awesome, cats too. I love my dog" },
        new[] { "cat", "Cats are more preferred by software developers. I never could stand cats. I have a dog" },
        new[] { "dog", "My dog's name is Willy. He likes to play with my wife's cat all day long. I love dogs" },
        new[] { "cat", "Cats are difficult animals, unlike dogs, really annoying, I hate them all" },
        new[] { "dog", "So which one should you choose? A dog, definitely." },
        new[] { "cat", "The favorite food for cats is bird meat, although mice are good, but birds are a delicacy" },
        new[] { "dog", "A dog will eat anything, including birds or whatever meat" },
        new[] { "cat", "My cat's favorite place to purr is on my keyboard" },
        new[] { "dog", "My dog's favorite place to take a leak is the tree in front of our house" }
    };
    foreach (var sample in trainingSet)
    {
        cls.Train(sample[0], sample[1]);
    }

    // { expected first tag, sentence to classify } pairs, checked in this exact order.
    var expectations = new[]
    {
        new[] { "cat", "This test is about cats." },
        new[] { "cat", "I hate ..." },
        new[] { "cat", "The most annoying animal on earth." },
        new[] { "cat", "My precious, my favorite!" },
        new[] { "cat", "Get off my keyboard!" },
        new[] { "cat", "Kill that bird!" },
        new[] { "dog", "This test is about dogs." },
        new[] { "dog", "Cats or Dogs?" },
        new[] { "dog", "What pet will I love more?" },
        new[] { "cat", "Willy, where the heck are you?" },
        new[] { "dog", "Why is the front door of our house open?" }
    };
    foreach (var expectation in expectations)
    {
        Assert.AreEqual(expectation[0], cls.Classify(expectation[1]).First().Key);
    }

    var res = cls.Classify("The preferred company of software developers.");
    Assert.AreEqual(2, res.Count);
    Assert.AreEqual(0.9999, res["cat"]);
    Assert.AreEqual(0.9999, res["dog"]);
}
/// <summary>
/// Verifies that a tag added with <c>AddTag</c> can afterwards be retrieved by its id.
/// </summary>
public void TestNewTag()
{
    var classifier = new BayesSimpleTextClassifier();

    classifier.AddTag("teste");

    var tag = classifier.GetTagById("teste");
    Assert.IsNotNull(tag);
}
/// <summary>
/// Verifies that <c>ChangeTagId</c> makes a trained tag reachable under its new id
/// and no longer reachable under the old one.
/// </summary>
public void TestChangeTag()
{
    var classifier = new BayesSimpleTextClassifier();
    classifier.Train("teste", "Bla");

    // The target id must not exist before the rename.
    Assert.IsNull(classifier.GetTagById("teste2"));

    classifier.ChangeTagId("teste", "teste2");

    Assert.IsNull(classifier.GetTagById("teste"));
    Assert.IsNotNull(classifier.GetTagById("teste2"));
}
/// <summary>
/// Verifies that a tag created through training disappears after <c>RemoveTag</c>.
/// </summary>
public void TestRemoveTag()
{
    var classifier = new BayesSimpleTextClassifier();
    classifier.Train("teste", "Bla");

    // Training must have created the tag.
    Assert.IsNotNull(classifier.GetTagById("teste"));

    classifier.RemoveTag("teste");

    Assert.IsNull(classifier.GetTagById("teste"));
}
/// <summary>
/// Verifies that a word removed with <c>Untrain</c> no longer yields any classification result.
/// </summary>
public void TestUntrain()
{
    var classifier = new BayesSimpleTextClassifier();
    classifier.Train("teste", "Afonso França");

    classifier.Untrain("teste", "França");

    var scores = classifier.Classify("França");
    Assert.AreEqual(0, scores.Count);
}
/// <summary>
/// Trains two single-word tags and checks that a sentence containing only the "span"
/// training word is reported for that tag alone.
/// </summary>
public void TestSpanHam()
{
    var classifier = new BayesSimpleTextClassifier();
    classifier.Train("span", "bad");
    classifier.Train("ham", "good");

    var scores = classifier.Classify("this is a bad sentence");

    // Only "span" is expected in the result; "ham" shares no word with the input.
    Assert.AreEqual(1, scores.Count);
    Assert.AreEqual(0.9999, scores["span"]);
}
/// <summary>
/// Verifies that <c>TagIds</c> lists the ids of all trained tags, in training order.
/// </summary>
public void TestTagIds()
{
    var classifier = new BayesSimpleTextClassifier();
    classifier.Train("teste", "Afonso França");
    classifier.Train("teste1", "Afonso França");

    var tagIds = classifier.TagIds().ToList();

    // tagIds is a materialized List, so the Count property is equivalent to Count().
    Assert.AreEqual(2, tagIds.Count);
    Assert.AreEqual("teste", tagIds[0]);
    Assert.AreEqual("teste1", tagIds[1]);
}
/// <summary>
/// Verifies that after <c>MergeTags("mal", "bom")</c> only the "bom" tag is reported
/// for an input that previously matched both tags.
/// </summary>
public void TestMergeTags()
{
    var classifier = new BayesSimpleTextClassifier();
    classifier.Train("bom", "gordo");
    classifier.Train("mal", "magro");

    // Before merging: each word maps to its own tag.
    var scores = classifier.Classify("gordo magro");
    Assert.AreEqual(2, scores.Count);
    Assert.AreEqual(0.9999, scores["bom"]);
    Assert.AreEqual(0.9999, scores["mal"]);

    classifier.MergeTags("mal", "bom");

    // After merging: a single tag remains for the same input.
    scores = classifier.Classify("gordo magro");
    Assert.AreEqual(1, scores.Count);
    Assert.AreEqual(0.9999, scores["bom"]);
}
// GET: Degems/Search
// Trains a Naive Bayes classifier (https://github.com/afonsof/BayesSharp) with "good" and
// "bad" samples from the database, scores every car by the averaged good-minus-bad
// classification of its comments, and renders the "Details" view for the top-scoring car.
public ActionResult algo()
{
    var classifier = new BayesSimpleTextClassifier();

    // All cars, with their brand included in the query.
    var cars = db.Degems.Include(d => d.Brand);

    // NOTE(review): both queries select `title`, the same column the filter has just matched
    // against the label, so each training text is the label text itself - confirm whether a
    // different column was intended.
    List<string> goodSamples = db.TrainingDatas.Where(g => g.title.Equals("good")).Select(g => g.title).ToList();
    List<string> badSamples = db.TrainingDatas.Where(g => g.title.Equals("bad")).Select(g => g.title).ToList();

    foreach (var sample in goodSamples)
    {
        classifier.Train("good", sample);
    }

    foreach (var sample in badSamples)
    {
        classifier.Train("bad", sample);
    }

    var bestScore = -1.0;        // best (good - bad) score seen so far
    var bestCar = new Degem();   // returned unchanged when no car beats the initial score

    foreach (var car in cars)
    {
        var goodTotal = 0.0;
        var badTotal = 0.0;

        foreach (var comment in car.Comments)
        {
            var scores = classifier.Classify(comment.ContentInfo);

            // Each comment contributes its score divided by the number of comments,
            // so the totals end up as per-car averages.
            if (scores.ContainsKey("good"))
            {
                goodTotal += scores["good"] / car.Comments.Count();
            }

            if (scores.ContainsKey("bad"))
            {
                badTotal += scores["bad"] / car.Comments.Count();
            }
        }

        var netScore = goodTotal - badTotal;
        if (netScore > bestScore)
        {
            bestScore = netScore;
            bestCar = car;
        }
    }

    return View("Details", bestCar);
}
/// <summary>
/// Verifies that a classifier saved to disk and loaded into a fresh instance classifies
/// the training text exactly like the original instance.
/// </summary>
public void TestSaveAndLoad()
{
    // Uri.LocalPath returns the unescaped operating-system path. AbsolutePath keeps URI
    // escapes (e.g. "%20" for a space), which produces a non-existent directory whenever
    // the assembly lives in a folder whose name needs escaping.
    var assemblyPath = new System.Uri(Assembly.GetExecutingAssembly().CodeBase).LocalPath;
    var path = Path.Combine(new FileInfo(assemblyPath).Directory.FullName, "bayes.json");

    var t = new BayesSimpleTextClassifier();
    t.Train("teste", "Afonso França");
    t.Save(path);

    // Baseline: the instance that was trained in memory.
    var output = t.Classify("Afonso França");
    Assert.AreEqual(1, output.Count);
    Assert.AreEqual(0.9999, output["teste"]);

    // A fresh instance restored from the saved file must give the same answer.
    var t1 = new BayesSimpleTextClassifier();
    t1.Load(path);

    output = t1.Classify("Afonso França");
    Assert.AreEqual(1, output.Count);
    Assert.AreEqual(0.9999, output["teste"]);
}
/// <summary>
/// Resolves the value described by a serialized notification parameter and adds it to
/// <paramref name="store"/> under <c>o.Name</c>.
/// </summary>
/// <remarks>
/// When <c>o.Value</c> is null, the first element of <paramref name="args"/> whose type (or a
/// base type) matches by full name is selected, <c>o.Condition</c> entries are checked against
/// it, the value is read with <c>GetFP(o.Path)</c> and then run through each entry of
/// <c>o.Processing</c> (ToLower, GetValue, ConditionalSubstitution, TernarySubstitution,
/// Resolve, ToEnumMemberName, NameSplit, IdentifyLanguage, BayesClassify, Method).
/// When <c>o.Value</c> is not null, it is used directly, provided that some element of
/// <paramref name="args"/> has the full type name <paramref name="type"/>.
/// The resulting value is serialized with <c>wasSerializeObject</c>; nothing is stored when the
/// serialization is empty. A single serialized element is stored with surrounding double quotes
/// trimmed, several elements are stored as CSV.
/// The method returns without storing <c>o.Name</c> when no matching argument is found, when a
/// condition does not hold, or after a NameSplit step (which stores the first and last name
/// under its own keys and returns immediately).
/// </remarks>
/// <param name="o">The parameter description being processed.</param>
/// <param name="Client">Grid client passed to the agent name resolver.</param>
/// <param name="corradeConfiguration">Supplies the timeouts and decay type used by the agent name resolver.</param>
/// <param name="type">Full type name to match arguments against when <c>o.Type</c> is null or <c>o.Value</c> is set.</param>
/// <param name="args">Candidate argument objects to pull values from.</param>
/// <param name="store">Destination dictionary; written only while holding <paramref name="sync"/>.</param>
/// <param name="sync">Lock object guarding every write to <paramref name="store"/>.</param>
/// <param name="languageDetector">Used by the IdentifyLanguage processing step.</param>
/// <param name="bayesSimpleClassifier">Used by the BayesClassify processing step.</param>
// NOTE(review): in the branch taken when o.Type is null, the base-type check still compares
// against o.Type rather than `type` - confirm whether that is intended.
// NOTE(review): the ToLower step only tests whether ToLower.Culture is set; the culture value
// itself is not passed to ToLower() - confirm.
// NOTE(review): every write uses store.Add, which presumably throws if the key already
// exists - verify callers never process two parameters with the same name.
public static void ProcessParameters(this SerializedNotification.Parameter o, GridClient Client, Configuration corradeConfiguration, string type, List <object> args, Dictionary <string, string> store, object sync, LanguageDetector languageDetector, BayesSimpleTextClassifier bayesSimpleClassifier) { object value; switch (o.Value != null) { case false: var arg = o.Type != null ? args.AsParallel() .FirstOrDefault( a => string.Equals(a.GetType().FullName, o.Type) || a.GetType() .GetBaseTypes() .AsParallel() .Any(t => string.Equals(t.FullName, o.Type))) : args.AsParallel() .FirstOrDefault( a => string.Equals(a.GetType().FullName, type) || a.GetType() .GetBaseTypes() .AsParallel() .Any(t => string.Equals(t.FullName, o.Type))); if (arg == null) { return; } // Process all conditions and return if they all fail. if (o.Condition != null) { if ( o.Condition.AsParallel() .Select(condition => new { condition, conditional = arg.GetFP(condition.Path) }) .Where(t => t.conditional != null && !t.conditional.Equals(t.condition.Value)) .Select(t => t.condition).Any()) { return; } } value = arg.GetFP(o.Path); if (o.Processing != null) { foreach (var process in o.Processing) { if (process.ToLower != null) { value = process.ToLower.Culture != null ? value.ToString().ToLower() : value.ToString().ToLowerInvariant(); continue; } if (process.GetValue != null) { IDictionary iDict = null; var dict = arg.GetFP(process.GetValue.Path); var internalDictionaryInfo = dict.GetType() .GetField("Dictionary", BindingFlags.Default | BindingFlags.CreateInstance | BindingFlags.Instance | BindingFlags.NonPublic); if (dict is IDictionary) { iDict = dict as IDictionary; goto PROCESS; } if (internalDictionaryInfo != null) { iDict = internalDictionaryInfo.GetValue(dict) as IDictionary; } PROCESS: if (iDict != null) { var look = arg.GetFP(process.GetValue.Value); if (!iDict.Contains(look)) { continue; } value = process.GetValue.Get != null ? 
iDict[look].GetFP(process.GetValue.Get) : iDict[look]; } continue; } if (process.ConditionalSubstitution != null) { dynamic l = null; dynamic r = null; if (process.ConditionalSubstitution.Type != null) { arg = args.AsParallel() .FirstOrDefault( a => string.Equals(a.GetType().FullName, process.ConditionalSubstitution.Type)); if (arg != null) { l = arg.GetFP(process.ConditionalSubstitution.Path); r = process.ConditionalSubstitution.Check; } } if (l == null || r == null) { continue; } if (l == r) { value = process.ConditionalSubstitution.Value; break; } continue; } if (process.TernarySubstitution != null) { dynamic l = null; dynamic r = null; if (process.TernarySubstitution.Type != null) { arg = args.AsParallel() .FirstOrDefault( a => string.Equals(a.GetType().FullName, process.TernarySubstitution.Type)); if (arg != null) { l = arg.GetFP(process.TernarySubstitution.Path); r = process.TernarySubstitution.Value; } } if (l == null || r == null) { continue; } value = l == r ? process.TernarySubstitution.Left : process.TernarySubstitution.Right; continue; } if (process.Resolve != null) { switch (process.Resolve.ResolveType) { case SerializedNotification.ResolveType.AGENT: switch (process.Resolve.ResolveDestination) { case SerializedNotification.ResolveDestination.UUID: var fullName = new List <string>(wasOpenMetaverse.Helpers.GetAvatarNames(value as string)); var agentUUID = UUID.Zero; if (!fullName.Any() || !Resolvers.AgentNameToUUID(Client, fullName.First(), fullName.Last(), corradeConfiguration.ServicesTimeout, corradeConfiguration.DataTimeout, new DecayingAlarm(corradeConfiguration.DataDecayType), ref agentUUID)) { break; } value = agentUUID; break; } break; } continue; } if (process.ToEnumMemberName != null && process.ToEnumMemberName.Type != null && process.ToEnumMemberName.Assembly != null) { value = Enum.GetName( Assembly.Load(process.ToEnumMemberName.Assembly) .GetType(process.ToEnumMemberName.Type), value); continue; } if (process.NameSplit != null) { if 
(process.NameSplit.Condition != null) { var nameSplitCondition = arg.GetFP(process.NameSplit.Condition.Path); if (!nameSplitCondition.Equals(process.NameSplit.Condition.Value)) { continue; } } var fullName = new List <string>(wasOpenMetaverse.Helpers.GetAvatarNames(value as string)); if (fullName.Any()) { lock (sync) { store.Add(process.NameSplit.First, fullName.First()); store.Add(process.NameSplit.Last, fullName.Last()); } } return; } if (process.IdentifyLanguage != null) { var detectedLanguage = languageDetector.Detect(value as string); if (detectedLanguage != null) { lock (sync) { store.Add(process.IdentifyLanguage.Name, detectedLanguage); } } continue; } if (process.BayesClassify != null) { var bayesClassification = bayesSimpleClassifier.Classify(value as string).FirstOrDefault(); if (!bayesClassification.Equals(default(KeyValuePair <string, double>))) { lock (sync) { store.Add(process.BayesClassify.Name, CSV.FromKeyValue(bayesClassification)); } } continue; } if (process.Method != null) { Type methodType; switch (process.Method.Assembly != null) { case true: methodType = Assembly.Load(process.Method.Assembly).GetType(process.Method.Type); break; default: methodType = Type.GetType(process.Method.Type); break; } object instance; try { instance = Activator.CreateInstance(methodType); } catch (Exception) { instance = null; } switch (process.Method.Parameters != null) { case true: value = methodType.GetMethod(process.Method.Name, process.Method.Parameters.Values.Select(Type.GetType).ToArray()) .Invoke(instance, process.Method.Parameters.Keys.Select(arg.GetFP).ToArray()); break; default: value = methodType.GetMethod(process.Method.Name) .Invoke( Activator.CreateInstance(methodType).GetFP(process.Method.Path), null); break; } break; } } } break; default: if (!args.AsParallel().Any(a => string.Equals(a.GetType().FullName, type))) { return; } value = o.Value; break; } var data = new HashSet <string>(wasOpenMetaverse.Reflection.wasSerializeObject(value)); if 
(!data.Any()) { return; } var output = CSV.FromEnumerable(data); if (data.Count.Equals(1)) { output = data.First().Trim('"'); } lock (sync) { store.Add(o.Name, output); } }
/// <summary>
/// Trains a Bayes text classifier from the files in <c>class\</c> (one class per file, one
/// training sample per line), classifies every line of <c>test\tests.txt</c> (text followed
/// by a space and the expected label), and writes the predictions together with per-class
/// precision / recall / F-score and the macro/micro averages to <c>test\results.txt</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    TXL txl = new TXL("stopwords.txt");
    BayesSimpleTextClassifier classifier = new BayesSimpleTextClassifier();
    List<DGLop> DG = new List<DGLop>();
    List<string> label_before = new List<string>();   // expected labels
    List<string> label_after = new List<string>();    // predicted labels
    List<string> testlist = new List<string>();       // test texts with the label stripped

    // --- Training: the file name (without extension) is the class name. ---
    DirectoryInfo mydir = new DirectoryInfo(@"class\");
    foreach (FileInfo file in mydir.GetFiles())
    {
        DGLop tempDG = new DGLop();
        tempDG.name = Path.GetFileNameWithoutExtension(file.Name);
        DG.Add(tempDG);

        // `using` guarantees the file handle is released even if training throws.
        using (StreamReader sr = new StreamReader(file.FullName))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                classifier.Train(tempDG.name, txl.tienXuLy(line));
            }
        }
    }

    // --- Classification and reporting. ---
    using (StreamReader testinput = new StreamReader(@"test\tests.txt"))
    using (StreamWriter resultoutput = new StreamWriter(@"test\results.txt"))
    {
        string str;
        while ((str = testinput.ReadLine()) != null)
        {
            // The expected label is the last space-separated token. Cut it off by position:
            // a string Replace would also delete the label wherever else it occurs in the
            // text (and every space when the label is empty).
            int labelStart = str.LastIndexOf(' ');
            string expected = str.Substring(labelStart + 1);
            string text = labelStart < 0 ? str : str.Substring(0, labelStart);

            label_before.Add(expected);
            testlist.Add(text);

            // Classify may return no tags at all (no known word in the text); record an
            // empty prediction instead of crashing on First().
            Dictionary<string, double> score = classifier.Classify(text);
            string predicted = score.Count > 0 ? score.First().Key : string.Empty;
            label_after.Add(predicted);

            resultoutput.WriteLine(text + " " + predicted);
        }

        // Per-class counts: before = expected occurrences, after = predicted occurrences,
        // acc = predictions that match the expected label.
        for (int i = 0; i < DG.Count; ++i)
        {
            for (int j = 0; j < label_after.Count; ++j)
            {
                if (label_before[j].Equals(DG[i].name))
                {
                    ++DG[i].before_count;
                }

                if (label_after[j].Equals(DG[i].name))
                {
                    ++DG[i].after_count;
                    if (label_after[j].Equals(label_before[j]))
                    {
                        ++DG[i].acc_count;
                    }
                }
            }

            DG[i].p = SafeDivide(DG[i].acc_count, DG[i].after_count);
            DG[i].r = SafeDivide(DG[i].acc_count, DG[i].before_count);
            DG[i].f = SafeDivide(2 * DG[i].p * DG[i].r, DG[i].p + DG[i].r);
        }

        int C_count = DG.Count;
        double p_macro = 0, r_macro = 0, f_macro = 0, f_micro = 0;
        for (int i = 0; i < C_count; ++i)
        {
            p_macro += DG[i].p;
            r_macro += DG[i].r;
            f_micro += DG[i].acc_count;
        }

        p_macro = SafeDivide(p_macro, C_count);
        r_macro = SafeDivide(r_macro, C_count);
        f_macro = SafeDivide(2 * p_macro * r_macro, p_macro + r_macro);
        f_micro = SafeDivide(f_micro, testlist.Count);

        for (int i = 0; i < DG.Count; ++i)
        {
            resultoutput.WriteLine("P {0}: {1}", DG[i].name, DG[i].p * 100);
            resultoutput.WriteLine("R {0}: {1}", DG[i].name, DG[i].r * 100);
            resultoutput.WriteLine("F {0}: {1}", DG[i].name, DG[i].f * 100);
        }

        resultoutput.WriteLine("F_macro: {0}", f_macro * 100);
        resultoutput.WriteLine("F_micro: {0}", f_micro * 100);
    }
}

// Divides two numbers, returning 0 instead of NaN/Infinity when the denominator is 0
// (e.g. a class that was never predicted, or an empty test file).
private static double SafeDivide(double numerator, double denominator)
{
    return denominator == 0 ? 0.0 : numerator / denominator;
}