/// <summary>
/// Classifies a sentence with the trained multinomial naive Bayes model and
/// returns every known label paired with its normalized posterior probability.
/// </summary>
/// <param name="sentence">Sentence to classify; its Vector is (re)built by the encoder.</param>
/// <param name="options">Classify options (not consumed directly here).</param>
/// <returns>List of (label, probability) tuples; probabilities sum to 1.</returns>
public List <Tuple <string, double> > Classify(Sentence sentence, ClassifyOptions options)
{
    // Project the sentence onto the vocabulary learned during training.
    var encoder = new OneHotEncoder();
    encoder.Words = words;
    encoder.Encode(sentence);

    // Posterior probability for each candidate label.
    var scores = new List <Tuple <string, double> >();
    foreach (var label in labelDist)
    {
        double posterior = nb.CalPosteriorProb(label.Value, sentence.Vector, label.Prob, condProbDictionary);
        scores.Add(new Tuple <string, double>(label.Value, posterior));
    }

    // Normalize so the returned values form a probability distribution.
    double total = scores.Sum(x => x.Item2);
    return scores
           .Select(x => new Tuple <string, double>(x.Item1, x.Item2 / total))
           .ToList();
}
/// <summary>
/// Builds a classifier factory for the given language and options.
/// NOTE(review): IClassify / IFeatureExtractor are instantiated with new(),
/// so they are presumably generic type parameters with a new() constraint — confirm.
/// </summary>
public ClassifierFactory(ClassifyOptions options, SupportedLanguage lang)
{
    _options = options;
    _lang = lang;

    // Eagerly create the classifier and feature extractor instances.
    _classifier = new IClassify();
    featureExtractor = new IFeatureExtractor();
}
/// <summary>
/// Vectorizes sentences with a count-based feature extractor and converts each
/// sentence vector into an SVM node array (one node per feature dimension).
/// The feature list and dictionary learned on the first call are cached in the
/// corresponding fields and reused on later calls (e.g. at prediction time).
/// </summary>
/// <param name="sentences">Sentences to vectorize; their Vector is filled by the extractor.</param>
/// <param name="options">Supplies the Word2Vec model file path for the extractor.</param>
/// <returns>One Node[] per input sentence.</returns>
public List <Node[]> GetData(List <Sentence> sentences, ClassifyOptions options)
{
    var extractor = new CountFeatureExtractor();
    extractor.ModelFile = options.Word2VecFilePath;
    extractor.Sentences = sentences;

    // Reuse the vocabulary learned during training when it is available.
    if (features != null)
    {
        extractor.Features = features;
    }

    if (dictionary != null)
    {
        extractor.Dictionary = dictionary;
    }

    extractor.Vectorize(featuresInTfIdf);

    // First call: cache what the extractor learned for subsequent calls.
    if (features == null)
    {
        features = extractor.Features;
    }

    if (dictionary == null)
    {
        dictionary = extractor.Dictionary;
    }

    // Fix: removed the unused local `name` and the dead commented-out
    // word-lookup code; the node index and value come straight from the
    // sentence vector.
    var datas = new List <Node[]>();
    foreach (var sentence in sentences)
    {
        var curNodes = new List <Node>();
        for (int i = 0; i < extractor.Features.Count; i++)
        {
            curNodes.Add(new Node(i, sentence.Vector[i]));
        }

        datas.Add(curNodes.ToArray());
    }

    return datas;
}
/// <summary>
/// Classifies a sentence with the trained SVM model and pairs each known
/// category with its predicted probability.
/// </summary>
/// <param name="sentence">Sentence to classify.</param>
/// <param name="options">Options forwarded to <see cref="Predict"/>.</param>
/// <returns>(category, probability) tuples in category order; empty when the model returns no rows.</returns>
public List <Tuple <string, double> > Classify(Sentence sentence, ClassifyOptions options)
{
    var categoryList = new List <Tuple <string, double> >();
    var result = Predict(sentence, options).FirstOrDefault();

    // Fix: FirstOrDefault() yields null when Predict returns an empty array;
    // the original code then crashed with a NullReferenceException on result.Length.
    if (result == null)
    {
        return categoryList;
    }

    for (int i = 0; i < result.Length; i++)
    {
        categoryList.Add(new Tuple <string, double>(categories[i], result[i]));
    }

    return categoryList;
}
/// <summary>
/// Loads the persisted model and classifies the sentence, returning the
/// (label, score) pairs sorted from most to least likely.
/// </summary>
public List <Tuple <string, double> > Classify(Sentence sentence)
{
    var options = new ClassifyOptions
    {
        ModelFilePath = _options.ModelFilePath
    };
    _classifier.LoadModel(options);

    // Highest-scoring label first.
    return _classifier.Classify(sentence, options)
           .OrderByDescending(x => x.Item2)
           .ToList();
}
/// <summary>
/// Predicts label probabilities for a single labeled feature set using the
/// model and range transform carried in <paramref name="options"/>.
/// </summary>
/// <returns>Per-sample arrays of label probabilities from the SVM.</returns>
public double[][] Predict(FeaturesWithLabel featureSet, ClassifyOptions options)
{
    // Wrap the single sample so it can go through the shared GetData path.
    var batch = new List <FeaturesWithLabel> { featureSet };

    var predict = new Problem();
    predict.X = GetData(batch).ToArray();
    predict.Y = new double[1];
    predict.Count = predict.X.Count();
    // NOTE(review): 300 is hard-coded — presumably the training-time feature
    // dimensionality; confirm it matches the extractor's output.
    predict.MaxIndex = 300;

    RangeTransform transform = options.Transform;
    Problem scaled = transform.Scale(predict);

    return Prediction.PredictLabelsProbability(options.Model, scaled);
}
/// <summary>
/// Predicts label probabilities for one sentence: vectorizes it, scales it
/// with the transform from <paramref name="options"/>, and runs the SVM model.
/// </summary>
/// <returns>Per-sample arrays of label probabilities.</returns>
public double[][] Predict(Sentence sentence, ClassifyOptions options)
{
    var predict = new Problem
    {
        X = GetData(new List <Sentence> { sentence }).ToArray(),
        Y = new double[1]
    };
    predict.Count = predict.X.Count();
    predict.MaxIndex = features.Count;

    // Scale with the same range transform that was fitted at training time.
    transform = options.Transform;
    Problem scaled = transform.Scale(predict);

    return Prediction.PredictLabelsProbability(model, scaled);
}
/// <summary>
/// Trains the multinomial naive Bayes classifier: builds a TF-IDF vocabulary,
/// one-hot encodes the sentences, then computes prior and conditional
/// probabilities into <c>labelDist</c> and <c>condProbDictionary</c>.
/// </summary>
/// <param name="sentences">Labeled training sentences; EncodeAll fills each sentence.Vector.</param>
/// <param name="options">Supplies the TF-IDF dimension.</param>
public void Train(List <Sentence> sentences, ClassifyOptions options)
{
    // Pick keywords per category via TF-IDF as the encoding vocabulary.
    var tfidf = new TfIdfFeatureExtractor();
    tfidf.Dimension = options.Dimension;
    tfidf.Sentences = sentences;
    tfidf.CalBasedOnCategory();

    // One-hot encode every sentence against the keyword vocabulary.
    var encoder = new OneHotEncoder();
    encoder.Sentences = sentences;
    encoder.Words = tfidf.Keywords();
    words = encoder.EncodeAll();

    // (label, featureVector) pairs used by the naive Bayes calculations below.
    var featureSets = sentences.Select(x => new Tuple <string, double[]>(x.Label, x.Vector)).ToList();

    // Empirical label frequencies, ordered by label for stable output.
    labelDist = featureSets.GroupBy(x => x.Item1)
                .Select(x => new Probability
    {
        Value = x.Key,
        Freq = x.Count()
    })
                .OrderBy(x => x.Value)
                .ToList();

    // nb must see the label distribution and feature sets before the
    // prior/conditional probability calls below.
    nb.LabelDist = labelDist;
    nb.FeatureSet = featureSets;

    // calculate prior prob
    labelDist.ForEach(l => l.Prob = nb.CalPriorProb(l.Value));

    // calculate posterior prob
    // loop features
    // Conditional probability for every (label, feature position, feature value)
    // triple. The key format "{label} f{pos} {value}" must match the lookup key
    // used when classifying — do not change it independently.
    var featureCount = nb.FeatureSet[0].Item2.Length;

    labelDist.ForEach(label =>
    {
        for (int x = 0; x < featureCount; x++)
        {
            for (int v = 0; v < features.Length; v++)
            {
                string key = $"{label.Value} f{x} {features[v]}";
                condProbDictionary[key] = nb.CalCondProb(x, label.Value, features[v]);
            }
        }
    });
}
/// <summary>
/// Trains an SVM classifier over the sentences: selects TF-IDF keyword
/// features, builds and scales the training problem, then fits the model into
/// the <c>model</c> field (with <c>transform</c> holding the fitted scaler).
/// </summary>
/// <param name="sentences">Labeled training sentences.</param>
/// <param name="options">Supplies the TF-IDF dimension and is forwarded to GetData.</param>
/// <param name="svm">SVM formulation (default C-SVC).</param>
/// <param name="kernel">Kernel type (default RBF).</param>
/// <param name="probability">Whether to train probability estimates.</param>
/// <param name="outputFile">Unused; kept for interface compatibility.</param>
/// <exception cref="ArgumentException">Thrown when the data contains a single class.</exception>
public void SVMClassifierTrain(List <Sentence> sentences, ClassifyOptions options, SvmType svm = SvmType.C_SVC, KernelType kernel = KernelType.RBF, bool probability = true, string outputFile = null)
{
    // Select TF-IDF keywords (per category) as the feature vocabulary.
    var tfidf = new TfIdfFeatureExtractor();
    tfidf.Dimension = options.Dimension;
    tfidf.Sentences = sentences;
    tfidf.CalBasedOnCategory();
    featuresInTfIdf = tfidf.Keywords();

    // copy test multiclass Model
    Problem train = new Problem();
    train.X = GetData(sentences, options).ToArray();
    train.Y = GetLabels(sentences).ToArray();
    train.Count = train.X.Count();
    train.MaxIndex = train.X[0].Count();

    Parameter param = new Parameter();
    transform = RangeTransform.Compute(train);
    Problem scaled = transform.Scale(train);
    param.Gamma = 1.0 / 3;
    param.SvmType = svm;
    param.KernelType = kernel;
    param.Probability = probability;

    int numberOfClasses = train.Y.OrderBy(x => x).Distinct().Count();
    if (numberOfClasses == 1)
    {
        // Fix: the original only wrote a warning to the console and then kept
        // training on a degenerate single-class problem; fail fast instead,
        // consistent with the sibling SVMClassifierTrain overloads.
        throw new ArgumentException("Number of classes must greater than one!");
    }

    if (svm == SvmType.C_SVC)
    {
        // Equal class weights for C-SVC.
        for (int i = 0; i < numberOfClasses; i++)
        {
            param.Weights[i] = 1;
        }
    }

    model = Training.Train(scaled, param);
    Console.Write("Training finished!");
}
/// <summary>
/// Persists all SVM model artifacts (features, dictionary, categories,
/// range transform, and the model itself) into the model directory.
/// </summary>
/// <returns>The path of the written model file.</returns>
public string SaveModel(ClassifyOptions options)
{
    // All artifacts live side by side in the model directory.
    string dir = options.ModelDir;
    options.TransformFilePath = Path.Combine(dir, "transform");
    options.FeaturesFileName = Path.Combine(dir, "features");
    options.DictionaryFileName = Path.Combine(dir, "dictionary");
    options.CategoriesFileName = Path.Combine(dir, "categories");

    // JSON for the simple collections…
    File.WriteAllText(options.FeaturesFileName, JsonConvert.SerializeObject(features));
    File.WriteAllText(options.DictionaryFileName, JsonConvert.SerializeObject(dictionary));
    File.WriteAllText(options.CategoriesFileName, JsonConvert.SerializeObject(categories));

    // …and the library's own writers for the transform and the model.
    RangeTransform.Write(options.TransformFilePath, transform);
    Bigtree.Algorithm.SVM.Model.Write(options.ModelFilePath, model);

    return options.ModelFilePath;
}
/// <summary>
/// Serializes the naive Bayes state (label distribution, conditional
/// probabilities, vocabulary) to the model file as UTF-8 JSON.
/// </summary>
/// <returns>The path of the written model file.</returns>
public string SaveModel(ClassifyOptions options)
{
    var snapshot = new MultinomiaNaiveBayesModel
    {
        LabelDist = labelDist,
        CondProbDictionary = condProbDictionary,
        Values = words
    };

    // Write the raw UTF-8 bytes — identical output to the previous
    // BinaryWriter-over-FileStream implementation.
    byte[] payload = Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(snapshot));
    File.WriteAllBytes(options.ModelFilePath, payload);

    return options.ModelFilePath;
}
object IClassifier.LoadModel(ClassifyOptions options) { options.FeaturesFileName = Path.Combine(options.ModelDir, "features"); options.DictionaryFileName = Path.Combine(options.ModelDir, "dictionary"); options.ModelFilePath = Path.Combine(options.ModelDir, options.ModelName); options.TransformFilePath = Path.Combine(options.ModelDir, "transform"); options.CategoriesFileName = Path.Combine(options.ModelDir, "categories"); features = JsonConvert.DeserializeObject <List <String> >(File.ReadAllText(options.FeaturesFileName)); dictionary = JsonConvert.DeserializeObject <List <Tuple <string, int> > >(File.ReadAllText(options.DictionaryFileName)); categories = JsonConvert.DeserializeObject <List <String> >(File.ReadAllText(options.CategoriesFileName)); model = Bigtree.Algorithm.SVM.Model.Read(options.ModelFilePath); options.Transform = RangeTransform.Read(options.TransformFilePath); return(model); }
/// <summary>
/// Restores the naive Bayes state from the UTF-8 JSON model file written by
/// <c>SaveModel</c>, repopulating the label distribution, conditional
/// probability dictionary, and vocabulary fields.
/// </summary>
/// <returns>The deserialized model object.</returns>
public Object LoadModel(ClassifyOptions options)
{
    // Read the raw bytes and decode as UTF-8 — equivalent to the previous
    // BinaryReader-over-FileStream implementation.
    byte[] bytes = File.ReadAllBytes(options.ModelFilePath);
    string json = Encoding.UTF8.GetString(bytes);

    var model = JsonConvert.DeserializeObject <MultinomiaNaiveBayesModel>(json);
    labelDist = model.LabelDist;
    condProbDictionary = model.CondProbDictionary;
    words = model.Values;

    return model;
}
/// <summary>
/// Trains an SVM classifier over the sentences: builds and scales the
/// training problem, then fits the model into the <c>model</c> field
/// (with <c>transform</c> holding the fitted scaler).
/// </summary>
/// <param name="outputFile">Unused; kept for interface compatibility.</param>
/// <exception cref="ArgumentException">Thrown when the data contains a single class.</exception>
public void SVMClassifierTrain(List <Sentence> sentences, ClassifyOptions options, SvmType svm = SvmType.C_SVC, KernelType kernel = KernelType.RBF, bool probability = true, string outputFile = null)
{
    // copy test multiclass Model
    var train = new Problem();
    train.X = GetData(sentences).ToArray();
    train.Y = GetLabels(sentences).ToArray();
    train.Count = train.X.Count();
    train.MaxIndex = train.X[0].Count();

    // Fit the range transform on the raw problem, then scale it.
    transform = RangeTransform.Compute(train);
    Problem scaled = transform.Scale(train);

    var param = new Parameter();
    param.Gamma = 1.0 / 3;
    param.SvmType = svm;
    param.KernelType = kernel;
    param.Probability = probability;

    int numberOfClasses = train.Y.OrderBy(x => x).Distinct().Count();
    if (numberOfClasses == 1)
    {
        throw new ArgumentException("Number of classes can't be one!");
    }

    if (svm == SvmType.C_SVC)
    {
        // Equal class weights for C-SVC.
        for (int i = 0; i < numberOfClasses; i++)
        {
            param.Weights[i] = 1;
        }
    }

    model = Training.Train(scaled, param);
    Console.Write("Training finished!");
}
/// <summary>
/// Trains an SVM classifier from pre-extracted labeled feature sets and
/// writes both the fitted range transform and the trained model to the
/// paths carried in <paramref name="options"/>.
/// </summary>
/// <param name="outputFile">Unused; kept for interface compatibility.</param>
/// <exception cref="ArgumentException">Thrown when the data contains a single class.</exception>
public void SVMClassifierTrain(List <FeaturesWithLabel> featureSets, ClassifyOptions options, SvmType svm = SvmType.C_SVC, KernelType kernel = KernelType.RBF, bool probability = true, string outputFile = null)
{
    // copy test multiclass Model
    var train = new Problem();
    train.X = GetData(featureSets).ToArray();
    train.Y = GetLabels(featureSets).ToArray();
    train.Count = train.X.Count();
    // NOTE(review): feature dimensionality pinned to 300 — confirm it matches
    // what the feature extractor actually produces.
    train.MaxIndex = 300;

    RangeTransform scaler = RangeTransform.Compute(train);
    Problem scaled = scaler.Scale(train);

    var param = new Parameter();
    param.Gamma = 1.0 / 3;
    param.SvmType = svm;
    param.KernelType = kernel;
    param.Probability = probability;

    int classCount = train.Y.Distinct().Count();
    if (classCount == 1)
    {
        throw new ArgumentException("Number of classes can't be one!");
    }

    if (svm == SvmType.C_SVC)
    {
        // Equal class weights for C-SVC.
        for (int i = 0; i < classCount; i++)
        {
            param.Weights[i] = 1;
        }
    }

    var trained = Training.Train(scaled, param);

    // Persist the scaler and model next to each other.
    RangeTransform.Write(options.TransformFilePath, scaler);
    SVM.BotSharp.MachineLearning.Model.Write(options.ModelFilePath, trained);
    Console.Write("Training finished!");
}
/// <summary>
/// Trains this classifier by delegating to the SVM trainer with its default
/// SVM type, kernel, and probability settings.
/// </summary>
public void Train(List <Sentence> sentences, ClassifyOptions options)
    => SVMClassifierTrain(sentences, options);
/// <summary>
/// Classification is not supported by this implementation.
/// </summary>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public List <Tuple <string, double> > Classify(Sentence sentence, ClassifyOptions options)
{
    throw new NotImplementedException();
}
/// <summary>
/// Training is currently a no-op for this classifier.
/// NOTE(review): the SVM training call is commented out — confirm whether this
/// is intentional or work in progress.
/// </summary>
public void Train(List <Sentence> sentences, ClassifyOptions options)
{
    // SVMClassifierTrain(featureSets, options);
}
object IClassifier.LoadModel(ClassifyOptions options) { throw new NotImplementedException(); }
/// <summary>
/// Model saving is not supported by this implementation.
/// </summary>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public string SaveModel(ClassifyOptions options)
{
    throw new NotImplementedException();
}