/// <summary>
/// Extracts TF-IDF keywords from <paramref name="sentences"/>, builds an SVM problem from them,
/// scales it and trains the classifier.
/// </summary>
/// <remarks>
/// On success the extracted keywords are stored in <c>featuresInTfIdf</c>, the fitted scaling in
/// <c>transform</c> and the trained model in <c>model</c>.
/// </remarks>
/// <param name="sentences">Training sentences; passed to the TF-IDF extractor, <c>GetData</c> and <c>GetLabels</c>.</param>
/// <param name="options">Classifier options; <c>Dimension</c> is forwarded to the TF-IDF extractor.</param>
/// <param name="svm">SVM formulation to train.</param>
/// <param name="kernel">Kernel function to use.</param>
/// <param name="probability">Value assigned to <c>Parameter.Probability</c>.</param>
/// <param name="outputFile">Not used by this method.</param>
/// <exception cref="ArgumentException">
/// The training set is empty, or it contains fewer than two distinct labels.
/// </exception>
public void SVMClassifierTrain(List<Sentence> sentences, ClassifyOptions options, SvmType svm = SvmType.C_SVC, KernelType kernel = KernelType.RBF, bool probability = true, string outputFile = null)
{
    var tfidf = new TfIdfFeatureExtractor();
    tfidf.Dimension = options.Dimension;
    tfidf.Sentences = sentences;
    tfidf.CalBasedOnCategory();
    featuresInTfIdf = tfidf.Keywords();

    // copy test multiclass Model
    Problem train = new Problem();
    train.X = GetData(sentences, options).ToArray();
    train.Y = GetLabels(sentences).ToArray();

    int sampleCount = train.X.Count();
    if (sampleCount == 0)
    {
        // Without this guard the train.X[0] access below fails with an opaque IndexOutOfRangeException.
        throw new ArgumentException("Training data must contain at least one sample.", nameof(sentences));
    }

    train.Count = sampleCount;
    // Feature width is taken from the first row (int.MaxValue was the previously considered alternative).
    train.MaxIndex = train.X[0].Count();

    // Validate before the scaling pass so bad input fails fast instead of training a useless model.
    int numberOfClasses = train.Y.Distinct().Count();
    if (numberOfClasses < 2)
    {
        throw new ArgumentException("Number of classes must greater than one!");
    }

    transform = RangeTransform.Compute(train);
    Problem scaled = transform.Scale(train);

    Parameter param = new Parameter();
    param.Gamma = 1.0 / 3;
    param.SvmType = svm;
    param.KernelType = kernel;
    param.Probability = probability;

    if (svm == SvmType.C_SVC)
    {
        // NOTE(review): weights are keyed by the loop index 0..numberOfClasses-1; this presumably
        // assumes the labels are exactly that contiguous range - confirm against GetLabels.
        for (int i = 0; i < numberOfClasses; i++)
        {
            param.Weights[i] = 1;
        }
    }

    model = Training.Train(scaled, param);

    Console.Write("Training finished!");
}
/// <summary>
/// Resolves the artifact paths under <c>options.ModelDir</c>, writes them back onto
/// <paramref name="options"/>, and loads the feature list, dictionary, category list,
/// SVM model and range transform from those files.
/// </summary>
/// <param name="options">
/// Supplies <c>ModelDir</c> and <c>ModelName</c>; receives the resolved file paths and the
/// loaded <c>Transform</c>.
/// </param>
/// <returns>The SVM model read from <c>options.ModelFilePath</c>.</returns>
object IClassifier.LoadModel(ClassifyOptions options)
{
    // Resolve every artifact path first; all of them live directly inside the model directory.
    options.FeaturesFileName = Path.Combine(options.ModelDir, "features");
    options.DictionaryFileName = Path.Combine(options.ModelDir, "dictionary");
    options.ModelFilePath = Path.Combine(options.ModelDir, options.ModelName);
    options.TransformFilePath = Path.Combine(options.ModelDir, "transform");
    options.CategoriesFileName = Path.Combine(options.ModelDir, "categories");

    // JSON-backed artifacts are read and deserialized one at a time, in the original order.
    string featuresJson = File.ReadAllText(options.FeaturesFileName);
    features = JsonConvert.DeserializeObject<List<String>>(featuresJson);

    string dictionaryJson = File.ReadAllText(options.DictionaryFileName);
    dictionary = JsonConvert.DeserializeObject<List<Tuple<string, int>>>(dictionaryJson);

    string categoriesJson = File.ReadAllText(options.CategoriesFileName);
    categories = JsonConvert.DeserializeObject<List<String>>(categoriesJson);

    // The model and the scaling transform use their own readers.
    model = Bigtree.Algorithm.SVM.Model.Read(options.ModelFilePath);
    options.Transform = RangeTransform.Read(options.TransformFilePath);

    return model;
}
/// <summary>
/// Builds an SVM problem from <paramref name="sentences"/>, scales it and trains the classifier.
/// </summary>
/// <remarks>
/// On success the fitted scaling is stored in <c>transform</c> and the trained model in <c>model</c>.
/// </remarks>
/// <param name="sentences">Training sentences; passed to <c>GetData</c> and <c>GetLabels</c>.</param>
/// <param name="options">Not used by this method.</param>
/// <param name="svm">SVM formulation to train.</param>
/// <param name="kernel">Kernel function to use.</param>
/// <param name="probability">Value assigned to <c>Parameter.Probability</c>.</param>
/// <param name="outputFile">Not used by this method.</param>
/// <exception cref="ArgumentException">
/// The training set is empty, or every sample carries the same label.
/// </exception>
public void SVMClassifierTrain(List<Sentence> sentences, ClassifyOptions options, SvmType svm = SvmType.C_SVC, KernelType kernel = KernelType.RBF, bool probability = true, string outputFile = null)
{
    // copy test multiclass Model
    Problem train = new Problem();
    train.X = GetData(sentences).ToArray();
    train.Y = GetLabels(sentences).ToArray();

    int sampleCount = train.X.Count();
    if (sampleCount == 0)
    {
        // Without this guard the train.X[0] access below fails with an opaque IndexOutOfRangeException.
        throw new ArgumentException("Training data must contain at least one sample.", nameof(sentences));
    }

    train.Count = sampleCount;
    // Feature width is taken from the first row (int.MaxValue was the previously considered alternative).
    train.MaxIndex = train.X[0].Count();

    // Validate the label set before the scaling pass so invalid input fails fast.
    int numberOfClasses = train.Y.Distinct().Count();
    if (numberOfClasses == 1)
    {
        throw new ArgumentException("Number of classes can't be one!");
    }

    transform = RangeTransform.Compute(train);
    Problem scaled = transform.Scale(train);

    Parameter param = new Parameter();
    param.Gamma = 1.0 / 3;
    param.SvmType = svm;
    param.KernelType = kernel;
    param.Probability = probability;

    if (svm == SvmType.C_SVC)
    {
        // NOTE(review): weights are keyed by the loop index 0..numberOfClasses-1; this presumably
        // assumes the labels are exactly that contiguous range - confirm against GetLabels.
        for (int i = 0; i < numberOfClasses; i++)
        {
            param.Weights[i] = 1;
        }
    }

    model = Training.Train(scaled, param);

    Console.Write("Training finished!");
}