/// <summary>
/// Trains the naive Bayes model from labelled sentences: extracts TF-IDF keywords,
/// one-hot encodes the sentences, computes the label distribution with prior
/// probabilities, and fills <c>condProbDictionary</c> with one conditional
/// probability per (label, feature index, feature value) combination.
/// </summary>
/// <param name="sentences">Labelled training sentences; must contain at least one item.</param>
/// <param name="options">Classifier options; only <c>Dimension</c> is read here.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="sentences"/> or <paramref name="options"/> is null.
/// </exception>
/// <exception cref="ArgumentException"><paramref name="sentences"/> is empty.</exception>
public void Train(List<Sentence> sentences, ClassifyOptions options)
{
    if (sentences == null)
    {
        throw new ArgumentNullException(nameof(sentences));
    }

    if (options == null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    // The feature count below is read from the first feature vector, so an empty
    // training set cannot be handled; fail early with a descriptive error instead
    // of an index-out-of-range further down.
    if (sentences.Count == 0)
    {
        throw new ArgumentException("At least one training sentence is required.", nameof(sentences));
    }

    // Select the keyword vocabulary via TF-IDF computed per category.
    var tfidf = new TfIdfFeatureExtractor();
    tfidf.Dimension = options.Dimension;
    tfidf.Sentences = sentences;
    tfidf.CalBasedOnCategory();

    // One-hot encode against that vocabulary.
    // NOTE(review): x.Vector is read right after this call, so EncodeAll presumably
    // populates each sentence's Vector as a side effect — confirm in OneHotEncoder.
    var encoder = new OneHotEncoder();
    encoder.Sentences = sentences;
    encoder.Words = tfidf.Keywords();
    words = encoder.EncodeAll();

    var featureSets = sentences
        .Select(x => new Tuple<string, double[]>(x.Label, x.Vector))
        .ToList();

    // Label frequency table, ordered by label value.
    labelDist = featureSets
        .GroupBy(x => x.Item1)
        .Select(g => new Probability { Value = g.Key, Freq = g.Count() })
        .OrderBy(p => p.Value)
        .ToList();

    nb.LabelDist = labelDist;
    nb.FeatureSet = featureSets;

    // calculate prior prob
    foreach (var label in labelDist)
    {
        label.Prob = nb.CalPriorProb(label.Value);
    }

    // calculate posterior prob: loop over every label, feature index and feature value
    var featureCount = nb.FeatureSet[0].Item2.Length;
    foreach (var label in labelDist)
    {
        for (int featureIndex = 0; featureIndex < featureCount; featureIndex++)
        {
            for (int v = 0; v < features.Length; v++)
            {
                // Key layout: "<label> f<featureIndex> <featureValue>".
                string key = $"{label.Value} f{featureIndex} {features[v]}";
                condProbDictionary[key] = nb.CalCondProb(featureIndex, label.Value, features[v]);
            }
        }
    }
}
/// <summary>
/// Trains an SVM model from labelled sentences: extracts TF-IDF keywords into
/// <c>featuresInTfIdf</c>, builds a <c>Problem</c> from <c>GetData</c>/<c>GetLabels</c>,
/// scales it with a freshly computed <c>RangeTransform</c> (stored in <c>transform</c>),
/// and stores the trained result in <c>model</c>.
/// </summary>
/// <param name="sentences">Labelled training sentences; must contain at least one item.</param>
/// <param name="options">Classifier options; <c>Dimension</c> is read here and the object is passed on to <c>GetData</c>.</param>
/// <param name="svm">SVM formulation to train.</param>
/// <param name="kernel">Kernel type to use.</param>
/// <param name="probability">Value assigned to the training parameter's <c>Probability</c> setting.</param>
/// <param name="outputFile">Not used by this method; kept for interface compatibility.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="sentences"/> or <paramref name="options"/> is null.
/// </exception>
/// <exception cref="ArgumentException"><paramref name="sentences"/> is empty.</exception>
public void SVMClassifierTrain(List<Sentence> sentences, ClassifyOptions options, SvmType svm = SvmType.C_SVC, KernelType kernel = KernelType.RBF, bool probability = true, string outputFile = null)
{
    if (sentences == null)
    {
        throw new ArgumentNullException(nameof(sentences));
    }

    if (options == null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    // MaxIndex is taken from the first sample below, so an empty training set
    // cannot be handled; fail early with a descriptive error.
    if (sentences.Count == 0)
    {
        throw new ArgumentException("At least one training sentence is required.", nameof(sentences));
    }

    var tfidf = new TfIdfFeatureExtractor();
    tfidf.Dimension = options.Dimension;
    tfidf.Sentences = sentences;
    tfidf.CalBasedOnCategory();
    featuresInTfIdf = tfidf.Keywords();

    // Build the multiclass training problem.
    var samples = GetData(sentences, options).ToArray();
    Problem train = new Problem();
    train.X = samples;
    train.Y = GetLabels(sentences).ToArray();
    train.Count = samples.Length;
    train.MaxIndex = samples[0].Count(); // width of the first sample

    // Scale the features; the transform is kept in a field after training.
    transform = RangeTransform.Compute(train);
    Problem scaled = transform.Scale(train);

    Parameter param = new Parameter();
    param.Gamma = 1.0 / 3; // NOTE(review): hard-coded gamma, independent of the feature count — confirm intended
    param.SvmType = svm;
    param.KernelType = kernel;
    param.Probability = probability;

    int numberOfClasses = train.Y.Distinct().Count();
    if (numberOfClasses == 1)
    {
        // NOTE(review): this only reports the problem; training still proceeds below.
        Console.Write("Number of classes must greater than one!");
    }

    if (svm == SvmType.C_SVC)
    {
        // Give every class the same weight.
        // NOTE(review): weights are keyed by 0..numberOfClasses-1, which assumes
        // GetLabels yields labels in that range — confirm.
        for (int i = 0; i < numberOfClasses; i++)
        {
            param.Weights[i] = 1;
        }
    }

    model = Training.Train(scaled, param);
    Console.Write("Training finished!");
}