Пример #1
0
        /// <summary>
        /// Trains the <c>nb</c> model from <paramref name="sentences"/>: extracts keywords with
        /// <see cref="TfIdfFeatureExtractor"/>, encodes the sentences with <see cref="OneHotEncoder"/>,
        /// builds the label frequency table, then stores one prior probability per label and one
        /// conditional probability per (label, feature index, feature value) combination.
        /// </summary>
        /// <param name="sentences">Training sentences; their <c>Label</c> and <c>Vector</c> members are read.</param>
        /// <param name="options">Classification options; only <c>Dimension</c> is read here.</param>
        public void Train(List <Sentence> sentences, ClassifyOptions options)
        {
            // Configure the TF-IDF extractor and run its per-category calculation.
            var extractor = new TfIdfFeatureExtractor
            {
                Dimension = options.Dimension,
                Sentences = sentences
            };
            extractor.CalBasedOnCategory();

            // Encode the sentences against the extracted keywords; the result is kept in the words field.
            var oneHot = new OneHotEncoder
            {
                Sentences = sentences,
                Words     = extractor.Keywords()
            };
            words = oneHot.EncodeAll();

            // One (label, vector) pair per sentence.
            // NOTE(review): presumably EncodeAll() is what populates Sentence.Vector - confirm.
            var labeledVectors = sentences
                                 .Select(s => new Tuple <string, double[]>(s.Label, s.Vector))
                                 .ToList();

            // Number of occurrences of each label, sorted by label value.
            labelDist = labeledVectors
                        .GroupBy(pair => pair.Item1)
                        .Select(g => new Probability { Value = g.Key, Freq = g.Count() })
                        .OrderBy(p => p.Value)
                        .ToList();

            nb.LabelDist  = labelDist;
            nb.FeatureSet = labeledVectors;

            // Prior probability of every label.
            labelDist.ForEach(entry => entry.Prob = nb.CalPriorProb(entry.Value));

            // Every vector is assumed to have the same length as the first one.
            int vectorLength = nb.FeatureSet[0].Item2.Length;

            // Conditional probability for each label / feature index / feature value,
            // cached under the key "<label> f<index> <value>".
            foreach (var entry in labelDist)
            {
                for (int index = 0; index < vectorLength; index++)
                {
                    for (int slot = 0; slot < features.Length; slot++)
                    {
                        var featureValue = features[slot];
                        string cacheKey  = $"{entry.Value} f{index} {featureValue}";

                        condProbDictionary[cacheKey] = nb.CalCondProb(index, entry.Value, featureValue);
                    }
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Trains an SVM model from <paramref name="sentences"/> and stores the result in the
        /// <c>model</c> field. The TF-IDF keywords are kept in <c>featuresInTfIdf</c> and the
        /// range transform computed from the training data is kept in <c>transform</c>.
        /// </summary>
        /// <param name="sentences">Training sentences passed to the TF-IDF extractor, <c>GetData</c> and <c>GetLabels</c>.</param>
        /// <param name="options">Classification options; <c>Dimension</c> is read here and the object is forwarded to <c>GetData</c>.</param>
        /// <param name="svm">SVM type copied into the training parameters.</param>
        /// <param name="kernel">Kernel type copied into the training parameters.</param>
        /// <param name="probability">Value copied into the <c>Probability</c> training parameter.</param>
        /// <param name="outputFile">Not used by this method; kept so existing callers keep compiling.</param>
        /// <exception cref="ArgumentNullException"><paramref name="sentences"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">No training rows were produced from <paramref name="sentences"/>.</exception>
        public void SVMClassifierTrain(List <Sentence> sentences, ClassifyOptions options, SvmType svm = SvmType.C_SVC, KernelType kernel = KernelType.RBF, bool probability = true, string outputFile = null)
        {
            if (sentences == null)
            {
                throw new ArgumentNullException(nameof(sentences));
            }

            var tfidf = new TfIdfFeatureExtractor();

            tfidf.Dimension = options.Dimension;
            tfidf.Sentences = sentences;
            tfidf.CalBasedOnCategory();
            featuresInTfIdf = tfidf.Keywords();

            // Build the training problem from the feature rows and their labels.
            Problem train = new Problem();

            train.X     = GetData(sentences, options).ToArray();
            train.Y     = GetLabels(sentences).ToArray();
            train.Count = train.X.Count();

            // Without this guard an empty training set fails on train.X[0] below
            // with an index exception that does not explain the real problem.
            if (train.Count == 0)
            {
                throw new ArgumentException("Cannot train an SVM model without any training samples.", nameof(sentences));
            }

            // The width of the first row is used as the maximum feature index.
            train.MaxIndex = train.X[0].Count();

            // Scale the problem; the transform is kept in a field after training.
            transform = RangeTransform.Compute(train);
            Problem scaled = transform.Scale(train);

            Parameter param = new Parameter();

            param.Gamma       = 1.0 / 3;
            param.SvmType     = svm;
            param.KernelType  = kernel;
            param.Probability = probability;

            // Sorting is unnecessary for counting distinct labels.
            int numberOfClasses = train.Y.Distinct().Count();

            if (numberOfClasses == 1)
            {
                // Warning only: training still proceeds with a single class.
                Console.Write("Number of classes must greater than one!");
            }

            if (svm == SvmType.C_SVC)
            {
                // Weight 1 for each key 0..numberOfClasses-1.
                // NOTE(review): assumes GetLabels yields labels in that range - confirm.
                for (int i = 0; i < numberOfClasses; i++)
                {
                    param.Weights[i] = 1;
                }
            }

            model = Training.Train(scaled, param);

            Console.Write("Training finished!");
        }