Esempio n. 1
0
        /// <summary>
        /// Trains a linear C-SVC model from the entries of <c>../../room12.txt</c> and prints a
        /// prediction line for every entry of <c>../../room11.txt</c>.
        /// </summary>
        /// <remarks>
        /// Both files are split on ';'. The trained model is written to <c>roomMatching.model</c>
        /// and read back from that file before predicting. Approach follows
        /// http://www.svm-tutorial.com/2014/10/svm-tutorial-classify-text-csharp/
        /// </remarks>
        static void wordsSvm()
        {
            const string roomFilePattern = "../../room{0}.txt";
            const int queryFileNumber = 11;
            const int trainingFileNumber = queryFileNumber + 1;

            var queryRooms = File.ReadAllText(string.Format(roomFilePattern, queryFileNumber)).Split(';');
            var trainingRooms = File.ReadAllText(string.Format(roomFilePattern, trainingFileNumber)).Split(';');

            // Index 0 below addresses the first argument (query rooms) and index 1 the second
            // (training rooms) -- presumably; confirm against PanGuTermFreqWeight.computerFW.
            var weights = new PanGuTermFreqWeight();
            weights.computerFW(queryRooms, trainingRooms);

            var trainingSet = new SVMProblem();
            var featureBuilder = new LableFeaturesBuilder();

            // Not referenced afterwards; the construction is kept because the constructor
            // may have side effects that are not visible from here.
            var segment = new PanGuSegment();

            // Training: one sample per training room, added under the room string itself.
            foreach (var trainingRoom in trainingRooms)
            {
                var keptTerms = weights.TermsByDoc[1, trainingRoom]
                    .Where(t => t.Length > 1)
                    .Where(t => weights.TermFWByGlobal.ContainsKey(t))
                    .Where(t => weights.TermFWByType.ContainsKey(0, t))
                    .ToArray();

                // Feature value: the larger of the term's two per-type frequencies.
                // (A per-document frequency scaled by GetW(t) was an earlier alternative.)
                var features = keptTerms.Select(t =>
                {
                    var freqInType0 = weights.TermFWByType[0, t].Freq;
                    var freqInType1 = weights.TermFWByType[1, t].Freq;
                    return new KeyValuePair<string, double>(t, Math.Max(freqInType0, freqInType1));
                });

                featureBuilder.AddToProblem(trainingSet, trainingRoom, features);
            }

            // Per-label weights (WeightLabels / Weights) are deliberately left unset.
            var settings = new SVMParameter
            {
                Type = SVMType.C_SVC,
                Kernel = SVMKernelType.LINEAR,
                C = 1,
                Probability = true
            };

            SVMProblem normalizedSet = trainingSet.Normalize(SVMNormType.L1);
            normalizedSet.CheckParameter(settings);

            // Round-trip the model through disk, then predict with the reloaded copy.
            LibSVMsharp.SVM.Train(normalizedSet, settings).SaveModel("roomMatching.model");
            var classifier = LibSVMsharp.SVM.LoadModel("roomMatching.model");

            // Prediction: one output line per query room that yields at least one feature node.
            foreach (var queryRoom in queryRooms)
            {
                var keptTerms = weights.TermsByDoc[0, queryRoom]
                    .Where(t => t.Length > 1)
                    .Where(t => weights.TermFWByGlobal.ContainsKey(t))
                    .Where(t => weights.TermFWByType.ContainsKey(1, t))
                    .ToArray();

                var featureNodes = featureBuilder.CreateNodes(keptTerms.Select(t =>
                {
                    var freqInType0 = weights.TermFWByType[0, t].Freq;
                    var freqInType1 = weights.TermFWByType[1, t].Freq;
                    return new KeyValuePair<string, double>(t, Math.Max(freqInType0, freqInType1));
                }));

                if (featureNodes.Length <= 0)
                {
                    // Nothing usable to classify for this room.
                    continue;
                }

                featureNodes = featureNodes.Normalize(SVMNormType.L1);

                double predictedLabel = LibSVMsharp.SVM.Predict(classifier, featureNodes);

                // The out arrays are required by the API; only the scalar returns are printed.
                double[] decisionValues;
                double decisionResult = LibSVMsharp.SVM.PredictValues(classifier, featureNodes, out decisionValues);

                double[] estimates;
                double probabilityResult = LibSVMsharp.SVM.PredictProbability(classifier, featureNodes, out estimates);

                Console.WriteLine(
                    "{0,22}\t{1},{2},{3},{4},{5}",
                    queryRoom,
                    featureBuilder.GetLable(predictedLabel),
                    predictedLabel,
                    decisionResult,
                    probabilityResult,
                    string.Empty);
            }

            Console.WriteLine(new string('=', 80));
        }
Esempio n. 2
0
 /// <summary>
 /// Creates a new <c>PanGuTermFreqWeight</c> and assigns a freshly constructed
 /// <c>PanGuSegment</c> to <c>Segment</c>.
 /// </summary>
 public PanGuTermFreqWeight()
 {
     Segment = new PanGuSegment();
 }