static TwitterSentimentDemo()
 {
     string modelFileName = HttpContext.Current.Server.MapPath("App_Data\\AdCfy.bin");
     string bowSpcFileName = HttpContext.Current.Server.MapPath("App_Data\\AdCfyBowSpc.bin");
     mClassifier = new SvmBinaryClassifier<int>();
     mClassifier.LoadModel(modelFileName);
     BinarySerializer bs = new BinarySerializer(bowSpcFileName, FileMode.Open);
     mBowSpace = new BowSpace(bs);
     bs.Close();
     mReady = true;
 }
 static PumpIndexComponent()
 {
     Logger.GetLogger(typeof(PumpIndexComponent)).Info("PumpIndexComponent", "Loading model ...");
     string fileName = Utils.GetConfigValue("PumpIndexModel", ".\\PumpIndexModel.bin");
     using (BinarySerializer reader = new BinarySerializer(fileName, FileMode.Open))
     {
         mBowSpace = new BowSpace(reader);
         mClassifier = new SvmBinaryClassifier<int>(reader);
         mAvgDistPos = reader.ReadDouble();
         mAvgDistNeg = reader.ReadDouble();
         //Console.WriteLine(mAvgDistPos);
         //Console.WriteLine(mAvgDistNeg);
     }
     Logger.GetLogger(typeof(PumpIndexComponent)).Info("PumpIndexComponent", "Done.");
 }
        static PumpIndexComponent()
        {
            Logger.GetLogger(typeof(PumpIndexComponent)).Info("PumpIndexComponent", "Loading model ...");
            string fileName = Utils.GetConfigValue("PumpIndexModel", ".\\PumpIndexModel.bin");

            using (BinarySerializer reader = new BinarySerializer(fileName, FileMode.Open))
            {
                mBowSpace   = new BowSpace(reader);
                mClassifier = new SvmBinaryClassifier <int>(reader);
                mAvgDistPos = reader.ReadDouble();
                mAvgDistNeg = reader.ReadDouble();
                //Console.WriteLine(mAvgDistPos);
                //Console.WriteLine(mAvgDistNeg);
            }
            Logger.GetLogger(typeof(PumpIndexComponent)).Info("PumpIndexComponent", "Done.");
        }
Esempio n. 4
0
        public override void Run(object[] args)
        {
            // prepare data
            IStemmer stemmer;

            Set <string> .ReadOnly stopWords;
            TextMiningUtils.GetLanguageTools(Language.English, out stopWords, out stemmer);

            // Create a tokenizer.
            var tokenizer = new UnicodeTokenizer
            {
                MinTokenLen = 2,                            // Each token must be at least 2 characters long.
                Filter      = TokenizerFilter.AlphaStrict   // Tokens can consist of alphabetic characters only.
            };

            // take data for two classes from cvs file
            var data = new List <LabeledTweet>(GetLabeledTweets().Where(lt => lt.Polarity != 2)).ToList();

            // Create a bag-of-words space.
            var bowSpc = new BowSpace
            {
                Tokenizer      = tokenizer,                 // Assign the tokenizer.
                StopWords      = stopWords,                 // Assign the stop words.
                Stemmer        = stemmer,                   // Assign the stemmer.
                MinWordFreq    = 1,                         // A term must appear at least n-times in the corpus for it to be part of the vocabulary.
                MaxNGramLen    = 2,                         // Terms consisting of at most n-consecutive words will be considered.
                WordWeightType = WordWeightType.TermFreq,   // Set the weighting scheme for the bag-of-words vectors to TF.
                //WordWeightType = WordWeightType.TfIdf,  // Set the weighting scheme for the bag-of-words vectors to TF-IDF.
                NormalizeVectors  = true,                   // The TF-IDF vectors will be normalized.
                CutLowWeightsPerc = 0                       // The terms with the lowest weights, summing up to 20% of the overall weight sum, will be removed from each TF-IDF vector.
            };
            ArrayList <SparseVector <double> > bowData = bowSpc.Initialize(data.Select(d => d.Text));

            // label data
            var labeledSet = new LabeledDataset <string, SparseVector <double> >();

            for (int i = 0; i < data.Count; i++)
            {
                labeledSet.Add(data[i].Label, bowData[i]);
            }
            labeledSet.Shuffle();

            int testSize    = labeledSet.Count / 10;
            var trainingSet = new LabeledDataset <string, SparseVector <double> >(labeledSet.Skip(testSize));
            var testSet     = new LabeledDataset <string, SparseVector <double> >(labeledSet.Take(testSize));

            //-------------------- SVM

            var svmBinClass = new SvmBinaryClassifier <string> {
                VerbosityLevel = SvmLightVerbosityLevel.Off
            };

            if (args.Any())
            {
                svmBinClass.C = (int)args[0];
            }
            //svmBinClass.BiasedHyperplane = true;
            //svmBinClass.CustomParams = "-t 3";   // non-linear kernel
            //svmBinClass.CustomParams = String.Format("-j {0}",j);

            svmBinClass.Train(trainingSet);

            int    correct = 0;
            double avgDist = 0;

            foreach (LabeledExample <string, SparseVector <double> > labeledExample in testSet)
            {
                var prediction = svmBinClass.Predict(labeledExample.Example);
                //Output.WriteLine("actual: {0}\tpredicted: {1}\t score: {2:0.0000}", labeledExample.Label, prediction.BestClassLabel, prediction.BestScore);
                avgDist += prediction.BestScore;
                if (prediction.BestClassLabel == labeledExample.Label)
                {
                    correct++;
                }
            }

            Output.WriteLine("Accuracy: {0:0.00}", 100.0 * correct / testSet.Count);
            Output.WriteLine("Avg. distance: {0:0.00}", avgDist / testSet.Count);

            Result.Add("accuracy", (double)correct / testSet.Count);

            Result.Add("classifier", svmBinClass);
            Result.Add("labeled_data", labeledSet);
        }