// Example #1
        /// <summary>
        ///   Creates and learns a Naive Bayes classifier to recognize
        ///   the previously loaded dataset using the current settings.
        /// </summary>
        /// <param name="sender">Standard WinForms event source (unused).</param>
        /// <param name="e">Standard WinForms event data (unused).</param>
        private void btnCreate_Click(object sender, EventArgs e)
        {
            if (dgvLearningSource.DataSource == null)
            {
                MessageBox.Show("Please load some data first.");
                return;
            }

            // Class labels for the two output categories (Titanic-style dataset).
            classNames = new string[] { "Non Survived", "Survived" };

            // Finish and commit any pending cell edits before reading the grid.
            dgvLearningSource.EndEdit();

            // Guard against a data source that is not a DataTable; the previous
            // unchecked 'as' cast would have thrown a NullReferenceException here.
            if (!(dgvLearningSource.DataSource is DataTable sourceTable))
            {
                MessageBox.Show("The loaded data source is not a data table.");
                return;
            }

            // Creates a matrix from the source data table
            double[,] table = sourceTable.ToMatrix(out columnNames);

            // The first six columns are the input features...
            double[][] inputs = table.GetColumns(0, 1, 2, 3, 4, 5).ToJagged();

            // ...and column 6 holds the integer class label for each row.
            int[]    outputs  = table.GetColumn(6).ToInt32();
            string[] colNames = columnNames.Get(0, 6);

            // Create the Gaussian Naive Bayes learner and estimate the model
            // from the data (one NormalDistribution per class/feature pair).
            var teacher = new NaiveBayesLearning <NormalDistribution>();
            bayes = teacher.Learn(inputs, outputs);

            // Show the estimated distributions and class probabilities
            dataGridView1.DataSource = new ArrayDataView(bayes.Distributions, colNames);

            // NOTE(review): the previous version also generated 1000 random samples
            // per class from the learned distributions and stacked them into a
            // 'graph' matrix, but that result was never used anywhere (the scatter
            // plot below is built from 'table'), so the dead code was removed.
            CreateScatterplot(zedGraphControl2, table);

            lbStatus.Text = "Classifier created! See the other tabs for details!";
        }
 /// <summary>
 ///   Initializes the controller and immediately trains the underlying
 ///   Naive Bayes model on the people held by the supplied plan repository.
 /// </summary>
 /// <param name="planRepository">Repository providing the training population.</param>
 public NaiveBayesController(IPlanRepository planRepository)
 {
     _planRepository = planRepository;
     _naiveBayes     = new NaiveBayes();

     // Seed the classifier with the repository's people as the training set.
     _naiveBayes.AddTrainingSet(planRepository.People);
 }
// Example #3
 /// <summary>
 ///   Builds a fresh Naive Bayes model and fits it to the given samples.
 /// </summary>
 /// <param name="X">Training feature rows; one inner list per sample.</param>
 /// <param name="Y">Class label for each corresponding row of <paramref name="X"/>.</param>
 public void Train(List <List <object> > X, List <object> Y)
 {
     var classifier = new NaiveBayes();

     // Publish the model before fitting, matching the original assignment order.
     Model = classifier;
     classifier.Train(X, Y);
 }
        private void GenerateBasedOnData()
        {
            List <string[]> generating = new List <string[]>(); // do ewentualnego sprawdzania

            var attrType = RemoveAt(this.attrType, 0);

            //tutaj dorzucam tworzenie wykresu ciągłego prawdopodobieństwa
            Spline3Deg[,] probabilities = new Spline3Deg[classes, attribs];
            for (int i = 0; i < attribs; i++)
            {
                if (attrType[i].Equals("double") || attrType[i].Equals("integer"))
                {
                    for (int j = 0; j < classes; j++)
                    {
                        int      c    = values.ElementAt(j).Value.Item2.ElementAt(i).Value.Count;
                        double[] y, x = new double[c];
                        SortedList <double, int> temp = new SortedList <double, int>();
                        foreach (var v in values.ElementAt(j).Value.Item2.ElementAt(i).Value)
                        {
                            int tI = v.Value; double tD = Double.Parse(v.Key.Replace(" ", string.Empty),
                                                                       System.Globalization.NumberStyles.AllowDecimalPoint,
                                                                       System.Globalization.NumberFormatInfo.InvariantInfo);
                            temp.Add(tD, tI);
                        }
                        y    = temp.Keys.ToArray();
                        x[0] = 0;
                        for (int k = 1; k < temp.Count; k++)
                        {
                            x[k] = x[k - 1] + temp.ElementAt(k - 1).Value + temp.ElementAt(k).Value;
                        }
                        probabilities[j, i] = new Spline3Deg(x, y);
                    }
                }
            }


            //do sprawdzania punktacji później
            //podzielić dane wejściowe i wygenerowane na klasy i artybuty
            var readClass  = new int[reading.Count];
            var readAttr_d = new double[reading.Count, reading.ElementAt(0).Length - 1].ToJagged();

            var stringIntCheatSheet = new Dictionary <string, int> [reading.ElementAt(0).Length];

            for (int i = 0; i < stringIntCheatSheet.Length; i++)
            {
                stringIntCheatSheet[i] = new Dictionary <string, int>();
            }

            for (int x = 0; x < reading.Count; x++)
            {
                for (int y = 0; y < reading.ElementAt(0).Length; y++)
                {
                    double rr = 0;
                    string ss = reading.ElementAt(x)[y];
                    if (!double.TryParse(ss, System.Globalization.NumberStyles.AllowDecimalPoint,
                                         System.Globalization.NumberFormatInfo.InvariantInfo, out rr) ||
                        y == 0)
                    {
                        if (!stringIntCheatSheet[y].ContainsKey(ss))
                        {
                            stringIntCheatSheet[y].Add(ss, stringIntCheatSheet[y].Count);
                        }
                        rr = stringIntCheatSheet[y][ss];
                    }
                    if (y == 0)
                    {
                        readClass[x] = (int)rr;
                    }
                    else
                    {
                        readAttr_d[x][y - 1] = rr;
                    }
                }
            }
            int readClassesSqrt = (int)Math.Round(Math.Sqrt(reading.Count)),
                genClassesSqrt, mixClassesSqrt;
            var learnKnn = new KNearestNeighbors(readClassesSqrt);

            var knn = learnKnn.Learn(readAttr_d, readClass);

            double[] attrcr = new double[attribs];


            string[] bestattr = new string[attribs];
            double   bestscore;

            //czas generować ten szajs
            var newStuff = new string[newData, attribs + 1];

            for (int it = 0; it < newStuff.GetLength(0); it++)
            {
                bestscore = 0;

                int cl = rnd.Next(classes); //rnd to zadelkarowany wcześniej Random //losowanie klasy
                newStuff[it, 0] = values.ElementAt(cl).Key;
                int safety = 0;
                do
                {
                    for (int v = 1; v <= attribs; v++)
                    {     //losowanie wartości atrybutu
                        if (attrType[v - 1].Equals("string"))
                        { //funkcja dyskretna
                            int val = rnd.Next(values.ElementAt(cl).Value.Item1);
                            int b   = 0;
                            foreach (var a in values.ElementAt(cl).Value.Item2[v])
                            {
                                if (val < (b += a.Value))
                                {
                                    newStuff[it, v] = a.Key; //na Monte Carlo
                                    break;
                                }
                            }
                        }
                        else
                        {  //funkcja ciągła
                            Tuple <double, double> extr = probabilities[cl, v - 1].Limits();
                            double val = rnd.Next((int)extr.Item1, (int)extr.Item2) + rnd.NextDouble();
                            double r   = probabilities[cl, v - 1].y(val);
                            if (attrType[v - 1].Equals("double"))
                            {
                                newStuff[it, v] = r.ToString(fltPrec, System.Globalization.CultureInfo.InvariantCulture);
                            }
                            else //if (attrType[v - 1].Equals("integer"))
                            {
                                newStuff[it, v] = Math.Round(r).ToString();
                            }
                        }//koniec losowania wartości atrybutu
                        ///ekstra warunek bezpieczeństwa, bo czasami trafiają się NULLe
                        if (string.IsNullOrEmpty(newStuff[it, v]))
                        {
                            v--;
                            continue; //jeśli atrybut ma nulla, powtórz pętlę
                        }
                        ///koniec ekstra warunku bespieczeństwa
                    }//koniec generowania obiektu


                    //do tabliczki do sprawdzenia punktacji
                    for (int v = 1; v <= attribs; v++)
                    {
                        double rr = 0;
                        string ss = newStuff[it, v];
                        if (!double.TryParse(ss, System.Globalization.NumberStyles.AllowDecimalPoint,
                                             System.Globalization.NumberFormatInfo.InvariantInfo, out rr))
                        {
                            if (!stringIntCheatSheet[v].ContainsKey(ss))
                            {
                                stringIntCheatSheet[v].Add(ss, stringIntCheatSheet[v].Count);
                            }
                            rr = stringIntCheatSheet[v][ss];
                        }
                        attrcr[v - 1] = rr;
                    }
                    if (knn.Score(attrcr, cl) > bestscore)
                    {
                        for (int iter = 0; iter < attribs; iter++)
                        {
                            bestattr[iter] = newStuff[it, iter + 1];
                        }
                    }
                } while (knn.Score(attrcr, cl) < scoreH / 100 && ++safety < 1000);

                for (int iter = 0; iter < attribs; iter++)
                {
                    newStuff[it, iter + 1] = bestattr[iter];
                }
            }//koniec całego generowania

            //tu dać zapis do pliku
            string savefiledir = "";

            using (var dirB = new System.Windows.Forms.SaveFileDialog())
            {
                dirB.Filter     = "Text Files | *.txt";
                dirB.DefaultExt = "txt";
                var res = dirB.ShowDialog();
                if (res == System.Windows.Forms.DialogResult.OK)
                {
                    using (var write = new System.IO.StreamWriter(savefiledir = dirB.FileName))
                    {
                        for (int x = 0; x < newStuff.GetLength(0); x++)
                        {
                            string line = "";
                            for (int y = 0; y < newStuff.GetLength(1); y++)
                            {
                                line += newStuff[x, y] + ',';
                            }
                            line = line.Remove(line.Length - 1);
                            string[] temp = line.Split(',');
                            generating.Add(line.Split(','));
                            swap(ref temp[0], ref temp[clsCol]);
                            line = "";
                            for (int y = 0; y < temp.Length; y++)
                            {
                                line += temp[y] + ',';
                            }
                            line = line.Remove(line.Length - 1);
                            write.WriteLine(line);
                        }
                    }
                }
                else
                {
                    return;
                }
            }
            //tu dać walidację wygenerowanych danych

            var dialogResult = System.Windows.MessageBox.Show("Do you want to test the generated data?", "Data testing - extended data", System.Windows.MessageBoxButton.YesNo);

            if (dialogResult == MessageBoxResult.Yes)
            {
                var genClass = new int[generating.Count];
                //var genAttr = new int[generating.Count, generating.ElementAt(0).Length - 1].ToJagged();
                var genAttr_d = new double[generating.Count, generating.ElementAt(0).Length - 1].ToJagged();


                for (int x = 0; x < generating.Count; x++)
                {
                    for (int y = 0; y < generating.ElementAt(0).Length; y++)
                    {
                        double rr = 0;
                        string ss = generating.ElementAt(x)[y];
                        if (!double.TryParse(ss, System.Globalization.NumberStyles.AllowDecimalPoint,
                                             System.Globalization.NumberFormatInfo.InvariantInfo, out rr) || y == 0)
                        {
                            if (!stringIntCheatSheet[y].ContainsKey(ss))
                            {
                                stringIntCheatSheet[y].Add(ss, stringIntCheatSheet[y].Count);
                            }
                            rr = stringIntCheatSheet[y][ss];
                        }
                        if (y == 0)
                        {
                            genClass[x] = (int)rr;
                        }
                        else
                        {
                            genAttr_d[x][y - 1] = rr;
                        }
                    }
                }

                //przerobienie na tablicę intów, z przesunięciem dobli o precyzję
                var genAttr_i  = new int[generating.Count, generating.ElementAt(0).Length - 1].ToJagged();
                var readAttr_i = new int[reading.Count, reading.ElementAt(0).Length - 1].ToJagged();

                int shift = (int)Math.Pow(10, FltPrecBox.SelectedIndex + 1);
                for (int x = 0; x < generating.Count; x++)
                {
                    for (int y = 0; y < generating.ElementAt(0).Length - 1; y++)
                    {
                        if (attrType[y].Equals("double"))
                        {
                            genAttr_i[x][y] = (int)(genAttr_d[x][y] * shift);
                        }
                        else
                        {
                            genAttr_i[x][y] = (int)genAttr_d[x][y];
                        }
                    }
                }
                for (int x = 0; x < reading.Count; x++)
                {
                    for (int y = 0; y < reading.ElementAt(0).Length - 1; y++)
                    {
                        if (attrType[y].Equals("double"))
                        {
                            readAttr_i[x][y] = (int)(readAttr_d[x][y] * shift);
                        }
                        else
                        {
                            readAttr_i[x][y] = (int)readAttr_d[x][y];
                        }
                    }
                }


                int correctnb = 0, incorrectnb = 0, correctknn = 0, incorrectknn = 0, correctsvm = 0, incorrectsvm = 0;

                var        learn = new NaiveBayesLearning();
                NaiveBayes nb    = learn.Learn(readAttr_i, readClass);
                var        test  = nb.Decide(genAttr_i);
                foreach (var v in test)
                {
                    if (v.Equals(genClass[test.IndexOf(v)]))
                    {
                        correctnb++;
                    }
                    else
                    {
                        incorrectnb++;
                    }
                }

                /////////////////////////////////////////////////////////////////////////

                var testknn = knn.Decide(genAttr_d);
                for (int i = 0; i < testknn.Length; i++)
                //foreach (var v in testknn)
                {
                    if (testknn[i].Equals(genClass[i]))
                    {
                        correctknn++;
                    }
                    else
                    {
                        incorrectknn++;
                    }
                }
                /////////////////////////////////////////////////////////////////////////

                try
                {
                    var teach = new MulticlassSupportVectorLearning <Gaussian>()
                    {
                        // Configure the learning algorithm to use SMO to train the
                        //  underlying SVMs in each of the binary class subproblems.
                        Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                        {
                            // Estimate a suitable guess for the Gaussian kernel's parameters.
                            // This estimate can serve as a starting point for a grid search.
                            UseKernelEstimation = true
                        }
                    };
                    var svm = teach.Learn(readAttr_d, readClass);

                    var testsvm = svm.Decide(genAttr_d);
                    for (int i = 0; i < testsvm.Length; i++)
                    //foreach (var v in testknn)
                    {
                        if (testsvm[i].Equals(genClass[i]))
                        {
                            correctsvm++;
                        }
                        else
                        {
                            incorrectsvm++;
                        }
                    }
                }
                catch (AggregateException) { }
                ////////////////////////////////////////////////////////////

                double[][] mixAttr_d = new double[genAttr_d.GetLength(0) + readAttr_d.GetLength(0),
                                                  genAttr_d[0].Length].ToJagged();
                int[] mixClass = new int[genClass.Length + readClass.Length];

                Array.Copy(readClass, mixClass, readClass.Length);
                Array.Copy(genClass, 0, mixClass, readClass.Length, genClass.Length);

                Array.Copy(readAttr_d, mixAttr_d, readAttr_d.Length);
                Array.Copy(genAttr_d, 0, mixAttr_d, readAttr_d.Length, genAttr_d.Length);

                int[][] mixAttr_i = new int[genAttr_i.GetLength(0) + readAttr_i.GetLength(0),
                                            genAttr_i[0].Length].ToJagged();

                Array.Copy(readAttr_i, mixAttr_i, readAttr_i.Length);
                Array.Copy(genAttr_i, 0, mixAttr_i, readAttr_i.Length, genAttr_i.Length);

                //KROSWALIDACJAAAAAAAAAAAAAAAAAA
                genClassesSqrt = (int)Math.Round(Math.Sqrt(genClass.Length));
                mixClassesSqrt = (int)Math.Round(Math.Sqrt(mixClass.Length));

                //KNN

                var crossvalidationRead = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new KNearestNeighbors(k: readClassesSqrt),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: readAttr_d, y: readClass
                    );
                var resultRead = crossvalidationRead.Learn(readAttr_d, readClass);
                // We can grab some information about the problem:
                var numberOfSamplesRead = resultRead.NumberOfSamples;
                var numberOfInputsRead  = resultRead.NumberOfInputs;
                var numberOfOutputsRead = resultRead.NumberOfOutputs;

                var trainingErrorRead   = resultRead.Training.Mean;
                var validationErrorRead = resultRead.Validation.Mean;

                var    readCM       = resultRead.ToConfusionMatrix(readAttr_d, readClass);
                double readAccuracy = readCM.Accuracy;
                //////////////////////////////////////////////////////////
                var crossvalidationGen = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new KNearestNeighbors(k: genClassesSqrt),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: genAttr_d, y: genClass
                    );
                var resultGen = crossvalidationGen.Learn(genAttr_d, genClass);
                // We can grab some information about the problem:
                var numberOfSamplesGen = resultGen.NumberOfSamples;
                var numberOfInputsGen  = resultGen.NumberOfInputs;
                var numberOfOutputsGen = resultGen.NumberOfOutputs;

                var    trainingErrorGen   = resultGen.Training.Mean;
                var    validationErrorGen = resultGen.Validation.Mean;
                var    genCM       = resultGen.ToConfusionMatrix(genAttr_d, genClass);
                double genAccuracy = genCM.Accuracy;
                //////////////////////////////////////////////////////////

                var crossvalidationMix = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new KNearestNeighbors(k: mixClassesSqrt),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: mixAttr_d, y: mixClass
                    );
                var resultMix = crossvalidationMix.Learn(readAttr_d, readClass);
                // We can grab some information about the problem:
                var numberOfSamplesMix = resultMix.NumberOfSamples;
                var numberOfInputsMix  = resultMix.NumberOfInputs;
                var numberOfOutputsMix = resultMix.NumberOfOutputs;

                var trainingErrorMix   = resultMix.Training.Mean;
                var validationErrorMix = resultMix.Validation.Mean;

                var    mixCM       = resultMix.ToConfusionMatrix(mixAttr_d, mixClass);
                double mixAccuracy = mixCM.Accuracy;

                //NB
                var crossvalidationReadnb = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new NaiveBayesLearning(),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: readAttr_i, y: readClass
                    );
                var resultReadnb = crossvalidationReadnb.Learn(readAttr_i, readClass);
                // We can grab some information about the problem:
                var numberOfSamplesReadnb = resultReadnb.NumberOfSamples;
                var numberOfInputsReadnb  = resultReadnb.NumberOfInputs;
                var numberOfOutputsReadnb = resultReadnb.NumberOfOutputs;

                var trainingErrorReadnb   = resultReadnb.Training.Mean;
                var validationErrorReadnb = resultReadnb.Validation.Mean;

                var    readCMnb       = resultReadnb.ToConfusionMatrix(readAttr_i, readClass);
                double readAccuracynb = readCMnb.Accuracy;
                //////////////////////////////////////////////////////////
                var crossvalidationGennb = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new NaiveBayesLearning(),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: genAttr_i, y: genClass
                    );
                var resultGennb = crossvalidationGennb.Learn(genAttr_i, genClass);
                // We can grab some information about the problem:
                var numberOfSamplesGennb = resultGennb.NumberOfSamples;
                var numberOfInputsGennb  = resultGennb.NumberOfInputs;
                var numberOfOutputsGennb = resultGennb.NumberOfOutputs;

                var    trainingErrorGennb   = resultGennb.Training.Mean;
                var    validationErrorGennb = resultGennb.Validation.Mean;
                var    genCMnb       = resultGennb.ToConfusionMatrix(genAttr_i, genClass);
                double genAccuracynb = genCMnb.Accuracy;
                //////////////////////////////////////////////////////////

                var crossvalidationMixnb = CrossValidation.Create(
                    k: 4,
                    learner: (p) => new NaiveBayesLearning(),
                    loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                    fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                    x: mixAttr_i, y: mixClass
                    );
                var resultMixnb = crossvalidationMixnb.Learn(mixAttr_i, mixClass);
                // We can grab some information about the problem:
                var numberOfSamplesMixnb = resultMixnb.NumberOfSamples;
                var numberOfInputsMixnb  = resultMixnb.NumberOfInputs;
                var numberOfOutputsMixnb = resultMixnb.NumberOfOutputs;

                var trainingErrorMixnb   = resultMixnb.Training.Mean;
                var validationErrorMixnb = resultMixnb.Validation.Mean;

                var    mixCMnb       = resultMixnb.ToConfusionMatrix(mixAttr_i, mixClass);
                double mixAccuracynb = mixCMnb.Accuracy;

                //SVM
                double readAccuracysvm = 0, genAccuracysvm = 0, mixAccuracysvm = 0;
                try
                {
                    var crossvalidationReadsvm = CrossValidation.Create(
                        k: 4,
                        learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                    {
                        Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                        {
                            UseKernelEstimation = true
                        }
                    },
                        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                        x: readAttr_d, y: readClass
                        );
                    //crossvalidationReadsvm.ParallelOptions.MaxDegreeOfParallelism = 1;
                    var resultReadsvm = crossvalidationReadsvm.Learn(readAttr_d, readClass);
                    // We can grab some information about the problem:
                    var numberOfSamplesReadsvm = resultReadsvm.NumberOfSamples;
                    var numberOfInputsReadsvm  = resultReadsvm.NumberOfInputs;
                    var numberOfOutputsReadsvm = resultReadsvm.NumberOfOutputs;

                    var trainingErrorReadsvm   = resultReadsvm.Training.Mean;
                    var validationErrorReadsvm = resultReadsvm.Validation.Mean;

                    var readCMsvm = resultReadsvm.ToConfusionMatrix(readAttr_d, readClass);
                    readAccuracysvm = readCMsvm.Accuracy;
                }
                catch (AggregateException) { }
                //////////////////////////////////////////////////////////
                try
                {
                    var crossvalidationGensvm = CrossValidation.Create(
                        k: 4,
                        learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                    {
                        Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                        {
                            UseKernelEstimation = true
                        }
                    },
                        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                        x: genAttr_d, y: genClass
                        );
                    var resultGensvm = crossvalidationGensvm.Learn(genAttr_d, genClass);
                    // We can grab some information about the problem:
                    var numberOfSamplesGensvm = resultGensvm.NumberOfSamples;
                    var numberOfInputsGensvm  = resultGensvm.NumberOfInputs;
                    var numberOfOutputsGensvm = resultGensvm.NumberOfOutputs;

                    var trainingErrorGensvm   = resultGensvm.Training.Mean;
                    var validationErrorGensvm = resultGensvm.Validation.Mean;
                    var genCMsvm = resultGensvm.ToConfusionMatrix(genAttr_d, genClass);
                    genAccuracysvm = genCMsvm.Accuracy;
                }
                catch (AggregateException) { }
                //////////////////////////////////////////////////////////
                try
                {
                    var crossvalidationMixsvm = CrossValidation.Create(
                        k: 4,
                        learner: (p) => new MulticlassSupportVectorLearning <Gaussian>()
                    {
                        Learner = (param) => new SequentialMinimalOptimization <Gaussian>()
                        {
                            UseKernelEstimation = true
                        }
                    },
                        loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                        fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                        x: mixAttr_d, y: mixClass
                        );
                    var resultMixsvm = crossvalidationMixsvm.Learn(mixAttr_d, mixClass);
                    // We can grab some information about the problem:
                    var numberOfSamplesMixsvm = resultMixsvm.NumberOfSamples;
                    var numberOfInputsMixsvm  = resultMixsvm.NumberOfInputs;
                    var numberOfOutputsMixsvm = resultMixsvm.NumberOfOutputs;

                    var trainingErrorMixsvm   = resultMixsvm.Training.Mean;
                    var validationErrorMixsvm = resultMixsvm.Validation.Mean;

                    var mixCMsvm = resultMixsvm.ToConfusionMatrix(mixAttr_d, mixClass);
                    mixAccuracysvm = mixCMsvm.Accuracy;
                }
                catch (AggregateException) { }
                /////////////////////////////////////////////////
                if (correctsvm == 0 && incorrectsvm == 0)
                {
                    incorrectsvm = 1;
                }
                double knnRatio = 100.0 * correctknn / (correctknn + incorrectknn),
                       nbRatio  = 100.0 * correctnb / (correctnb + incorrectnb),
                       svmRatio = 100.0 * correctsvm / (correctsvm + incorrectsvm);
                System.Windows.MessageBox.Show(
                    "K Nearest Neighbours Classification:\nGenerated Data Correct Ratio: " +
                    knnRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "%\n" +
                    "Original Data X-Validation Accuracy: "
                    + (100.0 * readAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Generated Data X-Validation Accuracy: "
                    + (100.0 * genAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Mixed Data X-Validation Accuracy: "
                    + (100.0 * mixAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n"
                    + "\n\n" + "Naive Bayes Classification:\nGenerated Data Correct Ratio: " +
                    nbRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "%\n" +
                    "Original Data X-Validation Accuracy: "
                    + (100.0 * readAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Generated Data X-Validation Accuracy: "
                    + (100.0 * genAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Mixed Data X-Validation Accuracy: "
                    + (100.0 * mixAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" +
                    "\n\n" + "Support Vector Machine Classification:\nGenerated Data Correct Ratio: " +
                    svmRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "%\n" +
                    "Original Data X-Validation Accuracy: "
                    + (100.0 * readAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Generated Data X-Validation Accuracy: "
                    + (100.0 * genAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n" + "Mixed Data X-Validation Accuracy: "
                    + (100.0 * mixAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture)
                    + "%\n",
                    "Data Testing - extending dataset",
                    System.Windows.MessageBoxButton.OK);

                /*
                 * ///TEMP - do eksportowania danych do arkusza
                 *
                 *  using (var write = new System.IO.StreamWriter("TestDataDump.txt")){
                 *      write.WriteLine("ScoreTreshold," + scoreH.ToString());
                 *      write.WriteLine("NewDataAmt," + newData.ToString());
                 *      write.WriteLine("Generated Data Correct Ratio," +
                 *          knnRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          nbRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) +"," +
                 *          svmRatio.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                 *      write.WriteLine("Original Data X-Validation Accuracy," +
                 *          (100.0 * readAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * readAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * readAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                 *      write.WriteLine("Generated Data X-Validation Accuracy," +
                 *          (100.0 * genAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * genAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * genAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                 *  write.WriteLine("Mixed Data X-Validation Accuracy," +
                 *          (100.0 * mixAccuracy).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * mixAccuracynb).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture) + "," +
                 *          (100.0 * mixAccuracysvm).ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));
                 *
                 * }
                 *  System.Diagnostics.Process.Start("TestDataDump.txt");
                 */
            }
            dialogResult = System.Windows.MessageBox.Show("Do you want to open the file with generated data?", "Data testing - extended data", System.Windows.MessageBoxButton.YesNo);
            if (dialogResult == MessageBoxResult.Yes)
            {
                System.Diagnostics.Process.Start(savefiledir);
            }
        }
 /// <summary>
 ///   Setup hook: assigns a fresh <c>NaiveBayes</c> instance to
 ///   <c>NaiveBayesModel</c> so each run starts from an untrained model.
 ///   (Presumably invoked by a test/benchmark framework — the attribute
 ///   is not visible in this chunk; confirm against the enclosing class.)
 /// </summary>
 public void Setup()
 {
     NaiveBayesModel = new NaiveBayes();
 }
Exemple #6
0
        public void laplace_smoothing_missing_sample()
        {
            #region doc_laplace
            // The second feature column may take the values 0, 1 or 2, but the
            // training data below only ever shows 1 and 2. This gap lets us
            // verify that the Laplace rule still produces sensible estimates
            // for a symbol that never occurred in the training set:

            int[][] inputs =
            {
                //      input     output
                new[] { 0, 1 }, //  0
                new[] { 0, 2 }, //  0
                new[] { 0, 1 }, //  0
                new[] { 1, 2 }, //  1
                new[] { 0, 2 }, //  1
                new[] { 0, 2 }, //  1
                new[] { 1, 1 }, //  2
                new[] { 0, 1 }, //  2
                new[] { 1, 1 }, //  2
            };

            // Class labels for each of the rows above
            int[] outputs = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };

            // The data alone cannot reveal the full symbol alphabet, so the
            // model shape is specified by hand: three classes, a first column
            // with 2 possible symbols and a second column with 3.
            var model = new NaiveBayes(classes: 3, symbols: new[] { 2, 3 });

            // Wire the model into a learning algorithm
            var teacher = new NaiveBayesLearning
            {
                Model = model
            };

            // Enable the use of the Laplace rule
            teacher.Options.InnerOption.UseLaplaceRule = true;

            // Fit the Naive Bayes model to the data
            teacher.Learn(inputs, outputs);

            // Classify a sample containing a 0 in the second column
            int answer = model.Decide(new int[] { 0, 1 });
            #endregion

            Assert.AreEqual(0, answer);

            double prob = model.Probability(new int[] { 0, 1 }, out answer);
            Assert.AreEqual(0, answer);
            Assert.AreEqual(0.52173913043478259, prob, 1e-10);

            double error = new ZeroOneLoss(outputs)
            {
                Mean = true
            }.Loss(model.Decide(inputs));

            Assert.AreEqual(2 / 9.0, error);
        }
Exemple #7
0
        /// <summary>
        ///   End-to-end check of a discrete Naive Bayes text classifier: two sample
        ///   texts are turned into bag-of-words feature vectors, a model is estimated
        ///   from one example per class, and each text must then be classified back
        ///   into its own class.
        /// </summary>
        public void ComputeTest2()
        {
            // Some sample texts
            string[] spamTokens = Tokenize(@"I decided to sign up for the Disney Half Marathon. Half of a marathon is 13.1 miles. A full marathon is 26.2 miles. You may wonder why the strange number of miles. “26.2” is certainly not an even number. And after running 26 miles who cares about the point two? You might think that 26.2 miles is a whole number of kilometers. It isn’t. In fact, it is even worse in kilometers – 42.1648128. I bet you don’t see many t-shirts in England with that number printed on the front.");

            string[] loremTokens = Tokenize(@"Lorem ipsum dolor sit amet,  Nulla nec tortor. Donec id elit quis purus consectetur consequat. Nam congue semper tellus. Sed erat dolor, dapibus sit amet, venenatis ornare, ultrices ut, nisi. Aliquam ante. Suspendisse scelerisque dui nec velit. Duis augue augue, gravida euismod, vulputate ac, facilisis id, sem. Morbi in orci. Nulla purus lacus, pulvinar vel, malesuada ac, mattis nec, quam. Nam molestie scelerisque quam. Nullam feugiat cursus lacus.orem ipsum dolor sit amet.");

            // Their respective classes; index in this array is the class label
            string[] classes = { "spam", "lorem" };


            // Create a new Bag-of-Words for the texts
            BagOfWords bow = new BagOfWords(spamTokens, loremTokens)
            {
                // Limit the maximum number of occurrences in
                // the feature vector to a single instance
                MaximumOccurance = 1
            };

            // Define the symbols for the Naïve Bayes: each word feature can take
            // values 0..MaximumOccurance, i.e. MaximumOccurance + 1 symbols
            int[] symbols = new int[bow.NumberOfWords];
            for (int i = 0; i < symbols.Length; i++)
            {
                symbols[i] = bow.MaximumOccurance + 1;
            }

            // Create input and outputs for training (one example per class)
            int[][] inputs =
            {
                bow.GetFeatureVector(spamTokens),
                bow.GetFeatureVector(loremTokens)
            };

            int[] outputs =
            {
                0, // spam
                1, // lorem
            };

            // Create the naïve Bayes model
            NaiveBayes bayes = new NaiveBayes(2, symbols);

            // Seed every symbol probability with a tiny value, presumably to avoid
            // exact zeros for symbols unseen during estimation — TODO confirm
            // whether Estimate() accumulates on top of these or overwrites them.
            for (int i = 0; i < bayes.ClassCount; i++)
            {
                for (int j = 0; j < bayes.SymbolCount.Length; j++)
                {
                    for (int k = 0; k < bayes.SymbolCount[j]; k++)
                    {
                        bayes.Distributions[i, j][k] = 1e-10;
                    }
                }
            }

            // Estimate the model
            bayes.Estimate(inputs, outputs);


            // Sanity check: every per-class, per-feature distribution must sum to 1
            // (i.e. the estimated distributions are properly normalized)
            for (int i = 0; i < bayes.ClassCount; i++)
            {
                for (int j = 0; j < bayes.SymbolCount.Length; j++)
                {
                    double sum = bayes.Distributions[i, j].Sum();
                    Assert.AreEqual(1, sum, 1e-5);
                }
            }

            // Consume the model
            {
                // First an example to classify as lorem
                int[]  input  = bow.GetFeatureVector(loremTokens);
                int    answer = bayes.Compute(input);
                string result = classes[answer];

                Assert.AreEqual("lorem", result);
            }

            {
                // Then an example to classify as spam
                int[]  input  = bow.GetFeatureVector(spamTokens);
                int    answer = bayes.Compute(input);
                string result = classes[answer];

                Assert.AreEqual("spam", result);
            }
        }
        /// <summary>
        ///   Background worker loop: builds ngram/sentiment lookup tables from the
        ///   database, then continuously picks unclassified raw tweets, queues their
        ///   hashtags/retweets for further processing, auto-classifies them with a
        ///   Naive Bayes sentiment classifier, and stamps the result.
        ///   Runs until <c>CrawlerStatus.KeepRunning</c> becomes false.
        /// </summary>
        public static void TweetClassifier()
        {
            var context         = new OclumenContext();
            var classifier      = new NaiveBayes();
            var smoothingFactor = 1;

            // Lookup tables: ngram text -> per-sentiment smoothed counts.
            // "St" variants hold stemmed ngrams; "_rt" key suffix holds retweet counts.
            var unigrams   = new Dictionary <string, List <KeyValuePair <Sentiment, decimal> > >();
            var unigramsSt = new Dictionary <string, List <KeyValuePair <Sentiment, decimal> > >();
            var bigrams    = new Dictionary <string, List <KeyValuePair <Sentiment, decimal> > >();
            var bigramsSt  = new Dictionary <string, List <KeyValuePair <Sentiment, decimal> > >();

            foreach (var ngram in context.BasicNgrams.Where(x => x.Cardinality == 1))
            {
                string key = ngram.Text.ToLower();
                unigrams.Add(key, BuildSentimentCounts(ngram.PositiveCount, ngram.NeutralCount, ngram.NegativeCount, smoothingFactor));
                unigrams.Add(key + "_rt", BuildSentimentCounts(ngram.RtPositiveCount, ngram.RtNeutralCount, ngram.RtNegativeCount, smoothingFactor));
            }

            foreach (var ngram in context.StemmedNgrams.Where(x => x.Cardinality == 1))
            {
                string key = ngram.Text.ToLower();
                unigramsSt.Add(key, BuildSentimentCounts(ngram.PositiveCount, ngram.NeutralCount, ngram.NegativeCount, smoothingFactor));
                unigramsSt.Add(key + "_rt", BuildSentimentCounts(ngram.RtPositiveCount, ngram.RtNeutralCount, ngram.RtNegativeCount, smoothingFactor));
            }

            foreach (var ngram in context.BasicNgrams.Where(x => x.Cardinality == 2))
            {
                string key = ngram.Text.ToLower();
                bigrams.Add(key, BuildSentimentCounts(ngram.PositiveCount, ngram.NeutralCount, ngram.NegativeCount, smoothingFactor));
                bigrams.Add(key + "_rt", BuildSentimentCounts(ngram.RtPositiveCount, ngram.RtNeutralCount, ngram.RtNegativeCount, smoothingFactor));
            }

            foreach (var ngram in context.StemmedNgrams.Where(x => x.Cardinality == 2))
            {
                string key = ngram.Text.ToLower();
                bigramsSt.Add(key, BuildSentimentCounts(ngram.PositiveCount, ngram.NeutralCount, ngram.NegativeCount, smoothingFactor));
                bigramsSt.Add(key + "_rt", BuildSentimentCounts(ngram.RtPositiveCount, ngram.RtNeutralCount, ngram.RtNegativeCount, smoothingFactor));
            }


            int sleepTime = AppConfigSettings.ProcessTweetThreadSleepTime;

            while (CrawlerStatus.KeepRunning)
            {
                // BUG FIX: First() throws InvalidOperationException when no tweet
                // matches, which made the null check below unreachable and crashed
                // the worker once the backlog was drained. FirstOrDefault() returns
                // null instead, so the loop can sleep and retry.
                var tweetToProcess = context.RawTweets.FirstOrDefault(x => x.AutoSentimentTimestamp == DateTime.MinValue);

                if (tweetToProcess == null)
                {
                    Thread.Sleep(sleepTime);
                    continue;
                }

                // add the hashtags and retweets to the shared process queues
                String originalTweet;
                var    hashtags = TwitterTextUtility.GetHashtags(tweetToProcess.text);
                var    retweets = TwitterTextUtility.GetRetweets(tweetToProcess.text, out originalTweet);

                lock (_syncRoot)
                {
                    foreach (var hashtag in hashtags)
                    {
                        UnprocessedHashtags.Enqueue(hashtag);
                    }

                    foreach (var retweet in retweets)
                    {
                        UnprocessedRetweets.Enqueue(retweet);
                    }
                }

                // auto classify the tweet with each ngram model variant
                tweetToProcess.AutoUnigram        = (int)classifier.GetTextSentiment(tweetToProcess.text, 1, 1, false, Dictionary, context.BasicNgrams, context, unigrams);
                tweetToProcess.AutoUnigramStemmed = (int)classifier.GetTextSentiment(tweetToProcess.text, 1, 1, true, Dictionary, context.StemmedNgrams, context, unigramsSt);

                tweetToProcess.AutoBigram        = (int)classifier.GetTextSentiment(tweetToProcess.text, 2, 1, false, Dictionary, context.BasicNgrams, context, bigrams);
                tweetToProcess.AutoBigramStemmed = (int)classifier.GetTextSentiment(tweetToProcess.text, 2, 1, true, Dictionary, context.StemmedNgrams, context, bigramsSt);

                // stamping the timestamp marks the tweet as processed for the query above
                tweetToProcess.AutoSentimentTimestamp = DateTime.UtcNow;

                context.SaveChanges();
            }
        }

        /// <summary>
        ///   Builds the per-sentiment smoothed count list for a single ngram
        ///   (additive smoothing: each raw count is incremented by the factor).
        /// </summary>
        private static List <KeyValuePair <Sentiment, decimal> > BuildSentimentCounts(
            decimal positiveCount, decimal neutralCount, decimal negativeCount, decimal smoothingFactor)
        {
            var counts = new List <KeyValuePair <Sentiment, decimal> >(3);
            counts.Add(new KeyValuePair <Sentiment, decimal>(Sentiment.Positive, positiveCount + smoothingFactor));
            counts.Add(new KeyValuePair <Sentiment, decimal>(Sentiment.Neutral, neutralCount + smoothingFactor));
            counts.Add(new KeyValuePair <Sentiment, decimal>(Sentiment.Negative, negativeCount + smoothingFactor));
            return counts;
        }
 /// <summary>
 ///   Initialization hook: assigns a fresh <c>NaiveBayes</c> instance to the
 ///   <c>_naiveBayes</c> field so subsequent calls start from an untrained model.
 /// </summary>
 public void Initialize()
 {
     _naiveBayes = new NaiveBayes();
 }
Exemple #10
0
 /// <summary>
 ///   Creates a Naive Bayes learner bound to its owning program, the model
 ///   instance it will train, and the codification used to encode the data.
 /// </summary>
 /// <param name="parent">Owning program instance.</param>
 /// <param name="target">Naive Bayes model to be trained.</param>
 /// <param name="codebook">Codification (codebook) for the input data.</param>
 public NBLearner(Program parent, NaiveBayes target, Codification codebook)
 {
     Codebook = codebook;
     Target   = target;
     Parent   = parent;
 }
Exemple #11
0
        /// <summary>
        ///   Builds a Weka training set from the DOM nodes and their matching
        ///   selector features, trains a Naive Bayes classifier on it (oversampling
        ///   the target nodes to balance the classes), and records which features
        ///   were used.
        /// </summary>
        public void LearnModel()
        {
            Init();

            // Tag each candidate node with every selector feature that matches it
            foreach (Feature feature in DomPool.SelectorFeatures)
            {
                String             selector = feature.ToString();
                HashSet <HtmlNode> matches  = DomPool.RunXpathQuery(selector);
                foreach (HtmlNode node in matches)
                {
                    if (allNodes.Contains(node))
                    {
                        nodeFeatures[node].Add(selector);
                    }
                }
            }

            FastVector attributes  = GetDataSetAtts();
            Instances  trainingSet = new Instances("TS", attributes, 10);

            // Last attribute in the vector is the class label
            trainingSet.setClassIndex(attributes.size() - 1);

            foreach (HtmlNode node in allNodes)
            {
                Instance item = new SparseInstance(attributes.size());

                // Binary indicator per feature: 1 when the node matched the selector
                for (int i = 0; i < attributes.size() - 1; i++)
                {
                    weka.core.Attribute attr = (weka.core.Attribute)attributes.elementAt(i);
                    item.setValue(attr, nodeFeatures[node].Contains(attr.name()) ? 1 : 0);
                }

                // Set the class value and attach the instance to the data set
                weka.core.Attribute classAttr = (weka.core.Attribute)attributes.elementAt(attributes.size() - 1);
                bool isTarget = DomPool.TargetNodes.Contains(node);
                item.setValue(classAttr, isTarget ? "yes" : "no");
                item.setDataset(trainingSet);

                if (isTarget)
                {
                    // Oversample positives by the negative/positive ratio to balance
                    // the class distribution (integer ratio; 0 copies when targets
                    // outnumber non-targets — preserved from the original logic)
                    int copies = DomPool.NonTargetNodes.Count() / DomPool.TargetNodes.Count();
                    for (int t = 0; t < copies; t++)
                    {
                        trainingSet.add(new SparseInstance(item));
                    }
                }
                else
                {
                    trainingSet.add(item);
                }
            }

            // Train the Naive Bayes classifier and keep it as the current model
            NaiveBayes cls = new NaiveBayes();
            cls.buildClassifier(trainingSet);
            classifier = cls;

            // Remember every selector feature that participated in training
            FeaturesUsed = new HashSet <string>();
            foreach (Feature f in DomPool.SelectorFeatures)
            {
                FeaturesUsed.Add(f.ToString());
            }
        }
        /// <summary>
        ///   Builds a feature vector from the selected indicator series, dumps it to
        ///   CSV, splits it into training/test partitions by percentage, runs the
        ///   chosen ML algorithm (optionally with 10-fold cross-validation) and
        ///   returns the resulting binary-classification accuracy.
        /// </summary>
        private static double CalculateAccuracy(List <int> indicators, int mlAlgorithm, bool isCrossValidationEnabled, int minRowCount, double trainingSetPercentage, double[] smaOut, double[] wmaOut, double[] emaOut, double[] macdOut, double[] rsiOut, double[] williamsROut, double[] stochasticsOut, double[] closesOut)
        {
            FeatureVector vector = new FeatureVector();

            // Candidate indicator columns in a fixed order; only the ones selected
            // in "indicators" are added to the feature vector.
            var candidates = new[]
            {
                Tuple.Create(IndicatorService.SMA, "SMA", smaOut),
                Tuple.Create(IndicatorService.WMA, "WMA", wmaOut),
                Tuple.Create(IndicatorService.EMA, "EMA", emaOut),
                Tuple.Create(IndicatorService.MACD, "MACD", macdOut),
                Tuple.Create(IndicatorService.RSI, "RSI", rsiOut),
                Tuple.Create(IndicatorService.WilliamsR, "WilliamsR", williamsROut),
                Tuple.Create(IndicatorService.Stochastics, "Stochastics", stochasticsOut),
            };

            foreach (var candidate in candidates)
            {
                if (indicators.Contains(candidate.Item1))
                {
                    vector.AddColumn(candidate.Item2, candidate.Item3.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());
                }
            }

            // NOTE(review): the label column formats with string.Format (current
            // culture) while the indicator columns use InvariantCulture — confirm
            // this inconsistency is intentional.
            vector.AddColumn("label", closesOut.Select(p => (object)string.Format("{0:0.0}", p).ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());

            // NOTE(review): hard-coded, user-specific dump path; this will fail on
            // any other machine — consider making it configurable.
            new CSVExporter(vector).Export("c:\\users\\yasin\\indicatorOutput.csv");

            int rowCount     = vector.Values[0].Length;
            int trainingRows = (int)(rowCount * trainingSetPercentage);

            // Split every column into a training prefix and a test remainder
            FeatureVector training = new FeatureVector();
            FeatureVector test     = new FeatureVector();

            for (int col = 0; col < vector.ColumnName.Count; col++)
            {
                training.AddColumn(vector.ColumnName[col], vector.Values[col].Take(trainingRows).ToArray());
                test.AddColumn(vector.ColumnName[col], vector.Values[col].Skip(trainingRows).Take(rowCount).ToArray());
            }

            double accuracy = 0;

            if (mlAlgorithm == MLAService.LIN_REG)
            {
                var evaluator = new BinaryClassificationEvaluator();
                var estimator = new LinearRegression();
                if (isCrossValidationEnabled)
                {
                    var cvModel = (CrossValidatorModel)new CrossValidator(estimator, evaluator, 10).Fit(training);
                    evaluator.evaluate(cvModel.transform(test));
                }
                else
                {
                    var model = (LinearRegressionModel)estimator.Fit(training);
                    evaluator.evaluate(model.transform(test));
                }
                accuracy = evaluator.Accuracy;
            }
            else if (mlAlgorithm == MLAService.LOG_REG)
            {
                var evaluator = new BinaryClassificationEvaluator();
                var estimator = new LogisticRegression();
                if (isCrossValidationEnabled)
                {
                    var cvModel = (CrossValidatorModel)new CrossValidator(estimator, evaluator, 10).Fit(training);
                    evaluator.evaluate(cvModel.transform(test));
                }
                else
                {
                    var model = (LogisticRegressionModel)estimator.Fit(training);
                    evaluator.evaluate(model.transform(test));
                }
                accuracy = evaluator.Accuracy;
            }
            else if (mlAlgorithm == MLAService.NAI_BAY)
            {
                var evaluator = new BinaryClassificationEvaluator();
                var estimator = new NaiveBayes();
                if (isCrossValidationEnabled)
                {
                    var cvModel = (CrossValidatorModel)new CrossValidator(estimator, evaluator, 10).Fit(training);
                    evaluator.evaluate(cvModel.transform(test));
                }
                else
                {
                    var model = (NaiveBayesModel)estimator.Fit(training);
                    evaluator.evaluate(model.transform(test));
                }
                accuracy = evaluator.Accuracy;
            }

            return accuracy;
        }
Exemple #13
0
 /// <summary>
 ///   Creates the Naive Bayes machine from the configured class count and
 ///   per-feature symbol counts, storing it in the <c>nb</c> field.
 /// </summary>
 public void CreateNaiveBayes()
 {
     // Removed the unused local "cols" (column names were never passed anywhere).
     nb = new NaiveBayes(ClassCount, SymbolCounts);
 }
Exemple #14
0
        static void Main(string[] args)
        {
            /*
             * Takes a csv files as input and trains a naive bayes classfier, if the test flag is set the rountine
             * will calculate the accuracy of the input files using the previous saved model in the exeution directioy
             * If the test flag is set a new classifier is not trainied
             * but the previous model is loaded and used agains the test data.
             *
             * arg 1 = training file or test file
             * arg 2 = label file
             * arg 3 = test flag (-s or -S)
             * arg 4 = Specify file name of model file
             */

            const int minargs = 2;
            const int maxargs = 4;
            const int Folds   = 4;

            Accord.Math.Random.Generator.Seed = 0;
            string trainingFname = null;
            string labelFname    = null;
            string modelFname    = "NBmodel.sav"; // Default model file name
            bool   NoTrain       = false;

            Functions.Welcome();
            int numArgs = Functions.parseCommandLine(args, maxargs, minargs);

            if (numArgs == 0)
            {
                Console.WriteLine(Strings.resources.usage);
                System.Environment.Exit(1);
            }

            if (numArgs == 2)
            {
                trainingFname = args[0];
                labelFname    = args[1];
            }
            if (numArgs == 3) // no use for third parameter yet!
            {
                if (args[2] == ("-s") | args[2] == ("-S"))
                {
                    NoTrain       = true;
                    trainingFname = args[0];
                    labelFname    = args[1];
                }
                else
                {
                    Console.WriteLine(Strings.resources.usage);
                    System.Environment.Exit(1);
                }
            }

            if (numArgs == 4)
            {
                NoTrain       = true;
                trainingFname = args[0];
                labelFname    = args[1];
                modelFname    = args[3];
            }
            //
            // Check if the training and label files exist and are not locked by anohter process
            //

            if (!Utility.Functions.checkFile(trainingFname))
            {
                Console.WriteLine("Error opening file{0}", trainingFname);
                System.Environment.Exit(1);
            }
            if (!Functions.checkFile(labelFname))
            {
                Console.WriteLine("Error opening file {0}", labelFname);
                System.Environment.Exit(1);
            }

            //
            // Read in the training and label files, CSV format
            //
            CsvReader training_samples = new CsvReader(trainingFname, false);

            int[,] MatrixIn = training_samples.ToMatrix <int>();
            int[][] trainingset = Functions.convertToJaggedArray(MatrixIn);

            //
            // Naive Bayes gets trained on integer arrays or arrays of "strings"
            //
            CsvReader label_samples = new CsvReader(labelFname, false);

            int[,] labelsIn = label_samples.ToMatrix <int>(); // COnvert the labels to a matrix and then to jagged array
            int[][] LabelSet = Functions.convertToJaggedArray(labelsIn);
            int[]   output   = Functions.convertTointArray(LabelSet);

            NaiveBayes loaded_nb;   // setup for loading a trained model if one exists

            if (!NoTrain)
            {
                // Create a new Naive Bayes learning instance
                var learner = new NaiveBayesLearning();

                // Create a Naive Bayes classifier and train with the input datasets
                NaiveBayes classifier = learner.Learn(trainingset, output);

                /* Cross-validation is a technique for estimating the performance of a predictive model.
                 * It can be used to measure how the results of a statistical analysis will generalize to
                 * an independent data set. It is mainly used in settings where the goal is prediction, and
                 * one wants to estimate how accurately a predictive model will perform in practice.
                 *
                 * One round of cross-validation involves partitioning a sample of data into complementary
                 * subsets, performing the analysis on one subset (called the training set), and validating
                 * the analysis on the other subset (called the validation set or testing set). To reduce
                 * variability, multiple rounds of cross-validation are performed using different partitions,
                 * and the validation results are averaged over the rounds
                 */

                // Gets results based on performing a k-fold cross validation based on the input training set
                // Create a cross validation instance


                var cv = CrossValidation.Create(k: Folds, learner: (p) => new NaiveBayesLearning(),
                                                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                                                fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                                                x: trainingset, y: output);

                var result = cv.Learn(trainingset, output);

                Console.WriteLine("Performing n-fold cross validation where n = {0}", cv.K);

                // We can grab some information about the problem:
                Console.WriteLine("Cross Validation Results");
                Console.WriteLine("     number of samples {0}", result.NumberOfSamples);
                Console.WriteLine("     number of features: {0}", result.NumberOfInputs);
                Console.WriteLine("     number of outputs {0}", result.NumberOfOutputs);
                Console.WriteLine("     Training Error: {0:n2}", result.Training.Mean); // should be 0 or no
                Console.WriteLine("     Validation Mean: {0}\n", result.Validation.Mean);

                Console.WriteLine("Creating General Confusion Matrix from Cross Validation");
                GeneralConfusionMatrix gcm = result.ToConfusionMatrix(trainingset, output);
                double accuracy            = gcm.Accuracy; // should be 0.625
                Console.WriteLine(" GCM Accuracy {0}%\n", accuracy * 100);


                ConfusionMatrix cm = ConfusionMatrix.Estimate(classifier, trainingset, output);
                Console.WriteLine("Confusion Error {0}", cm.Error);
                Console.WriteLine("Confusion accuracy {0}", cm.Accuracy);
                double tp     = cm.TruePositives;
                double tn     = cm.TrueNegatives;
                double fscore = cm.FScore;
                double fp     = cm.FalsePositives;
                double fn     = cm.FalseNegatives;
                Console.WriteLine("TP = {0},TN = {1}, FP = {2}, FN = {3}, Fscore = {4} ", tp, tn, fp, fn, fscore);


                // Save the model created from the training set

                classifier.Save("NBmodel.sav", compression: SerializerCompression.None);
                Console.WriteLine("Successfully saved the model");
            }
            else
            {
                // load a previous model
                loaded_nb = Serializer.Load <NaiveBayes>(modelFname);                               // Load the model
                int[]  results  = loaded_nb.Decide(trainingset);                                    // Make preditions from the input
                double accuracy = Functions.CalculateAccuraccy(output, results);
                Console.WriteLine("Accuracy of predictions = {0}%", Math.Round(accuracy * 100, 2)); // Compare the predicions to the labels
            }
        }
Exemple #15
0
        /// <summary>
        ///   Fits a Gaussian Naive Bayes classifier to the data currently shown in
        ///   the learning-source grid and visualizes samples drawn from the model.
        /// </summary>
        private void btnCreate_Click(object sender, EventArgs e)
        {
            // Guard: nothing to learn from until the user loads a dataset.
            if (dgvLearningSource.DataSource == null)
            {
                MessageBox.Show("Please load some data first.");
                return;
            }

            // Class labels for this two-group sample problem.
            sourceClasses = new string[] { "G1", "G2" };

            // Commit any in-progress cell edits before reading the grid.
            dgvLearningSource.EndEdit();

            // Pull the grid contents into a numeric matrix.
            double[,] table = (dgvLearningSource.DataSource as DataTable).ToMatrix(out sourceColumns);

            // Columns 0-1 are the input features; column 2 holds the class label.
            double[][] inputs = table.Submatrix(null, 0, 1).ToArray();
            int[] labels = table.GetColumn(2).ToInt32();
            string[] featureNames = sourceColumns.Submatrix(first: 2);

            // Build a Naive Bayes model (2 classes, 2 Gaussian inputs) and fit it.
            bayes = new NaiveBayes<NormalDistribution>(2, 2, new NormalDistribution());
            double error = bayes.Estimate(inputs, labels);

            // Show the per-class, per-feature distributions that were estimated.
            dataGridView1.DataSource = new ArrayDataView(bayes.Distributions, featureNames, sourceClasses);

            // Draw 1000 synthetic points from each class-conditional distribution.
            var x1 = bayes.Distributions[0, 0].Generate(1000);
            var y1 = bayes.Distributions[0, 1].Generate(1000);
            var x2 = bayes.Distributions[1, 0].Generate(1000);
            var y2 = bayes.Distributions[1, 1].Generate(1000);

            // Stack the samples as (x, y) rows, one matrix per class.
            var w1 = Matrix.Stack(x1, y1).Transpose();
            var w2 = Matrix.Stack(x2, y2).Transpose();

            // Class indicator column: first 1000 rows are class 0, the rest class 1.
            var z = Matrix.Vector(2000, value: 1.0);
            for (int i = 0; i < 1000; i++)
            {
                z[i] = 0;
            }

            var graph = Matrix.Stack(w1, w2).Concatenate(z);

            CreateScatterplot(zedGraphControl2, graph);
        }
Exemple #16
0
        /// <summary>
        ///   Builds a feature matrix from the indicators the user selected, splits it
        ///   into training/test sets by percentage, then evaluates linear regression,
        ///   logistic regression and Naive Bayes — with k-fold cross validation when
        ///   a positive fold count was chosen. Results are written to the UI labels.
        /// </summary>
        private void buttonForDataSplitNext_Click(object sender, EventArgs e)
        {
            trainingSetPercentage = (double)numericUpDownForTrainingSetPercent.Value / 100.0;
            numFolds = (int)numericUpDownForNumFolds.Value;

            double[] smaOut = null;
            double[] wmaOut = null;
            double[] emaOut = null;
            double[] macdOut = null;
            double[] stochasticsOut = null;
            double[] williamsROut = null;
            double[] rsiOut = null;

            // "Tarih"/"Kapanis" are the date/close columns of the source data.
            var data = IndicatorService.GetData(code, targetDate, new string[] { "Tarih", "Kapanis" }, numberOfData + 1);

            // Compute only the indicator series whose checkbox was ticked.
            if (isSMAChecked)
            {
                smaOut = IndicatorDataPreprocessor.GetSMAOut(MovingAverage.Simple(code, targetDate, smaPeriod, numberOfData));
            }
            if (isWMAChecked)
            {
                wmaOut = IndicatorDataPreprocessor.GetWMAOut(MovingAverage.Weighted(code, targetDate, wmaPeriod, numberOfData));
            }
            if (isEMAChecked)
            {
                emaOut = IndicatorDataPreprocessor.GetEMAOut(MovingAverage.Exponential(code, targetDate, emaPeriod, numberOfData));
            }
            if (isMACDChecked)
            {
                macdOut = IndicatorDataPreprocessor.GetMACDOut(new MovingAverageConvergenceDivergence(code, targetDate, firstPeriod, secondPeriod, triggerPeriod, numberOfData));
            }
            if (isStochasticsChecked)
            {
                stochasticsOut = IndicatorDataPreprocessor.GetStochasticsOut(new Stochastics(code, targetDate, fastKPeriod, fastDPeriod, slowDPeriod, numberOfData));
            }
            if (isWilliamsRChecked)
            {
                williamsROut = IndicatorDataPreprocessor.GetWilliamsROut(WilliamsR.Wsr(code, targetDate, williamsRPeriod, numberOfData));
            }
            if (isRSIChecked)
            {
                rsiOut = IndicatorDataPreprocessor.GetRSIOut(RelativeStrengthIndex.Rsi(code, targetDate, rsiPeriod, numberOfData));
            }

            // The label column (closes) is always produced.
            double[] closesOut = IndicatorDataPreprocessor.GetClosesOut(numberOfData, data);

            // Indicators warm up over different periods, so the series have
            // different lengths; truncate everything to the shortest one.
            int minRowCount = MinimumRowCount(smaOut, wmaOut, emaOut, macdOut, rsiOut, williamsROut, stochasticsOut, closesOut);

            var fv = new FeatureVector();
            AddIndicatorColumn(fv, "SMA", isSMAChecked, smaOut, minRowCount);
            AddIndicatorColumn(fv, "WMA", isWMAChecked, wmaOut, minRowCount);
            AddIndicatorColumn(fv, "EMA", isEMAChecked, emaOut, minRowCount);
            AddIndicatorColumn(fv, "MACD", isMACDChecked, macdOut, minRowCount);
            AddIndicatorColumn(fv, "RSI", isRSIChecked, rsiOut, minRowCount);
            AddIndicatorColumn(fv, "WilliamsR", isWilliamsRChecked, williamsROut, minRowCount);
            AddIndicatorColumn(fv, "Stochastics", isStochasticsChecked, stochasticsOut, minRowCount);

            // The label is the close value bucketed to one decimal place.
            fv.AddColumn("label", closesOut.Select(p => (object)string.Format("{0:0.0}", p).ToString(CultureInfo.InvariantCulture)).Take(minRowCount).ToArray());

            // Percentage split: the first trainingCount rows train, the rest test.
            var training = new FeatureVector();
            var test = new FeatureVector();
            int count = fv.Values[0].Length;
            int trainingCount = (int)(count * trainingSetPercentage);

            for (int i = 0; i < fv.ColumnName.Count; i++)
            {
                training.AddColumn(fv.ColumnName[i], fv.Values[i].Take(trainingCount).ToArray());
                // Take(count) is only an upper bound: it keeps every remaining row.
                test.AddColumn(fv.ColumnName[i], fv.Values[i].Skip(trainingCount).Take(count).ToArray());
            }

            if (numFolds > 0)
            {
                // k-fold cross validation path.
                BinaryClassificationEvaluator bce1 = new BinaryClassificationEvaluator();
                LinearRegression linearRegression = new LinearRegression();
                CrossValidator cvLinReg = new CrossValidator(linearRegression, bce1, numFolds);
                CrossValidatorModel cvLinRegModel = (CrossValidatorModel)cvLinReg.Fit(training);
                FeatureVector linRegPredictions = cvLinRegModel.transform(test);
                bce1.evaluate(linRegPredictions);
                linRegAcc = bce1.Accuracy;

                BinaryClassificationEvaluator bce2 = new BinaryClassificationEvaluator();
                LogisticRegression logisticRegression = new LogisticRegression();
                CrossValidator cvLogReg = new CrossValidator(logisticRegression, bce2, numFolds);
                CrossValidatorModel cvLogRegModel = (CrossValidatorModel)cvLogReg.Fit(training);
                FeatureVector logRegPredictions = cvLogRegModel.transform(test);
                bce2.evaluate(logRegPredictions);
                logRegAcc = bce2.Accuracy;

                BinaryClassificationEvaluator bce3 = new BinaryClassificationEvaluator();
                NaiveBayes naiveBayes = new NaiveBayes();
                CrossValidator cvNaiBay = new CrossValidator(naiveBayes, bce3, numFolds);
                CrossValidatorModel cvNaiBayModel = (CrossValidatorModel)cvNaiBay.Fit(training);
                FeatureVector naiBayPredictions = cvNaiBayModel.transform(test);
                bce3.evaluate(naiBayPredictions);
                naiBayAcc = bce3.Accuracy;
            }
            else
            {
                // Single train/test evaluation (no cross validation).
                BinaryClassificationEvaluator bce1 = new BinaryClassificationEvaluator();
                LinearRegression linearRegression = new LinearRegression();
                LinearRegressionModel linearRegressionModel = (LinearRegressionModel)linearRegression.Fit(training);
                FeatureVector linRegPredictions = linearRegressionModel.transform(test);
                bce1.evaluate(linRegPredictions);
                linRegAcc = bce1.Accuracy;

                BinaryClassificationEvaluator bce2 = new BinaryClassificationEvaluator();
                LogisticRegression logisticRegression = new LogisticRegression();
                LogisticRegressionModel logisticRegressionModel = (LogisticRegressionModel)logisticRegression.Fit(training);
                FeatureVector logRegPredictions = logisticRegressionModel.transform(test);
                bce2.evaluate(logRegPredictions);
                logRegAcc = bce2.Accuracy;

                BinaryClassificationEvaluator bce3 = new BinaryClassificationEvaluator();
                NaiveBayes naiveBayes = new NaiveBayes();
                NaiveBayesModel naiveBayesModel = (NaiveBayesModel)naiveBayes.Fit(training);
                FeatureVector naiBayPredictions = naiveBayesModel.transform(test);
                bce3.evaluate(naiBayPredictions);
                naiBayAcc = bce3.Accuracy;
            }

            labelForLinRegAcc.Text = linRegAcc.ToString();
            labelForLogRegAcc.Text = logRegAcc.ToString();
            labelForNaiBayAcc.Text = naiBayAcc.ToString();

            panelForResults.BringToFront();
        }

        /// <summary>
        ///   Smallest length among the non-null series. Falls back to the original
        ///   1,000,000 sentinel when no series was computed. (This also applies the
        ///   sentinel consistently to the first series, which the previous code
        ///   assigned without taking the minimum.)
        /// </summary>
        private static int MinimumRowCount(params double[][] series)
        {
            int min = 1000000;
            foreach (double[] s in series)
            {
                if (s != null && s.Length < min)
                {
                    min = s.Length;
                }
            }
            return min;
        }

        /// <summary>
        ///   Appends one indicator column (stringified with invariant culture and
        ///   truncated to rowCount rows) when its checkbox was selected.
        /// </summary>
        private static void AddIndicatorColumn(FeatureVector fv, string name, bool isChecked, double[] values, int rowCount)
        {
            if (isChecked)
            {
                fv.AddColumn(name, values.Select(p => (object)p.ToString(CultureInfo.InvariantCulture)).Take(rowCount).ToArray());
            }
        }
Exemple #17
0
        /// <summary>
        ///   End-to-end example: encodes Mitchell's categorical "play tennis"
        ///   dataset, learns a discrete Naive Bayes model, classifies one new
        ///   instance and checks the predicted class and posterior probabilities.
        ///   The #region markers feed the library documentation, so the code
        ///   inside them is kept verbatim.
        /// </summary>
        public void ComputeTest()
        {
            #region doc_mitchell
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            data.Rows.Add("D1", "Sunny", "Hot", "High", "Weak", "No");
            data.Rows.Add("D2", "Sunny", "Hot", "High", "Strong", "No");
            data.Rows.Add("D3", "Overcast", "Hot", "High", "Weak", "Yes");
            data.Rows.Add("D4", "Rain", "Mild", "High", "Weak", "Yes");
            data.Rows.Add("D5", "Rain", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D6", "Rain", "Cool", "Normal", "Strong", "No");
            data.Rows.Add("D7", "Overcast", "Cool", "Normal", "Strong", "Yes");
            data.Rows.Add("D8", "Sunny", "Mild", "High", "Weak", "No");
            data.Rows.Add("D9", "Sunny", "Cool", "Normal", "Weak", "Yes");
            data.Rows.Add("D10", "Rain", "Mild", "Normal", "Weak", "Yes");
            data.Rows.Add("D11", "Sunny", "Mild", "Normal", "Strong", "Yes");
            data.Rows.Add("D12", "Overcast", "Mild", "High", "Strong", "Yes");
            data.Rows.Add("D13", "Overcast", "Hot", "Normal", "Weak", "Yes");
            data.Rows.Add("D14", "Rain", "Mild", "High", "Strong", "No");
            #endregion

            #region doc_codebook
            // Create a new codification codebook to
            // convert strings into discrete symbols
            Codification codebook = new Codification(data,
                                                     "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Extract input and output pairs to train
            DataTable symbols = codebook.Apply(data);
            int[][]   inputs  = symbols.ToArray <int>("Outlook", "Temperature", "Humidity", "Wind");
            int[]     outputs = symbols.ToArray <int>("PlayTennis");
            #endregion

            #region doc_learn
            // Create a new Naive Bayes learning
            var learner = new NaiveBayesLearning();

            // Learn a Naive Bayes model from the examples
            NaiveBayes nb = learner.Learn(inputs, outputs);
            #endregion


            #region doc_test
            // Consider we would like to know whether one should play tennis at a
            // sunny, cool, humid and windy day. Let us first encode this instance
            int[] instance = codebook.Translate("Sunny", "Cool", "High", "Strong");

            // Let us obtain the numeric output that represents the answer
            int c = nb.Decide(instance); // answer will be 0

            // Now let us convert the numeric output to an actual "Yes" or "No" answer
            string result = codebook.Translate("PlayTennis", c); // answer will be "No"

            // We can also extract the probabilities for each possible answer
            double[] probs = nb.Probabilities(instance); // { 0.795, 0.205 }
            #endregion

            // Verify the decoded answer, the posterior for each class, and that
            // the posteriors form a valid (normalized, NaN-free) distribution.
            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.795, probs[0], 1e-3);
            Assert.AreEqual(0.205, probs[1], 1e-3);
            Assert.AreEqual(1, probs.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(probs[0]));
            Assert.AreEqual(2, probs.Length);
        }
    // Unity lifecycle hook: runs once before the first frame update.
    // Wires up every child character, the pathfinding scheduler, and the
    // Naive Bayes model. Initialization order matters: behaviourScripts[0]
    // must be populated before the NaiveBayes constructor reads its grid.
    void Start()
    {
        // One behaviour script per child of the `characters` container.
        numChars = characters.transform.childCount;
        //		Debug.Log (numChars);
        behaviourScripts = new MasterBehaviour[numChars];

        for (int i = 0; i < numChars; i++) {
            MasterBehaviour mb = characters.transform.GetChild(i).GetComponent<MasterBehaviour>();
            // Hand each character its navigation plane, grid resolution and the sniper's position.
            mb.Starta(plane, nodeSize, sniper.transform.position);
            behaviourScripts[i] = mb;
        }

        // Shared pathfinding state plus bookkeeping for dead / seen-dead characters.
        pathfinder = new PathfindingScheduler ();
        pathFindingChars = new LinkedList<GameObject> ();
        deadSet = new List<GameObject>();
        seenDeadSet = new List<Vector3> ();

        // Distance tuning constants (world units).
        shootDistance = 10f;
        sightDist = 100.0f;

        maxDist = 100f;
        // NB is seeded with the hidden-space cost grid taken from the first
        // character's goal state — hence the ordering note above.
        NB = new NaiveBayes (behaviourScripts [0].reachGoal.state.sGrid.hiddenSpaceCost, maxDist);
        NB.Starta ();
        currentlySearching = new List<MasterBehaviour> ();
    }
Exemple #19
0
        /// <summary>
        ///   Learns a hybrid Naive Bayes (discrete + Gaussian inputs) on a variant
        ///   of Mitchell's tennis dataset where Temperature and Humidity are
        ///   continuous, then verifies the prediction for one new instance.
        /// </summary>
        public void ComputeTest2()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Temperature and Humidity are kept as continuous columns.
            data.Columns["Temperature"].DataType = typeof(double);
            data.Columns["Humidity"].DataType = typeof(double);

            object[][] days =
            {
                new object[] { "D1", "Sunny", 38.0, 96.0, "Weak", "No" },
                new object[] { "D2", "Sunny", 39.0, 90.0, "Strong", "No" },
                new object[] { "D3", "Overcast", 38.0, 75.0, "Weak", "Yes" },
                new object[] { "D4", "Rain", 25.0, 87.0, "Weak", "Yes" },
                new object[] { "D5", "Rain", 12.0, 30.0, "Weak", "Yes" },
                new object[] { "D6", "Rain", 11.0, 35.0, "Strong", "No" },
                new object[] { "D7", "Overcast", 10.0, 40.0, "Strong", "Yes" },
                new object[] { "D8", "Sunny", 24.0, 90.0, "Weak", "No" },
                new object[] { "D9", "Sunny", 12.0, 26.0, "Weak", "Yes" },
                new object[] { "D10", "Rain", 25, 30.0, "Weak", "Yes" },
                new object[] { "D11", "Sunny", 26.0, 40.0, "Strong", "Yes" },
                new object[] { "D12", "Overcast", 27.0, 97.0, "Strong", "Yes" },
                new object[] { "D13", "Overcast", 39.0, 41.0, "Weak", "Yes" },
                new object[] { "D14", "Rain", 23.0, 98.0, "Strong", "No" }
            };

            foreach (object[] day in days)
            {
                data.Rows.Add(day);
            }

            // Encode the remaining categorical columns as integer symbols.
            Codification codebook = new Codification(data);

            int classCount = codebook["PlayTennis"].Symbols; // 2 possible values (yes, no)
            int inputCount = 4;                              // Outlook, Temperature, Humidity, Wind

            // Per-column priors: discrete for the categorical inputs,
            // Gaussian for the continuous ones.
            IUnivariateDistribution[] priors =
            {
                new GeneralDiscreteDistribution(codebook["Outlook"].Symbols), // Sunny, overcast, rain
                new NormalDistribution(),                                     // Celsius
                new NormalDistribution(),                                     // percentage
                new GeneralDiscreteDistribution(codebook["Wind"].Symbols)     // Weak, strong
            };

            // Classifier under test (obsolete Estimate/Compute API).
            var classifier = new NaiveBayes<IUnivariateDistribution>(classCount, inputCount, priors);

            DataTable symbols = codebook.Apply(data);

            double[][] inputs = symbols.ToArray("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            classifier.Estimate(inputs, outputs);

            // Query: a sunny, cool, humid and windy day.
            double[] instance =
            {
                codebook.Translate(columnName: "Outlook", value: "Sunny"),
                12.0,
                90.0,
                codebook.Translate(columnName: "Wind", value: "Strong")
            };

            double logLikelihood;
            double[] responses;
            int c = classifier.Compute(instance, out logLikelihood, out responses);

            string result = codebook.Translate("PlayTennis", c);

            // Decoded answer, posterior value, and distribution sanity checks.
            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.840, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
Exemple #20
0
        /// <summary>
        ///   Entry point: trains a hand-rolled Naive Bayes classifier on the
        ///   banknote-authentication dataset (80/20 train/test split), prints the
        ///   per-class mean/standard-deviation summary for every feature, then
        ///   reports prediction accuracy on the held-out test set.
        ///   (Large tracts of unrelated commented-out experiments — graph search,
        ///   sorting benchmarks, convex hull — were removed as dead code.)
        /// </summary>
        static void Main(string[] args)
        {
            // Load the dataset from CSV and keep 80% of it for training.
            NaiveBayes na = new NaiveBayes("data_banknote_authentication.csv", 0.8);

            // Column names of the banknote dataset, in file order.
            string[] labels = { "variance", "skewness", "curtosis", "entropy", "class" };

            // Per-class (mean, standard deviation) pairs for every feature column.
            var summarize = na.summarizeByClass(na.TrainingSet);

            for (int i = 0; i < summarize.Count; i++)
            {
                var item = summarize[i];
                Console.WriteLine("class " + i + " : ");
                for (int j = 0; j < item.Count; j++)
                {
                    var item2 = item[j];
                    Console.WriteLine(labels[j] + " -> mean : " + item2.First + " |  standard deviation : " + item2.Second);
                }
                Console.WriteLine();
            }

            Console.WriteLine("----------------------------------------------------------");

            // Classify the held-out test set using the class summaries.
            Vector <double> pred = na.Predictions(summarize, na.TestSet);

            Console.WriteLine();
            Console.WriteLine("My Implement Accuracy : " + na.Accuracy(na.TestSet, pred).ToString("0.00") + "%");

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
Exemple #21
0
        /// <summary>
        ///   Exercises the obsolete constructor + Estimate/Compute API on Mitchell's
        ///   purely categorical "play tennis" dataset and verifies the prediction
        ///   for a sunny, cool, humid and windy day.
        /// </summary>
        public void ComputeTest_Obsolete()
        {
            DataTable data = new DataTable("Mitchell's Tennis Example");

            data.Columns.Add("Day", "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            object[][] days =
            {
                new object[] { "D1", "Sunny", "Hot", "High", "Weak", "No" },
                new object[] { "D2", "Sunny", "Hot", "High", "Strong", "No" },
                new object[] { "D3", "Overcast", "Hot", "High", "Weak", "Yes" },
                new object[] { "D4", "Rain", "Mild", "High", "Weak", "Yes" },
                new object[] { "D5", "Rain", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D6", "Rain", "Cool", "Normal", "Strong", "No" },
                new object[] { "D7", "Overcast", "Cool", "Normal", "Strong", "Yes" },
                new object[] { "D8", "Sunny", "Mild", "High", "Weak", "No" },
                new object[] { "D9", "Sunny", "Cool", "Normal", "Weak", "Yes" },
                new object[] { "D10", "Rain", "Mild", "Normal", "Weak", "Yes" },
                new object[] { "D11", "Sunny", "Mild", "Normal", "Strong", "Yes" },
                new object[] { "D12", "Overcast", "Mild", "High", "Strong", "Yes" },
                new object[] { "D13", "Overcast", "Hot", "Normal", "Weak", "Yes" },
                new object[] { "D14", "Rain", "Mild", "High", "Strong", "No" }
            };

            foreach (object[] day in days)
            {
                data.Rows.Add(day);
            }

            // Map every categorical column to integer symbols.
            Codification codebook = new Codification(data,
                                                     "Outlook", "Temperature", "Humidity", "Wind", "PlayTennis");

            // Number of distinct symbols per input column.
            int[] symbolCounts =
            {
                codebook["Outlook"].Symbols,     // Sunny, overcast, rain
                codebook["Temperature"].Symbols, // Hot, mild, cool
                codebook["Humidity"].Symbols,    // High, normal
                codebook["Wind"].Symbols         // Weak, strong
            };

            int classCount = codebook["PlayTennis"].Symbols; // yes / no

            // Classifier under test, built through the obsolete constructor.
            NaiveBayes nb = new NaiveBayes(classCount, symbolCounts);

            // Encode the dataset and fit the model with the obsolete Estimate call.
            DataTable symbols = codebook.Apply(data);

            int[][] inputs = symbols.ToArray<int>("Outlook", "Temperature", "Humidity", "Wind");
            int[] outputs = symbols.ToArray<int>("PlayTennis");

            nb.Estimate(inputs, outputs);

            // Encode and classify the query instance.
            int[] instance = codebook.Translate("Sunny", "Cool", "High", "Strong");

            double logLikelihood;
            double[] responses;
            int c = nb.Compute(instance, out logLikelihood, out responses);

            string result = codebook.Translate("PlayTennis", c);

            // Decoded answer, posterior value, and distribution sanity checks.
            Assert.AreEqual("No", result);
            Assert.AreEqual(0, c);
            Assert.AreEqual(0.795, responses[0], 1e-3);
            Assert.AreEqual(1, responses.Sum(), 1e-10);
            Assert.IsFalse(double.IsNaN(responses[0]));
            Assert.AreEqual(2, responses.Length);
        }
 /// <summary>
 ///   Loads the persisted Naive Bayes (Cauchy) classifier into <c>machine</c>.
 ///   The model file name is the configured machine path with "NB" inserted
 ///   immediately before its extension (e.g. "model.bin" -> "modelNB.bin").
 /// </summary>
 public void Load_Model()
 {
     string basePath  = Utilities.Globals.MACHINE_PATH;
     string modelPath = basePath.Insert(basePath.LastIndexOf('.'), "NB");
     machine = Accord.IO.Serializer.Load <NaiveBayes <CauchyDistribution> >(modelPath);
 }