Example No. 1
 public LinearLeraningMachine(ClassificationProblem problem)
 {
     this.m_problem = problem;
     this.m_t_set   = problem.TrainingSet;
     //this.m_problem.RetrieveVocabulary(out this.m_voc);
     this.m_l = m_t_set.Examples.Count;
     //this.m_weight = new SparseVector(m_voc.Count);
 }
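A minimal usage sketch for the constructor above. Only Train() is confirmed by these excerpts (Example No. 6 below calls it on a LinearLeraningMachine); CreateChessBoard() refers to Example No. 5:

 // Usage sketch: Train() is the only member confirmed by this listing.
 ClassificationProblem problem = CreateChessBoard(); // see Example No. 5
 var llm = new LinearLeraningMachine(problem);       // caches problem + training set
 llm.Train();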
Example No. 2
 public NearestNeighbourClassification()
     : base()
 {
     Parameters.Add(new FixedValueParameter <BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
     Parameters.Add(new FixedValueParameter <IntValue>(KParameterName, "The number of nearest neighbours to consider for classification.", new IntValue(3)));
     Parameters.Add(new OptionalValueParameter <DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set, the weights are calculated automatically (each feature is scaled to unit variance)."));
     Problem = new ClassificationProblem();
 }
Example No. 3
    public RandomForestClassification()
      : base() {
      Parameters.Add(new FixedValueParameter<IntValue>(NumberOfTreesParameterName, "The number of trees in the forest. Should be between 50 and 100", new IntValue(50)));
      Parameters.Add(new FixedValueParameter<DoubleValue>(RParameterName, "The ratio of the training set that will be used in the construction of individual trees (0<r<=1). Should be adjusted depending on the noise level in the dataset in the range from 0.66 (low noise) to 0.05 (high noise). This parameter should be adjusted to achieve good generalization error.", new DoubleValue(0.3)));
      Parameters.Add(new FixedValueParameter<DoubleValue>(MParameterName, "The ratio of features that will be used in the construction of individual trees (0<m<=1)", new DoubleValue(0.5)));
      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));
      Parameters.Add(new FixedValueParameter<BoolValue>(CreateSolutionParameterName, "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true)));
      Parameters[CreateSolutionParameterName].Hidden = true;

      Problem = new ClassificationProblem();
    }
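A hedged usage sketch for the constructor above, assuming HeuristicLab's usual pattern of reading a parameter back out of the Parameters collection. The key string "Number of Trees" is a placeholder, since the value behind NumberOfTreesParameterName is not shown in this excerpt:

       // Hedged sketch: "Number of Trees" stands in for the unknown value
       // of the NumberOfTreesParameterName constant.
       var rf = new RandomForestClassification();
       var trees = (IFixedValueParameter<IntValue>)rf.Parameters["Number of Trees"];
       trees.Value.Value = 100;   // stay within the suggested 50-100 range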
Example No. 4
        /// <summary>
        /// foamliu, 2009/12/21, please make sure you've uncompressed "2_newsgroups.7z" in the "data" folder.
        /// </summary>
        /// <returns></returns>
        private static ClassificationProblem CreateText()
        {
            const string DataFolder = @"..\data\2_newsgroups";

            ClassificationProblem problem = new ClassificationProblem();

            ExampleSet t_set = new ExampleSet();
            ExampleSet v_set = new ExampleSet();

            CategoryCollection collect = new CategoryCollection();

            collect.Add(new Category(+1, "+1"));
            collect.Add(new Category(-1, "-1"));

            problem.Dimension          = 2;
            problem.CategoryCollection = collect;

            DirectoryInfo dataFolder = new DirectoryInfo(DataFolder);

            DirectoryInfo[] subfolders = dataFolder.GetDirectories();
            int             count      = 0;

            for (int i = 0; i < subfolders.Length; i++)
            {
                DirectoryInfo categoryFolder = subfolders[i];
                // Map the folder index to a category id: 0 -> -1, 1 -> +1.
                int cat = i * 2 - 1;
                // for all the text files in each category
                FileInfo[] files = categoryFolder.GetFiles();

                count = 0;
                int trainSetCount = Convert.ToInt32(Constants.TrainingSetRatio * files.Length);
                for (int j = 0; j < files.Length; j++)
                {
                    FileInfo textFile = files[j];
                    Example  e        = new Example();

                    if (++count < trainSetCount)
                    {
                        t_set.AddExample(e);
                    }
                    else
                    {
                        v_set.AddExample(e);
                    }
                }
            }

            problem.TrainingSet   = t_set;
            problem.ValidationSet = v_set;


            return problem;
        }
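As excerpted, cat is computed but never attached and each Example is added empty. A hypothetical sketch of the elided loop body; TextExample appears elsewhere in this listing (Example No. 15), but its constructor and the GetCategory lookup are assumptions:

                    // Hypothetical completion of the loop body above; only
                    // Example.Label is confirmed elsewhere in this listing.
                    string text = File.ReadAllText(textFile.FullName);
                    TextExample e = new TextExample(text);   // assumed ctor
                    e.Label = collect.GetCategory(cat);      // assumed lookup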
Example No. 5
        private static ClassificationProblem CreateChessBoard()
        {
            ClassificationProblem problem = new ClassificationProblem();

            CategoryCollection collect = new CategoryCollection();

            collect.Add(new Category(+1, "+1"));
            collect.Add(new Category(-1, "-1"));

            problem.Dimension          = 2;
            problem.CategoryCollection = collect;
            problem.TrainingSet        = GetExamples(collect);
            problem.ValidationSet      = GetExamples(collect);


            return problem;
        }
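GetExamples itself is not shown in this listing. For a chessboard problem it would presumably sample 2-D points and label them by cell parity; a hypothetical sketch, reusing the SparseVector API visible in Example No. 15 (GetCategory is an assumed lookup):

        private static ExampleSet GetExamples(CategoryCollection collect)
        {
            // Hypothetical generator: sample points on a 4x4 board and
            // label them +1/-1 by the parity of their unit cell.
            ExampleSet set  = new ExampleSet();
            Random     rand = new Random();

            for (int i = 0; i < 1000; i++)
            {
                double x  = rand.NextDouble() * 4.0;
                double y  = rand.NextDouble() * 4.0;
                int    id = (((int)x + (int)y) % 2 == 0) ? +1 : -1;

                SparseVector v = new SparseVector(2);
                v.Components.Add(0, x);
                v.Components.Add(1, y);

                Example e = new Example();
                e.X     = v;
                e.Label = collect.GetCategory(id);   // assumed lookup
                set.AddExample(e);
            }
            return set;
        }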
Example No. 6
        //private bool m_isleaf;

        public Node(ClassificationProblem problem, int first, int second)
        {
            this.m_problem = problem;
            this.m_first   = first;
            this.m_second  = second;
            this.m_llm     = new LinearLeraningMachine(problem);
            this.m_llm.Train();

            if (second > first + 1)
            {
                this.m_leftChild  = new Node(problem, first + 1, second);
                this.m_rightChild = new Node(problem, first, second - 1);
                //this.m_isleaf = false;
            }
            else
            {
                //this.m_isleaf = true;
            }
        }
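The (first, second) recursion above builds a decision DAG (pairwise DDAG-style decomposition): each node trains one binary machine and delegates to children over narrower category ranges until first + 1 == second. A hedged construction sketch for a k-class problem (the Collection member is visible in Example No. 15; its Count is assumed):

            // Root spans the full category index range [0, k-1]; the
            // constructor above creates the inner nodes recursively.
            int  k    = problem.CategoryCollection.Collection.Count;
            Node root = new Node(problem, 0, k - 1);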
Example No. 7
        public Binary_SVM_SMO(ClassificationProblem problem)
        {
            this.m_problem = problem;
            this.m_t_set   = this.m_problem.TrainingSet;
            // this.m_problem.RetrieveVocabulary(out this.m_voc);
            this.m_l     = m_t_set.Examples.Count;
            this.m_alpha = new double[m_l];
            this.m_error = new double[m_l];

            this.m_kernel   = new LinearKernel();
            this.m_NonBound = new List <int>();
            this.m_rand     = new Random();
            this.m_weight   = new SparseVector(problem.Dimension);

            // foamliu, 2009/01/12, default values
            this.m_c         = Constants.SVM_C;
            this.m_eta       = Constants.SVM_Eta;
            this.m_tolerance = Constants.SVM_Tolerance;
            this.m_epsilon   = Constants.SVM_Epsilon;
        }
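A minimal usage sketch for the constructor above; a Train() method is assumed by analogy with the other learners in this listing and is not shown in this excerpt:

            // Usage sketch: Train() is assumed, not confirmed here.
            ClassificationProblem problem = CreateChessBoard(); // see Example No. 5
            Binary_SVM_SMO        smo     = new Binary_SVM_SMO(problem);
            smo.Train();   // defaults: SVM_C, SVM_Eta, SVM_Tolerance, SVM_Epsilon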
Example No. 8
 public NearestNeighbourClassification()
     : base()
 {
     Parameters.Add(new FixedValueParameter <IntValue>(KParameterName, "The number of nearest neighbours to consider for classification.", new IntValue(3)));
     Problem = new ClassificationProblem();
 }
Example No. 9
 public LinearDiscriminantAnalysis()
     : base()
 {
     Problem = new ClassificationProblem();
 }
Example No. 10
        public NcaAlgorithm()
            : base()
        {
            Parameters.Add(new ValueParameter <IntValue>(SeedParameterName, "The seed of the random number generator.", new IntValue(0)));
            Parameters.Add(new ValueParameter <BoolValue>(SetSeedRandomlyParameterName, "A boolean flag that indicates whether the seed should be randomly reset each time the algorithm is run.", new BoolValue(true)));
            Parameters.Add(new FixedValueParameter <IntValue>(KParameterName, "The K for the nearest neighbor.", new IntValue(3)));
            Parameters.Add(new FixedValueParameter <IntValue>(DimensionsParameterName, "The number of dimensions that NCA should reduce the data to.", new IntValue(2)));
            Parameters.Add(new ConstrainedValueParameter <INcaInitializer>(InitializationParameterName, "Which method should be used to initialize the matrix. Typically LDA (linear discriminant analysis) should provide a good estimate."));
            Parameters.Add(new FixedValueParameter <IntValue>(NeighborSamplesParameterName, "How many of the neighbors should be sampled in order to speed up the calculation. This should be at least the value of k; at most the number of training instances minus one will be used.", new IntValue(60)));
            Parameters.Add(new FixedValueParameter <IntValue>(IterationsParameterName, "How many iterations the conjugate gradient (CG) method should be allowed to perform. The method might still terminate earlier if a local optimum has already been reached.", new IntValue(50)));
            Parameters.Add(new FixedValueParameter <DoubleValue>(RegularizationParameterName, "A non-negative parameter which can be set to increase generalization and avoid overfitting. If set to 0 the algorithm is similar to NCA as proposed by Goldberger et al.", new DoubleValue(0)));
            Parameters.Add(new ValueParameter <INcaModelCreator>(NcaModelCreatorParameterName, "Creates an NCA model out of the matrix.", new NcaModelCreator()));
            Parameters.Add(new ValueParameter <INcaSolutionCreator>(NcaSolutionCreatorParameterName, "Creates an NCA solution given a model and some data.", new NcaSolutionCreator()));
            Parameters.Add(new ValueParameter <BoolValue>(ApproximateGradientsParameterName, "True if the gradients should be approximated, otherwise they are computed exactly.", new BoolValue()));

            NcaSolutionCreatorParameter.Hidden   = true;
            ApproximateGradientsParameter.Hidden = true;

            INcaInitializer defaultInitializer = null;

            foreach (var initializer in ApplicationManager.Manager.GetInstances <INcaInitializer>().OrderBy(x => x.ItemName))
            {
                if (initializer is LdaInitializer)
                {
                    defaultInitializer = initializer;
                }
                InitializationParameter.ValidValues.Add(initializer);
            }
            if (defaultInitializer != null)
            {
                InitializationParameter.Value = defaultInitializer;
            }

            var randomCreator      = new RandomCreator();
            var ncaInitializer     = new Placeholder();
            var bfgsInitializer    = new LbfgsInitializer();
            var makeStep           = new LbfgsMakeStep();
            var branch             = new ConditionalBranch();
            var gradientCalculator = new NcaGradientCalculator();
            var modelCreator       = new Placeholder();
            var updateResults      = new LbfgsUpdateResults();
            var analyzer           = new LbfgsAnalyzer();
            var finalModelCreator  = new Placeholder();
            var finalAnalyzer      = new LbfgsAnalyzer();
            var solutionCreator    = new Placeholder();

            OperatorGraph.InitialOperator                     = randomCreator;
            randomCreator.SeedParameter.ActualName            = SeedParameterName;
            randomCreator.SeedParameter.Value                 = null;
            randomCreator.SetSeedRandomlyParameter.ActualName = SetSeedRandomlyParameterName;
            randomCreator.SetSeedRandomlyParameter.Value      = null;
            randomCreator.Successor = ncaInitializer;

            ncaInitializer.Name = "(NcaInitializer)";
            ncaInitializer.OperatorParameter.ActualName = InitializationParameterName;
            ncaInitializer.Successor = bfgsInitializer;

            bfgsInitializer.IterationsParameter.ActualName           = IterationsParameterName;
            bfgsInitializer.PointParameter.ActualName                = NcaMatrixParameterName;
            bfgsInitializer.ApproximateGradientsParameter.ActualName = ApproximateGradientsParameterName;
            bfgsInitializer.Successor = makeStep;

            makeStep.StateParameter.ActualName = bfgsInitializer.StateParameter.Name;
            makeStep.PointParameter.ActualName = NcaMatrixParameterName;
            makeStep.Successor = branch;

            branch.ConditionParameter.ActualName = makeStep.TerminationCriterionParameter.Name;
            branch.FalseBranch = gradientCalculator;
            branch.TrueBranch  = finalModelCreator;

            gradientCalculator.Successor = modelCreator;

            modelCreator.OperatorParameter.ActualName = NcaModelCreatorParameterName;
            modelCreator.Successor = updateResults;

            updateResults.StateParameter.ActualName                = bfgsInitializer.StateParameter.Name;
            updateResults.QualityParameter.ActualName              = QualityParameterName;
            updateResults.QualityGradientsParameter.ActualName     = NcaMatrixGradientsParameterName;
            updateResults.ApproximateGradientsParameter.ActualName = ApproximateGradientsParameterName;
            updateResults.Successor = analyzer;

            analyzer.QualityParameter.ActualName               = QualityParameterName;
            analyzer.PointParameter.ActualName                 = NcaMatrixParameterName;
            analyzer.QualityGradientsParameter.ActualName      = NcaMatrixGradientsParameterName;
            analyzer.StateParameter.ActualName                 = bfgsInitializer.StateParameter.Name;
            analyzer.PointsTableParameter.ActualName           = "Matrix table";
            analyzer.QualityGradientsTableParameter.ActualName = "Gradients table";
            analyzer.QualitiesTableParameter.ActualName        = "Qualities";
            analyzer.Successor = makeStep;

            finalModelCreator.OperatorParameter.ActualName = NcaModelCreatorParameterName;
            finalModelCreator.Successor = finalAnalyzer;

            finalAnalyzer.QualityParameter.ActualName               = QualityParameterName;
            finalAnalyzer.PointParameter.ActualName                 = NcaMatrixParameterName;
            finalAnalyzer.QualityGradientsParameter.ActualName      = NcaMatrixGradientsParameterName;
            finalAnalyzer.PointsTableParameter.ActualName           = analyzer.PointsTableParameter.ActualName;
            finalAnalyzer.QualityGradientsTableParameter.ActualName = analyzer.QualityGradientsTableParameter.ActualName;
            finalAnalyzer.QualitiesTableParameter.ActualName        = analyzer.QualitiesTableParameter.ActualName;
            finalAnalyzer.Successor = solutionCreator;

            solutionCreator.OperatorParameter.ActualName = NcaSolutionCreatorParameterName;

            Problem = new ClassificationProblem();
        }
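The operator graph wired above is easier to read as the loop it encodes (operator names as declared above):

    randomCreator -> ncaInitializer -> bfgsInitializer -> makeStep -> branch
    branch, not terminated: -> gradientCalculator -> modelCreator
                            -> updateResults -> analyzer -> makeStep (loop)
    branch, terminated:     -> finalModelCreator -> finalAnalyzer -> solutionCreator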
Example No. 12
 public OneR()
     : base()
 {
     Parameters.Add(new ValueParameter <IntValue>("MinBucketSize", "Minimum size of a bucket for numerical values (except for the rightmost bucket).", new IntValue(6)));
     Problem = new ClassificationProblem();
 }
Example No. 13
 public ZeroR()
     : base()
 {
     Problem = new ClassificationProblem();
 }
Example No. 14
 public MultiNomialLogitClassification()
     : base()
 {
     Problem = new ClassificationProblem();
 }
Example No. 15
        //private void BuildExample(TextExample example, Vocabulary voc, int exampleCount)
        //{
        //    int dimension = voc.Count;
        //    SparseVector vector = new SparseVector(dimension);

        //    foreach (string word in example.Tokens.Keys)
        //    {
        //        int pos = voc.GetWordPosition(word);
        //        if (pos == Constants.KEY_NOT_FOUND)
        //            continue;

        //        // phi i(x) = tfi log(idfi) /k
        //        // tfi:     number of occurrences of the term i in the document x
        //        // idfi:    the ratio between the total number of documents and the
        //        //              number of documents containing the term
        //        // k:       normalisation constant ensuring that ||phi|| = 1
        //        double phi = example.Tokens[word] * Math.Log(exampleCount / voc.WordExampleOccurMap[word]);
        //        vector.Components.Add(pos, phi);

        //    }
        //    vector.Normalize();
        //    example.X = vector;
        //}

        //private void Preprocess(ClassificationProblem problem)
        //{
        //    Vocabulary voc;

        //    problem.RetrieveVocabulary(out voc);
        //    foreach (Category c in problem.CategoryCollection.Collection)
        //    {
        //        foreach (TextExample e in c.Examples)
        //        {
        //            BuildExample(e, voc, problem.ExampleCount);
        //        }
        //    }

        //    m_weight = new SparseVector(voc.Count);
        //}
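The commented-out BuildExample documents the feature map phi_i(x) = tf_i * log(idf_i) / k. A self-contained sketch of that weighting, independent of the library types; note the cast to double, which the commented code would also need if exampleCount and the occurrence counts are integers:

        // Self-contained TF-IDF sketch (requires System and System.Linq):
        // phi_i = tf_i * log(totalDocs / docFreq_i), then L2-normalised
        // so that ||phi|| = 1.
        static double[] TfIdf(int[] tf, int totalDocs, int[] docFreq)
        {
            double[] phi = new double[tf.Length];
            for (int i = 0; i < tf.Length; i++)
                phi[i] = tf[i] * Math.Log((double)totalDocs / docFreq[i]);

            double k = Math.Sqrt(phi.Sum(p => p * p)); // normalisation constant
            if (k > 0)
                for (int i = 0; i < phi.Length; i++)
                    phi[i] /= k;
            return phi;
        }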

        /// <summary>
        /// simple on-line algorithm for the 1-norm soft margin:
        /// training SVMs in the non-bias case.
        /// </summary>
        /// <param name="problem"></param>
        public void Train(ClassificationProblem problem)
        {
            ExampleSet t_Set;   // training set

            //Logging.Info("Retrieving training set");
            t_Set = problem.TrainingSet;
            l     = t_Set.Examples.Count;

            //Logging.Info("Preprocessing all the examples");
            //this.Preprocess(problem);

            m_Alpha    = new double[l];
            m_newalpha = new double[l];

            for (int i = 0; i < m_Alpha.Length; i++)
            {
                m_Alpha[i] = 0.0;
            }

            //Logging.Info("Gradient descent");

            while (true)
            {
                for (int i = 0; i < l; i++)
                {
                    double temp = 0.0;

                    for (int j = 0; j < l; j++)
                    {
                        temp += m_Alpha[j] * t_Set.Examples[j].Label.Id * m_kernel.Compute(t_Set.Examples[i].X, t_Set.Examples[j].X);
                    }
                    m_newalpha[i] = m_Alpha[i] + Constants.SVM_Eta * (1.0 - t_Set.Examples[i].Label.Id * temp);

                    if (m_newalpha[i] < 0.0)
                    {
                        m_newalpha[i] = 0.0;
                    }
                    else if (m_newalpha[i] > Constants.SVM_C)
                    {
                        m_newalpha[i] = Constants.SVM_C;
                    }
                }

                this.CopyAlphas();

                W = this.CalculateSVM_W(t_Set);

                if (Math.Abs((W - old_W) / W) < Constants.SVM_Tolerance)
                {
                    break;
                }


                Logger.Info(string.Format("SVM W = {0}", W));


                old_W = W;
            }

            this.CalculateWeight(t_Set);
            //this.CalculateB(t_Set);
        }
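The inner loop above is a gradient-ascent (kernel-adatron style) update of the dual variables, clipped to the box [0, C]; in the notation of the code:

    alpha[i] <- clip( alpha[i] + eta * (1 - y_i * sum_j alpha[j] * y_j * K(x_i, x_j)), 0, C )

where y_i = t_Set.Examples[i].Label.Id, eta = Constants.SVM_Eta and C = Constants.SVM_C. The loop terminates once the relative change of the objective W drops below Constants.SVM_Tolerance.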