예제 #1
0
        /// <summary>
        /// Trains a single perceptron on every supplied example, recast as a two-class problem:
        /// an example is labeled +1 when its class index is in <paramref name="positiveClasses"/>, and -1 otherwise.
        /// </summary>
        /// <param name="trainingData">One tuple per example: the feature vector (Item1) and its class index (Item2).</param>
        /// <param name="positiveClasses">Class indices that make up the positive side of the split.</param>
        /// <param name="dimension">Forwarded to the Perceptron constructor.</param>
        /// <returns>The perceptron after training on the relabeled examples.</returns>
        private Perceptron buildPerceptronForAllData(IEnumerable<TupleStruct<double[], int>> trainingData, HashSet<int> positiveClasses, int dimension)
        {
            Perceptron trained = new Perceptron(dimension, normalizePerceptrons);

            //New tuples are built for the binary labels; the incoming tuples are only read.
            TupleStruct<double[], int>[] binaryExamples =
                (from example in trainingData
                 select new TupleStruct<double[], int>(example.Item1, positiveClasses.Contains(example.Item2) ? 1 : -1)).ToArray();

            trained.Train(binaryExamples);
            return trained;
        }
예제 #2
0
        /// <summary>
        /// Trains a single perceptron on all instances, weighting each instance by 1 / (size of its class)
        /// as read from <paramref name="groupSizes"/>. Instances of a class in
        /// <paramref name="positiveClasses"/> are labeled +1, all others -1.
        /// </summary>
        /// <remarks>
        /// The data arrives as IGroupings because the caller probably already has them; a plain list would work just as well.
        /// </remarks>
        /// <param name="groupsByLabel">Training instances grouped by class index (the grouping key).</param>
        /// <param name="positiveClasses">Class indices that make up the positive side of the split.</param>
        /// <param name="groupSizes">Instance count per class, indexed by class index.</param>
        /// <param name="dimension">Forwarded to the Perceptron constructor.</param>
        /// <returns>The perceptron after training on the weighted examples.</returns>
        private Perceptron buildPerceptronEvenWeights(IEnumerable<IGrouping<int, LabeledInstance>> groupsByLabel, HashSet<int> positiveClasses, int[] groupSizes, int dimension)
        {
            //Projects one class group into (features, +1/-1 label, weight) tuples.
            Func<IGrouping<int, LabeledInstance>, IEnumerable<TupleStruct<double[], int, double>>> weighGroup =
                classGroup => from member in classGroup
                              select new TupleStruct<double[], int, double>(
                                  member.values,
                                  (positiveClasses.Contains(classGroup.Key) ? 1 : -1),
                                  1.0 / groupSizes[classGroup.Key]);

            TupleStruct<double[], int, double>[] weightedExamples = groupsByLabel.SelectMany(weighGroup).ToArray();

            Perceptron trained = new Perceptron(dimension, normalizePerceptrons);
            trained.Train(weightedExamples);
            return trained;
        }
예제 #3
0
        /// <summary>
        /// Trains a single perceptron on at most <paramref name="minClassSize"/> instances from each class.
        /// Instances of a class in <paramref name="positiveClasses"/> are labeled +1, all others -1.
        /// </summary>
        /// <param name="groupsByLabel">Training instances grouped by class index (the grouping key).</param>
        /// <param name="positiveClasses">Class indices that make up the positive side of the split.</param>
        /// <param name="minClassSize">Upper bound on the number of instances taken from any one class.</param>
        /// <param name="dimension">Forwarded to the Perceptron constructor.</param>
        /// <returns>The perceptron after training on the capped sample.</returns>
        private Perceptron buildPerceptronEvenClassSizes(IEnumerable<IGrouping<int, LabeledInstance>> groupsByLabel, HashSet<int> positiveClasses, int minClassSize, int dimension)
        {
            //TODO: Solve the numbers dilemma.
            //
            //Approach 1 (in use): take the same number of instances from every class.  A negative class that has
            //fewer instances simply contributes fewer, which may inflate the positive side.
            //
            //Approach 2 (not in use): cap each positive class at minClassSize and draw
            //minClassSize * positiveClasses.Count instances from the shuffled pool of all negatives.
            //It favors big negative classes, so the negative sample is biased.
            TupleStruct<double[], int>[] cappedExamples =
                (from classGroup in groupsByLabel
                 from member in classGroup.Take(minClassSize)
                 select new TupleStruct<double[], int>(member.values, positiveClasses.Contains(classGroup.Key) ? 1 : -1)).ToArray();

            Perceptron trained = new Perceptron(dimension, normalizePerceptrons);
            trained.Train(cappedExamples);
            return trained;
        }
예제 #4
0
        /// <summary>
        /// Trains the perceptron ensemble on <paramref name="trainingData"/>.
        /// The distinct labels are stored in <c>classes</c>; ceil(log2(class count) * perceptronCountFactor) perceptrons
        /// are then built, each separating a randomly chosen subset of class indices (positive) from the rest (negative).
        /// How the examples are sampled or weighted for each perceptron depends on <c>trainingMode</c>.
        /// The resulting (perceptron, positive class indices) pairs are stored in <c>perceptrons</c>.
        /// </summary>
        /// <remarks>
        /// If <c>trainingMode</c> matches none of the handled modes, <c>perceptrons</c> is left unchanged.
        /// </remarks>
        /// <param name="trainingData">Labeled instances; enumerated once and copied to an array.</param>
        /// <exception cref="InvalidOperationException">The training data is empty (thrown by First()).</exception>
        public void Train(IEnumerable <LabeledInstance> trainingData)
        {
            trainingData = trainingData.ToArray();             //Materialize once; the data is re-enumerated many times below.  TODO performance.
            classes      = trainingData.Select(item => item.label).Distinct().Order().ToArray();

            Dictionary <string, int> classLookup = classes.IndexLookupDictionary();

            int dimension = trainingData.First().values.Length;                                           //TODO 0 case: First() throws on empty training data.

            int perceptronCount = (int)Math.Ceiling(Math.Log(classes.Length, 2) * perceptronCountFactor); //Need at least log_2 perceptrons to be able to represent any item with a TRUE combination.  Take twice as many to improve predictive power.

            //classesToTake receives null instead of a generator when cloudSizeStDev is exactly 0.
            Random rand = (cloudSizeStDev == 0) ? null : new Random();

            switch (trainingMode)
            {
            case PerceptronTrainingMode.TRAIN_ALL_DATA:
            {
                //TODO Don't shuffle every time.  Highly inefficient.
                perceptrons = buildPerceptronsInParallel(perceptronCount, rand, positiveClasses =>
                                                         buildPerceptronForAllData(
                                                             trainingData.Shuffle().Select(instance =>
                                                                                           new TupleStruct <double[], int>(instance.values, classLookup[instance.label])),
                                                             positiveClasses,
                                                             dimension));
                break;
            }

            case PerceptronTrainingMode.TRAIN_EVEN_SIZE:
            {
                //GroupBy is deferred: without ToArray() every perceptron would regroup the whole data set.
                IEnumerable <IGrouping <int, LabeledInstance> > byLabel = trainingData.GroupBy(item => classLookup[item.label]).ToArray();

                //Every class index has a group (classes comes from the same data), so this is the smallest class size.
                //Computed once here rather than once per perceptron.
                int minClassSize = byLabel.Min(grp => grp.Count());

                perceptrons = buildPerceptronsInParallel(perceptronCount, rand, positiveClasses =>
                                                         buildPerceptronEvenClassSizes(byLabel, positiveClasses, minClassSize, dimension));
                break;
            }

            case PerceptronTrainingMode.TRAIN_EVEN_WEIGHTS:
            {
                //GroupBy is deferred: without ToArray() every perceptron would regroup the whole data set.
                IEnumerable <IGrouping <int, LabeledInstance> > byLabel = trainingData.GroupBy(item => classLookup[item.label]).ToArray();
                int[] classCounts = new int[classes.Length];
                foreach (IGrouping <int, LabeledInstance> grp in byLabel)                                  //TODO: Might be nice to have a higher order for this, like ToDictionary but with first type an int.
                {
                    classCounts[grp.Key] = grp.Count();
                }

                perceptrons = buildPerceptronsInParallel(perceptronCount, rand, positiveClasses =>
                                                         buildPerceptronEvenWeights(byLabel, positiveClasses, classCounts, dimension));
                break;
            }
            }
        }

        //Builds perceptronCount perceptrons in parallel.  Each one gets its own random subset of class indices as the
        //positive set, and buildPerceptron turns that set into a trained perceptron.
        private TupleStruct <Perceptron, int[]>[] buildPerceptronsInParallel(int perceptronCount, Random rand, Func <HashSet <int>, Perceptron> buildPerceptron)
        {
            //System.Random is not thread safe, so every call that receives rand is made here, sequentially,
            //before the parallel section starts.
            int[] takeCounts = Enumerable.Range(0, perceptronCount).Select(index => classesToTake(rand)).ToArray();
            int   classCount = classes.Length;

            //NOTE(review): Shuffle() still runs on the worker threads; confirm that it does not share an unsynchronized generator.
            //TODO: Don't shuffle every time.
            return(Enumerable.Range(0, perceptronCount).AsParallel().Select(index =>
                {
                    int[] classIndices    = Enumerable.Range(0, classCount).Shuffle().Take(takeCounts[index]).ToArray();
                    Perceptron perceptron = buildPerceptron(new HashSet <int>(classIndices));
                    return(new TupleStruct <Perceptron, int[]>(perceptron, classIndices));
                }
                                                                            ).ToArray());
        }