RunningStat, DatastoreMiner C# (CSharp) Code Examples

Example #1

0

Show file

        /// <summary>
        /// Spatial bivariate Moran's I between two variables observed over the same set of areas.
        /// Two variants are accumulated in a single pass over every ordered pair (i, j), including i == j:
        ///   I1 = Sigma_i(Sigma_j(Yi*Wij*Xj)) / (S0*Sqrt(Variance(Y)*Variance(X)))   using the raw values
        ///   I2 = Sigma_i(Sigma_j(ZYi*Wij*ZXj)) / S0                                  using standardised values
        /// where ZY = (Y-Mean(Y))/SD(Y), ZX = (X-Mean(X))/SD(X) and S0 is the sum of all the elements in W.
        /// The weight Wij is 1 when the centroid distance is less than 1, otherwise 1/distance.
        /// NOTE: X[i], Y[i] and Centroids[i] MUST all reference the same spatial area i.e. all three arrays are in step
        /// </summary>
        /// <param name="X">Data values of first table</param>
        /// <param name="Y">Data values of second table (must match X spatially)</param>
        /// <param name="Centroids">Centroid points of polygon areas to calculate distance weights (must match X and Y spatially)
        /// first point is ([0,0], [0,1]), second point is ([1,0],[1,1]). The reason for using the 2d double array in preference to
        /// an array of Point is the big increase in speed.</param>
        /// <returns>Two element array { I1, I2 } as defined in the summary</returns>
        public static double [] SpatialBivariateMoranI(double [] X, double [] Y, double [,] Centroids)
        {
            System.Diagnostics.Debug.WriteLine("SpatialBivariateMoranI start");
            //the three inputs must be in step; checked in debug builds only so release behaviour is unchanged
            System.Diagnostics.Debug.Assert(
                X.Length == Y.Length && Centroids.GetLength(0) >= X.Length,
                "SpatialBivariateMoranI: X, Y and Centroids must describe the same areas");

            //compute some stats on the X and Y sequences that we're going to need
            RunningStat rsx = new RunningStat();

            foreach (double value in X)
            {
                rsx.Push(value);
            }
            RunningStat rsy = new RunningStat();

            foreach (double value in Y)
            {
                rsy.Push(value);
            }
            double MeanX = rsx.Mean, SDX = rsx.StandardDeviation;
            double MeanY = rsy.Mean, SDY = rsy.StandardDeviation;

            //standardise X once up front rather than once per (i, j) pair
            double[] ZX = new double[X.Length];
            for (int j = 0; j < X.Length; j++)
            {
                ZX[j] = (X[j] - MeanX) / SDX;
            }

            double Sum1 = 0, Sum2 = 0;
            double S0 = 0;

            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
            for (int i = 0; i < Y.Length; i++)
            {
                double CiX = Centroids[i, 0];
                double CiY = Centroids[i, 1];
                double Yi  = Y[i];
                double ZYi = (Yi - MeanY) / SDY;
                for (int j = 0; j < X.Length; j++)
                {
                    double dx = CiX - Centroids[j, 0];
                    double dy = CiY - Centroids[j, 1];
                    double D  = Math.Sqrt(dx * dx + dy * dy); //Euclidean distance between centroids i and j
                    //areas closer than 1 unit (including i == j) get weight 1, otherwise the weight is 1/D
                    double W = (D < 1) ? 1 : 1 / D;

                    Sum1 += Yi * W * X[j];   //version 1: raw values, normalised after the loop
                    //version 2: standardised values. This must pair area i in Y with area j in X;
                    //using X[i] here collapses the statistic to the ordinary (non-spatial) correlation.
                    Sum2 += ZYi * W * ZX[j];
                    S0   += W;               //sum of all weights
                }
            }
            double I1 = Sum1 / (S0 * Math.Sqrt(rsy.Variance * rsx.Variance));
            double I2 = Sum2 / S0;

            System.Diagnostics.Debug.WriteLine("SpatialBivariateMoranI finished: " + timer.ElapsedMilliseconds + " ms");
            return(new double [] { I1, I2 });
        }

Example #2

0

Show file

File: KNearestNeighbour.cs Project: maptube/DataStoreMiner

        /// <summary>
        /// Correlate two tables using K nearest neighbours.
        /// For every area i the standardised X value is multiplied by the standardised Y value of the same area
        /// (weight 1.0) and by the standardised Y value of each of its first K neighbours (weight 0.5). The weighted
        /// sum is divided by the sum of the weights actually used.
        /// NOTE: the X value is the base location, so the neighbours are looked up in Y.
        /// TODO: do you need to weight the neighbours differently to the central value?
        /// There are various ways of doing this. Here I'm using neighbours =0.5 but you could use centroid distances.
        /// </summary>
        /// <param name="areas">Area keys for the X and Y data arrays; areas[i] is the key for X[i] and Y[i]</param>
        /// <param name="X">Data values of the first table (the base locations)</param>
        /// <param name="Y">Data values of the second table (where the neighbour values are taken from)</param>
        /// <returns>Weighted mean of the products of standardised X and Y values</returns>
        public double Correlate(string[] areas, double[] X, double[] Y)
        {
            //go through each value of X, lookup the K nearest neighbours in Y and correlate
            //Basically, this is a copy of Correlation.SpatialBivariateMoranI but with the K bit added

            //compute some stats on the X and Y sequences that we're going to need
            RunningStat rsx = new RunningStat();

            foreach (double value in X)
            {
                rsx.Push(value);
            }
            RunningStat rsy = new RunningStat();

            foreach (double value in Y)
            {
                rsy.Push(value);
            }
            double MeanX = rsx.Mean, SDX = rsx.StandardDeviation;
            double MeanY = rsy.Mean, SDY = rsy.StandardDeviation;

            //map each area key to its row so that neighbour keys can be resolved to values in Y
            System.Collections.Generic.Dictionary<string, int> AreaIndex =
                new System.Collections.Generic.Dictionary<string, int>();
            for (int i = 0; i < areas.Length; i++)
            {
                if (areas[i] != null)
                {
                    AreaIndex[areas[i]] = i;
                }
            }

            const double CentreWeight    = 1.0;
            const double NeighbourWeight = 0.5;

            double Sum = 0;
            double S0  = 0; //sum of all weights

            for (int i = 0; i < X.Length; i++)
            {
                double ZXi = (X[i] - MeanX) / SDX;

                //do the central locations first
                Sum += ((Y[i] - MeanY) / SDY) * CentreWeight * ZXi;
                S0  += CentreWeight;

                //now the K neighbours: pair the base X value with each neighbour's Y value.
                //Re-using Y[i] here would just repeat the central term K times and give the plain correlation.
                string[] KNs = Neighbours[areas[i]]; //K neighbours around area i
                for (int j = 0; j < K && j < KNs.Length; j++)
                {
                    int n;
                    //neighbours with no row in this table contribute neither a value nor a weight
                    if (KNs[j] == null || !AreaIndex.TryGetValue(KNs[j], out n) || n >= Y.Length)
                    {
                        continue;
                    }
                    Sum += ((Y[n] - MeanY) / SDY) * NeighbourWeight * ZXi;
                    S0  += NeighbourWeight;
                }
            }
            double I = Sum / S0;

            return(I);
        }

Example #3

0

Show file

File: Kohonen.cs Project: maptube/DataStoreMiner

        //private void ZeroDeltaWeights()
        //{
        //    for (int y = 0; y < OutputDimension; y++)
        //    {
        //        for (int x = 0; x < OutputDimension; x++)
        //        {
        //            for (int j = 0; j < InputNeurons; j++)
        //            {
        //                deltaW[x, y, j] = 0;
        //            }
        //        }
        //    }
        //}

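        //Add all delta weights on to weights at the end of a training epoch.
        //(Plain comment rather than an XML doc comment: the method it describes is commented out below,
        //so a doc comment here would attach to the next live member instead.)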
        //private double UpdateDeltaWeights()
        //{
        //    double Sum = 0;
        //    for (int y = 0; y < OutputDimension; y++)
        //    {
        //        for (int x = 0; x < OutputDimension; x++)
        //        {
        //            for (int j = 0; j < InputNeurons; j++)
        //            {
        //                Sum += Math.Abs(deltaW[x, y, j]);
        //                W[x,y,j]+=deltaW[x, y, j];
        //            }
        //        }
        //    }
        //    return Sum;
        //}

        /// <summary>
        /// Present every pattern in the training set once and accumulate an error figure for the map.
        /// Each pattern is standardised with its own mean and standard deviation, passed to Forward, and then
        /// compared with the weight vector at (WinX, WinY): the per-pattern error is the Euclidean distance between
        /// the two. Patterns whose mean or standard deviation is NaN, or whose standard deviation is zero, are skipped.
        /// With e.g. 2558 datasets times 7201 areas the raw total can be very large, so scaled figures are stored too.
        /// POST: eAll = total / (Matrix.Count * InputNeurons), eMin = smallest per-pattern error / InputNeurons,
        /// eMax = largest per-pattern error / InputNeurons.
        /// </summary>
        /// <param name="Matrix">Training patterns, one array per dataset; the first InputNeurons values of each are used as input</param>
        /// <returns>The raw (unscaled) sum of the per-pattern errors</returns>
        public double CalculateError(List <double[]> Matrix)
        {
            double total = 0;

            eMin = double.MaxValue;
            eMax = 0;

            for (int row = 0; row < Matrix.Count; row++)
            {
                double[] pattern = Matrix[row];

                //mean and standard deviation over the whole pattern
                RunningStat stats = new RunningStat();
                for (int k = 0; k < pattern.Length; k++)
                {
                    stats.Push(pattern[k]);
                }
                double mean = stats.Mean;
                double sd   = stats.StandardDeviation;

                //a pattern that cannot be standardised is left out of the totals
                bool usable = !double.IsNaN(mean) && !double.IsNaN(sd) && (sd != 0);
                if (!usable)
                {
                    continue;
                }

                double[] input = new double[InputNeurons];
                for (int j = 0; j < InputNeurons; j++)
                {
                    input[j] = (pattern[j] - mean) / sd;
                }

                //NOTE(review): Forward presumably selects the winning unit and stores it in WinX/WinY - confirm
                Forward(input);

                double squares = 0;
                for (int j = 0; j < InputNeurons; j++)
                {
                    double diff = W[WinX, WinY, j] - input[j];
                    squares += diff * diff;
                }
                double distance = Math.Sqrt(squares);

                total += distance;
                if (distance < eMin)
                {
                    eMin = distance;
                }
                if (distance > eMax)
                {
                    eMax = distance;
                }
            }

            eAll = total / (Matrix.Count * InputNeurons);
            eMin = eMin / InputNeurons;
            eMax = eMax / InputNeurons;
            return(total); //this is the raw error sum
        }

Example #4

0

Show file

File: Kohonen.cs Project: maptube/DataStoreMiner

        /// <summary>
        /// Train on all the data and save the result. Loads the data matrix ("matrix.bin") and the variable name
        /// index ("varnamesindex.bin") from ImageDirectory, then repeatedly presents one randomly chosen pattern,
        /// standardised with its own mean and standard deviation, to Backward. Every 100 epochs the error over the
        /// whole training set is recalculated; training stops once eAll is 0.001 or less. The weights are written
        /// to "kohonen_weights.bin" every 1000 epochs and again when training ends.
        /// </summary>
        /// <param name="ImageDirectory">Directory holding the input .bin files; the weights file is written here too</param>
        public void Process(string ImageDirectory)
        {
            //NOTE: need geographic lookup between areas and rows in Matrix is only passed to the output function after
            //the weights have been created - geography not needed for training

            List <double[]> Matrix;
            List <string>   VariableNamesIndex; //only referenced by the (disabled) debug output, but still loaded as before
            //SECURITY NOTE(review): BinaryFormatter.Deserialize can execute arbitrary code if the .bin files are not
            //trusted, and the type is obsolete in current .NET. Only point this at files this application wrote itself.
            BinaryFormatter formatter = new BinaryFormatter();

            //load existing matrix (for speed), copied from Datastore.ProcessKNearestNeighbourCorrelate
            //opened read-only so that read-only files and concurrent readers are not rejected
            using (FileStream reader = new FileStream(Path.Combine(ImageDirectory, "matrix.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                Matrix = (List <double[]>)formatter.Deserialize(reader);
            }
            using (FileStream reader = new FileStream(Path.Combine(ImageDirectory, "varnamesindex.bin"), FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                VariableNamesIndex = (List <string>)formatter.Deserialize(reader);
            }

            //training needs at least one pattern that can be standardised, otherwise the loop below could never progress
            bool anyUsable = false;
            foreach (double[] pattern in Matrix)
            {
                double m, s;
                if (TryGetPatternStats(pattern, out m, out s))
                {
                    anyUsable = true;
                    break;
                }
            }

            //TODO: several times through training set with modification in learning rate and neighbourhood
            //now do the training
            Epoch = 0;
            double e              = 0;
            bool   errorEvaluated = false; //true once eAll holds a value calculated during this run

            if (!anyUsable)
            {
                System.Diagnostics.Debug.WriteLine("Kohonen.Process: no usable training patterns, training skipped");
            }
            else
            {
                do
                {
                    //learning rate decays linearly from 1.0 with a floor of 0.1, neighbourhood distance from 4.0 with a floor of 0.5
                    double LearnRate = 1.0 - (((double)Epoch + 1.0) / 10000.0);
                    if (LearnRate < 0.1)
                    {
                        LearnRate = 0.1;
                    }
                    double Distance = 4.0 - (((double)Epoch + 1.0) / 1000.0);
                    if (Distance < 0.5)
                    {
                        Distance = 0.5;
                    }

                    int i = trainingRnd.Next(0, Matrix.Count); //pick a random pattern to apply
                    //Normalise input here - sd and mean, same method as correlation and KNN
                    double MeanX, SDX;
                    if (!TryGetPatternStats(Matrix[i], out MeanX, out SDX))
                    {
                        //Unusable pattern: pick again without counting an epoch. In a do/while, "continue" jumps to
                        //the loop condition, which is why the condition must not trust eAll before it has been evaluated.
                        continue;
                    }
                    double[] X = new double[InputNeurons];
                    for (int j = 0; j < InputNeurons; j++)
                    {
                        X[j] = (Matrix[i][j] - MeanX) / SDX;
                    }

                    //back propagate (NOTE: the value returned by Backward is not used)
                    Backward(LearnRate, Distance, X);     //LearnRate and Distance here

                    //periodically present all the patterns and recalculate the error
                    if (Epoch % 100 == 0)
                    {
                        e = CalculateError(Matrix);                   //e=total error over all datasets and areas
                        errorEvaluated = true;
                        System.Diagnostics.Debug.WriteLine("Epoch: " + Epoch + " LearnRate=" + LearnRate + " Dist=" + Distance + " Error: " + e
                                                           + " eAll: " + eAll + " eMin: " + eMin + " eMax: " + eMax);
                    }
                    if (Epoch % 1000 == 0)
                    {
                        SaveWeights(Path.Combine(ImageDirectory, "kohonen_weights.bin"));
                    }

                    ++Epoch;
                } while (!errorEvaluated || eAll > 0.001);
            }

            SaveWeights(Path.Combine(ImageDirectory, "kohonen_weights.bin"));

            //now output the results (the weights are maps) - need area keys
            //currently doing this outside function due to areakey problem
        }

        /// <summary>
        /// Compute the mean and standard deviation of a pattern and report whether it can be standardised.
        /// </summary>
        /// <param name="Pattern">Raw values of one training pattern</param>
        /// <param name="Mean">Mean of the values</param>
        /// <param name="SD">Standard deviation of the values</param>
        /// <returns>False when the mean or standard deviation is NaN or the standard deviation is zero</returns>
        private static bool TryGetPatternStats(double[] Pattern, out double Mean, out double SD)
        {
            RunningStat rs = new RunningStat();
            foreach (double value in Pattern)
            {
                rs.Push(value);
            }
            Mean = rs.Mean;
            SD   = rs.StandardDeviation;
            return !(double.IsNaN(Mean) || double.IsNaN(SD) || (SD == 0));
        }

C# (CSharp) DatastoreMiner RunningStat Examples