Example No. 1
        /// <summary>
        /// Take the matrix and variable names lookup from the image directory and apply the classification (using the currently loaded
        /// weights) to the whole training set, writing one CSV row per dataset.
        /// </summary>
        /// <param name="ImageDirectory">Directory containing the serialised matrix.bin and varnamesindex.bin files</param>
        /// <param name="OutFilename">Name of the CSV file to write the classification results to</param>
        public void ClassifyAll(string ImageDirectory, string OutFilename)
        {
            List <double[]> Matrix;
            List <string>   VariableNamesIndex;
            BinaryFormatter formatter = new BinaryFormatter();

            //load existing matrix (for speed), copied from Datastore.ProcessKNearestNeighbourCorrelate
            using (FileStream reader = new FileStream(Path.Combine(ImageDirectory, "matrix.bin"), FileMode.Open))
            {
                Matrix = (List <double[]>)formatter.Deserialize(reader);
            }
            using (FileStream reader = new FileStream(Path.Combine(ImageDirectory, "varnamesindex.bin"), FileMode.Open))
            {
                VariableNamesIndex = (List <string>)formatter.Deserialize(reader);
            }

            using (TextWriter writer = File.CreateText(OutFilename))
            {
                writer.Write("j,variable,x,y,arg");
                for (int y = 0; y < OutputDimension; y++)
                {
                    for (int x = 0; x < OutputDimension; x++)
                    {
                        writer.Write("," + x + "_" + y);
                    }
                }
                writer.WriteLine();
                for (int i = 0; i < Matrix.Count; i++)
                {
                    //TODO: you could move the normalisation out into a separate function as also in the Backward procedure
                    RunningStat rsx = new RunningStat();
                    foreach (double value in Matrix[i])
                    {
                        rsx.Push(value);
                    }
                    double MeanX = rsx.Mean, SDX = rsx.StandardDeviation;
                    if (double.IsNaN(MeanX) || double.IsNaN(SDX) || (SDX == 0))
                    {
                        //System.Diagnostics.Debug.WriteLine("Skipping "+VariableNamesIndex[i]);
                        continue;
                    }
                    double[] X = new double[InputNeurons];
                    for (int j = 0; j < InputNeurons; j++)
                    {
                        X[j] = (Matrix[i][j] - MeanX) / SDX;
                    }

                    Forward(X);
                    writer.Write(i + "," + VariableNamesIndex[i] + "," + WinX + "," + WinY + "," + WinArg);
                    for (int y = 0; y < OutputDimension; y++)
                    {
                        for (int x = 0; x < OutputDimension; x++)
                        {
                            writer.Write("," + OutputNeurons[x, y]);
                        }
                    }
                    writer.WriteLine();
                }
            }
        }
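
All of these examples normalise their inputs by pushing every value through a RunningStat accumulator, whose definition is not shown on this page. A minimal sketch of what it might look like, assuming only the Push, Mean, StandardDeviation and Variance members used in these examples and implementing them with Welford's online algorithm (a hypothetical reconstruction, not the original class):

        //Hypothetical reconstruction for illustration - the real RunningStat class is not shown in these examples
        public class RunningStat
        {
            private long   n;    //count of values pushed so far
            private double mean; //running mean
            private double m2;   //running sum of squared deviations from the mean

            //Add one value to the running statistics (Welford's online algorithm)
            public void Push(double x)
            {
                n++;
                double delta = x - mean;
                mean += delta / n;
                m2   += delta * (x - mean);
            }

            public double Mean { get { return n > 0 ? mean : double.NaN; } }
            public double Variance { get { return n > 1 ? m2 / (n - 1) : double.NaN; } }
            public double StandardDeviation { get { return Math.Sqrt(Variance); } }
        }

Returning NaN for degenerate inputs is consistent with the double.IsNaN(MeanX) and double.IsNaN(SDX) skip checks in the loops above.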
Example No. 2
        /// <summary>
        /// Bivariate Moran I based on a pre-calculated spatial weights matrix (which is why it's not static like the other version).
        /// You don't need to pass in a centroids array as it uses this.SpatialWeights from the constructor, but you MUST ensure
        /// that X and Y are in step with the centroids used to calculate the spatial weights i.e. X[0], Y[0] and Centroid[0] all
        /// reference the same area. This should be obvious.
        /// </summary>
        /// <param name="X">Dataset X</param>
        /// <param name="Y">Dataset Y</param>
        /// <returns>Correlation coefficient [-1..+1]</returns>
        public double SpatialBivariateMoranIFast(double[] X, double[] Y)
        {
            System.Diagnostics.Debug.WriteLine("SpatialBivariateMoranIFast start");
            //Assert X.Length==Y.Length?
            //compute some stats on the X and Y sequences that we're going to need
            RunningStat rsx = new RunningStat();

            foreach (double value in X)
            {
                rsx.Push(value);
            }
            RunningStat rsy = new RunningStat();

            foreach (double value in Y)
            {
                rsy.Push(value);
            }
            double MeanX = rsx.Mean, SDX = rsx.StandardDeviation;
            double MeanY = rsy.Mean, SDY = rsy.StandardDeviation;

            //pre-calculate normalised X and Y to save some time on the big nested loop below
            double[] XN = new double[X.Length];
            double[] YN = new double[Y.Length];
            for (int i = 0; i < X.Length; i++)
            {
                XN[i] = (X[i] - MeanX) / SDX;
                YN[i] = (Y[i] - MeanY) / SDY;
            }

            double Sum2 = 0; //Sum1 from the static version is not needed here

            //double S0 = 0;
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
            double[] ParallelSum = new double[Y.Length];
            Parallel.For(0, YN.Length, i =>
                         //for (int i = 0; i < YN.Length; i++)
            {
                ParallelSum[i] = 0;
                //Parallel.For(0, X.Length, j =>
                for (int j = 0; j < XN.Length; j++)
                {
                    double W = this.SpatialWeights[i, j]; //no float cast - keep full double precision
                    //Sum1 += Y[i] * W * X[j]; //version 1
                    //Sum2 += ((Y[i] - MeanY) / SDY) * W * ((X[j] - MeanX) / SDX); //version 2
                    //Sum2 += YN[i] * W * XN[j]; //version 2 optimised with pre-calculated normalised X and Y
                    ParallelSum[i] += YN[i] * W * XN[j]; //version 3 optimised for parallel
                    //S0 += W; //sum of all weights
                }/*);*/
            });
            for (int i = 0; i < YN.Length; i++)
            {
                Sum2 += ParallelSum[i]; //gather up all the parallel i sums into one - OK, you can use a summation kernel here...
            }
            double I = Sum2 / this.S0;  //this.S0 is the sum of all weights, pre-calculated alongside SpatialWeights

            System.Diagnostics.Debug.WriteLine("SpatialBivariateMoranI finished: " + timer.ElapsedMilliseconds + " ms");
            return(I);
        }
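
The ParallelSum array exists so that each parallel iteration writes only to its own slot, avoiding a data race on a shared Sum2; the serial loop afterwards gathers the partial sums. As the comment in the gather loop notes, a reduction can do this in one step. A sketch using the thread-local accumulator overload of Parallel.For (a hypothetical refactoring using the same YN, XN, SpatialWeights and S0 locals and fields as the method above), which would replace both the ParallelSum array and the gather loop:

            double Sum2 = 0;
            object sumLock = new object();
            Parallel.For(0, YN.Length,
                         () => 0.0,                   //per-thread partial sum, initialised to zero
                         (i, loopState, local) =>
                         {
                             for (int j = 0; j < XN.Length; j++)
                             {
                                 local += YN[i] * this.SpatialWeights[i, j] * XN[j];
                             }
                             return local;
                         },
                         local => { lock (sumLock) { Sum2 += local; } }); //merge each thread's partial sum once
            double I = Sum2 / this.S0;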
Example No. 3
        /// <summary>
        /// Spatial Bivariate Moran I using
        /// I=Sigma_i(Sigma_j(Yi*Wij*Xj))/(S0*Sqrt(Variance(Y)*Variance(X)))
        /// where S0 is the sum of all the elements in W
        /// NOTE: X[i], Y[i] and Centroids[i] MUST all reference the same spatial area i.e. all three arrays are in step
        /// </summary>
        /// <param name="X">Data values of first table</param>
        /// <param name="Y">Data values of second table (must match X spatially)</param>
        /// <param name="Centroids">Centroid points of the polygon areas, used to calculate distance weights (must match X and Y spatially).
        /// The first point is (Centroids[0,0], Centroids[0,1]), the second is (Centroids[1,0], Centroids[1,1]). The reason for using the
        /// 2D double array in preference to an array of Point is the big increase in speed.</param>
        /// <returns>Two correlation coefficients: I1 from the raw formula above and I2 from the normalised (version 2) sum</returns>
        public static double [] SpatialBivariateMoranI(double [] X, double [] Y, double [,] Centroids)
        {
            System.Diagnostics.Debug.WriteLine("SpatialBivariateMoranI start");
            //Assert X.Length==Y.Length?
            //compute some stats on the X and Y sequences that we're going to need
            RunningStat rsx = new RunningStat();

            foreach (double value in X)
            {
                rsx.Push(value);
            }
            RunningStat rsy = new RunningStat();

            foreach (double value in Y)
            {
                rsy.Push(value);
            }
            double MeanX = rsx.Mean, SDX = rsx.StandardDeviation;
            double MeanY = rsy.Mean, SDY = rsy.StandardDeviation;

            double Sum1 = 0, Sum2 = 0;
            double S0 = 0;

            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
            //Parallel.For(0, Y.Length, i =>
            for (int i = 0; i < Y.Length; i++)
            {
                double CiX = Centroids[i, 0];
                double CiY = Centroids[i, 1];
                //Parallel.For(0, X.Length, j =>
                for (int j = 0; j < X.Length; j++)
                {
                    double dx = CiX - Centroids[j, 0];
                    double dy = CiY - Centroids[j, 1];
                    double D  = Math.Sqrt(dx * dx + dy * dy); //Euclidean distance between centroids; the weight below is its inverse
                    double W  = (D < 1) ? 1 : 1 / D; //autocorrelation weight=1 within unit distance, otherwise the weight is 1/D
                    Sum1 += Y[i] * W * X[j];                                     //version 1
                    Sum2 += ((Y[i] - MeanY) / SDY) * W * ((X[j] - MeanX) / SDX); //version 2 (note X[j], matching version 1 and the Fast version)
                    S0   += W;                                                   //sum of all weights
                }/*);*/
            }/*);*/
            double I1 = Sum1 / (S0 * Math.Sqrt(rsy.Variance * rsx.Variance));
            double I2 = Sum2 / S0;

            System.Diagnostics.Debug.WriteLine("SpatialBivariateMoranI finished: " + timer.ElapsedMilliseconds + " ms");
            return(new double [] { I1, I2 });
        }
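
The two statistics returned correspond to the formula in the summary comment. In LaTeX, with the weight $W_{ij} = 1$ when the centroid distance is below 1 and $1/D_{ij}$ otherwise:

\[
I_1 = \frac{\sum_i \sum_j Y_i \, W_{ij} \, X_j}{S_0 \sqrt{\mathrm{Var}(Y)\,\mathrm{Var}(X)}},
\qquad
I_2 = \frac{1}{S_0} \sum_i \sum_j \frac{Y_i - \bar{Y}}{\sigma_Y} \, W_{ij} \, \frac{X_j - \bar{X}}{\sigma_X},
\qquad
S_0 = \sum_i \sum_j W_{ij}
\]

I1 is accumulated in Sum1 from the raw values, I2 in Sum2 from the normalised values; the Fast version above computes only I2, with S0 pre-calculated.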
Example No. 4
        /// <summary>
        /// Correlate two tables using K nearest neighbours.
        /// NOTE: the X value is the base location, so the neighbours are looked up in Y.
        /// TODO: do you need to weight the neighbours differently to the central value?
        /// There are various ways of doing this. Here the central value gets a weight of 1.0 and each neighbour 0.5, but you could use centroid distances.
        /// </summary>
        /// <param name="areas">Area keys for the X and Y data arrays</param>
        /// <param name="X">Data values of the first table</param>
        /// <param name="Y">Data values of the second table (must match X spatially)</param>
        /// <returns>Correlation coefficient</returns>
        public double Correlate(string[] areas, double[] X, double[] Y)
        {
            //go through each value of X, lookup the K nearest neighbours in Y and correlate
            //Basically, this is a copy of Correlation.SpatialBivariateMoranI but with the K bit added

            //compute some stats on the X and Y sequences that we're going to need
            RunningStat rsx = new RunningStat();

            foreach (double value in X)
            {
                rsx.Push(value);
            }
            RunningStat rsy = new RunningStat();

            foreach (double value in Y)
            {
                rsy.Push(value);
            }
            double MeanX = rsx.Mean, SDX = rsx.StandardDeviation;
            double MeanY = rsy.Mean, SDY = rsy.StandardDeviation;

            double Sum = 0;
            double S0  = 0; //sum of all weights

            //lookup from area key back to array index so that neighbour keys can be resolved to X/Y indices
            //(added here because the neighbour keys were otherwise unused - assumes area keys are unique)
            Dictionary <string, int> AreaIndex = new Dictionary <string, int>();
            for (int a = 0; a < areas.Length; a++)
            {
                AreaIndex[areas[a]] = a;
            }

            //System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
            for (int i = 0; i < X.Length; i++)
            {
                //do the central locations first
                double W;
                W    = 1.0;
                Sum += ((Y[i] - MeanY) / SDY) * W * ((X[i] - MeanX) / SDX);
                S0  += W;

                //now the K neighbours
                W = 0.5;
                string[] KNs = Neighbours[areas[i]]; //K neighbour area keys around area i
                for (int j = 0; j < K; j++)
                {
                    int n = AreaIndex[KNs[j]]; //resolve the j-th neighbour key to its array index
                    Sum += ((Y[n] - MeanY) / SDY) * W * ((X[i] - MeanX) / SDX); //neighbour value from Y, base value from X
                    S0  += W;
                }
            }
            double I = Sum / S0;

            return(I);
        }
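
With a self-weight of 1.0 and K neighbour weights of 0.5, every area contributes exactly 1 + 0.5K to S0, so for N areas the statistic computed above reduces to (writing $z_X$, $z_Y$ for the normalised values and $N(i)$ for the K neighbour indices of area $i$):

\[
I = \frac{1}{N\,(1 + 0.5K)} \sum_{i=1}^{N} \Big( z_Y(i)\, z_X(i) + 0.5 \sum_{n \in N(i)} z_Y(n)\, z_X(i) \Big)
\]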
Example No. 5
        //private void ZeroDeltaWeights()
        //{
        //    for (int y = 0; y < OutputDimension; y++)
        //    {
        //        for (int x = 0; x < OutputDimension; x++)
        //        {
        //            for (int j = 0; j < InputNeurons; j++)
        //            {
        //                deltaW[x, y, j] = 0;
        //            }
        //        }
        //    }
        //}

        /// <summary>
        /// Add all delta weights on to weights at the end of a training epoch
        /// </summary>
        //private double UpdateDeltaWeights()
        //{
        //    double Sum = 0;
        //    for (int y = 0; y < OutputDimension; y++)
        //    {
        //        for (int x = 0; x < OutputDimension; x++)
        //        {
        //            for (int j = 0; j < InputNeurons; j++)
        //            {
        //                Sum += Math.Abs(deltaW[x, y, j]);
        //                W[x,y,j]+=deltaW[x, y, j];
        //            }
        //        }
        //    }
        //    return Sum;
        //}

        /// <summary>
        /// Go through the whole training set and sum the error for every presented pattern.
        /// This number is likely to be very big if there are 2558 datasets times 7201 areas = 18,420,158 values.
        /// As an alternative, you could look at the stats of the worst and best case.
        /// Divide this answer by datasets*areas to get the average error, which is more useful.
        /// POST: places errors into eAll (RMS over all datasets), eMin (RMS best dataset) and eMax (RMS worst dataset)
        /// </summary>
        /// <param name="Matrix">Training set: one array of InputNeurons values per dataset</param>
        /// <returns>The raw error sum over all datasets</returns>
        public double CalculateError(List <double[]> Matrix)
        {
            double e = 0;

            eMin = double.MaxValue; eMax = 0;
            for (int i = 0; i < Matrix.Count; i++)
            {
                RunningStat rsx = new RunningStat();
                foreach (double value in Matrix[i])
                {
                    rsx.Push(value);
                }
                double MeanX = rsx.Mean, SDX = rsx.StandardDeviation;
                if (double.IsNaN(MeanX) || double.IsNaN(SDX) || (SDX == 0))
                {
                    //System.Diagnostics.Debug.WriteLine("Skipping "+VariableNamesIndex[i]);
                    continue;
                }
                double[] X = new double[InputNeurons];
                for (int j = 0; j < InputNeurons; j++)
                {
                    X[j] = (Matrix[i][j] - MeanX) / SDX;
                }

                Forward(X);
                double Sum = 0;
                for (int j = 0; j < InputNeurons; j++)
                {
                    Sum += (W[WinX, WinY, j] - X[j]) * (W[WinX, WinY, j] - X[j]);
                }
                Sum = Math.Sqrt(Sum);
                e  += Sum;
                if (Sum < eMin)
                {
                    eMin = Sum;
                }
                if (Sum > eMax)
                {
                    eMax = Sum;
                }
            }
            eAll  = e / (Matrix.Count * InputNeurons);
            eMin /= InputNeurons;
            eMax /= InputNeurons;
            return(e); //this is the raw error sum
        }
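
Per dataset, the error computed above is the Euclidean distance between the normalised input vector and the weight vector of the winning output neuron, i.e. the quantisation error of the self-organising map (the weights file is named kohonen_weights.bin in the training example below). The return value sums these over the training set:

\[
e = \sum_{i} \left\lVert \mathbf{w}_{\mathrm{win}(i)} - \mathbf{x}_i \right\rVert_2,
\qquad
e_{\mathrm{All}} = \frac{e}{M \cdot n}
\]

where $M$ = Matrix.Count and $n$ = InputNeurons (eMin and eMax are divided by $n$ only).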
Example No. 6
        //Train on all the data and get the result out. Takes in all the data as a matrix of input values
        public void Process(string ImageDirectory)
        {
            //NOTE: the geographic lookup between areas and rows in Matrix is only needed by the output function after
            //the weights have been created - geography is not needed for training

            List <double[]> Matrix;
            List <string>   VariableNamesIndex;
            BinaryFormatter formatter = new BinaryFormatter();

            //load existing matrix (for speed), copied from Datastore.ProcessKNearestNeighbourCorrelate
            using (FileStream reader = new FileStream(Path.Combine(ImageDirectory, "matrix.bin"), FileMode.Open))
            {
                Matrix = (List <double[]>)formatter.Deserialize(reader);
            }
            using (FileStream reader = new FileStream(Path.Combine(ImageDirectory, "varnamesindex.bin"), FileMode.Open))
            {
                VariableNamesIndex = (List <string>)formatter.Deserialize(reader);
            }


            //TODO: several times through training set with modification in learning rate and neighbourhood
            //now do the training
            Epoch = 0;
            double e             = 0;
            double DatasetsAreas = Matrix.Count * InputNeurons;

            do
            {
                double LearnRate, Distance; //both set from the decay schedules below
                //if (Epoch < 2) { LearnRate = 0.85; Distance = 4.0; }
                //else if (Epoch < 4) { LearnRate = 0.5; Distance = 3.0; }
                //else if (Epoch < 6) { LearnRate=0.1; Distance=2.0; }
                //else if (Epoch < 8) { LearnRate = 0.1; Distance = 1.0; }
                //else { LearnRate = 10.0 / (float)Epoch; Distance = 0.5; }
                LearnRate = 1.0 - (((double)Epoch + 1.0) / 10000.0);
                if (LearnRate < 0.1)
                {
                    LearnRate = 0.1;
                }
                Distance = 4.0 - (((double)Epoch + 1.0) / 1000.0);
                if (Distance < 0.5)
                {
                    Distance = 0.5;
                }

                //ZeroDeltaWeights();

                //for (int i = 0; i <Matrix.Count; i++)
                //{
                int i = trainingRnd.Next(0, Matrix.Count); //pick a random pattern to apply
                //System.Diagnostics.Debug.WriteLine("Applying: " + VariableNamesIndex[i]);
                //Normalise input here - sd and mean, same method as correlation and KNN
                RunningStat rsx = new RunningStat();
                foreach (double value in Matrix[i])
                {
                    rsx.Push(value);
                }
                double MeanX = rsx.Mean, SDX = rsx.StandardDeviation;
                if (double.IsNaN(MeanX) || double.IsNaN(SDX) || (SDX == 0))
                {
                    //System.Diagnostics.Debug.WriteLine("Skipping "+VariableNamesIndex[i]);
                    continue;
                }
                double[] X = new double[InputNeurons];
                for (int j = 0; j < InputNeurons; j++)
                {
                    X[j] = (Matrix[i][j] - MeanX) / SDX;
                }

                //back propagate, sum errors across whole of training set (NOTE: not using this error value)
                double deltaE = Backward(LearnRate, Distance, X);     //LearnRate and Distance here
                //System.Diagnostics.Debug.WriteLine("e=" + e + " Mean="+MeanX+" SDX="+SDX);
                //}
                //now all the patterns have been presented, add the delta weights onto the weights and calculate the change
                //double deltaSum = UpdateDeltaWeights();

                //periodically present all the patterns and recalculate the error
                if (Epoch % 100 == 0)
                {
                    e = CalculateError(Matrix); //e=total error over all datasets and areas
                    System.Diagnostics.Debug.WriteLine("Epoch: " + Epoch + " LearnRate=" + LearnRate + " Dist=" + Distance + " Error: " + e
                                                       + " eAll: " + eAll + " eMin: " + eMin + " eMax: " + eMax);
                }
                if (Epoch % 1000 == 0)
                {
                    SaveWeights(Path.Combine(ImageDirectory, "kohonen_weights.bin"));
                }

                ++Epoch;
            } while (eAll > 0.001);

            SaveWeights(Path.Combine(ImageDirectory, "kohonen_weights.bin"));

            //now output the results (the weights are maps) - need area keys
            //currently doing this outside function due to areakey problem
        }
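
The learning rate and neighbourhood distance used in the training loop both decay linearly with the epoch count $t$ and are then clamped to a floor:

\[
\eta(t) = \max\Big(0.1,\; 1 - \frac{t+1}{10000}\Big),
\qquad
d(t) = \max\Big(0.5,\; 4 - \frac{t+1}{1000}\Big)
\]

so the neighbourhood shrinks to its floor of 0.5 at epoch 3499 and the learning rate reaches its floor of 0.1 at epoch 8999, after which training continues at the floor values until eAll drops below 0.001.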