/// <summary>
/// Spatial Bivariate Moran I using
/// I = Sigma_i(Sigma_j(Yi*Wij*Xj)) / (S0*Sqrt(Variance(Y)*Variance(X)))
/// where S0 is the sum of all the elements in W.
/// NOTE: X[i], Y[i] and Centroids[i] MUST all reference the same spatial area i.e. all three arrays are in step.
/// </summary>
/// <param name="X">Data values of first table</param>
/// <param name="Y">Data values of second table (must match X spatially)</param>
/// <param name="Centroids">Centroid points of polygon areas used to calculate distance weights (must match X and Y spatially).
/// The first point is ([0,0], [0,1]), the second is ([1,0], [1,1]). The 2D double array is used in preference to
/// an array of Point because of the big increase in speed.</param>
/// <returns>Two estimates of I: element 0 from the raw-value formula above, element 1 from the standardised (z-score) variant</returns>
public static double[] SpatialBivariateMoranI(double[] X, double[] Y, double[,] Centroids)
{
    System.Diagnostics.Debug.WriteLine("SpatialBivariateMoranI start");
    System.Diagnostics.Debug.Assert(X.Length == Y.Length, "X and Y must be the same length");

    //compute some stats on the X and Y sequences that we're going to need
    RunningStat rsx = new RunningStat();
    foreach (double value in X) rsx.Push(value);
    RunningStat rsy = new RunningStat();
    foreach (double value in Y) rsy.Push(value);
    double MeanX = rsx.Mean, SDX = rsx.StandardDeviation;
    double MeanY = rsy.Mean, SDY = rsy.StandardDeviation;

    double Sum1 = 0, Sum2 = 0;
    double S0 = 0;
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
    //Parallel.For(0, Y.Length, i =>
    for (int i = 0; i < Y.Length; i++)
    {
        double CiX = Centroids[i, 0];
        double CiY = Centroids[i, 1];
        //Parallel.For(0, X.Length, j =>
        for (int j = 0; j < X.Length; j++)
        {
            double dx = CiX - Centroids[j, 0];
            double dy = CiY - Centroids[j, 1];
            double D = Math.Sqrt(dx * dx + dy * dy);
            //inverse distance weighting, clamped to 1 below unit distance so W never exceeds 1
            double W;
            if (D < 1)
            {
                W = 1; //autocorrelation weight = 1 (includes the i == j case)
            }
            else
            {
                W = 1 / D; //otherwise the correlation weight is 1/D
            }
            Sum1 += Y[i] * W * X[j];                                     //version 1: raw values
            Sum2 += ((Y[i] - MeanY) / SDY) * W * ((X[j] - MeanX) / SDX); //version 2: standardised values (X[j], not X[i], to match the formula)
            S0 += W;                                                     //sum of all weights
        }/*);*/
    }/*);*/
    double I1 = Sum1 / (S0 * Math.Sqrt(rsy.Variance * rsx.Variance));
    double I2 = Sum2 / S0;
    System.Diagnostics.Debug.WriteLine("SpatialBivariateMoranI finished: " + timer.ElapsedMilliseconds + " ms");
    return new double[] { I1, I2 };
}
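//The methods in this file rely on a RunningStat class that isn't shown here. Below is a minimal sketch,
//assuming it implements Welford's online algorithm, which matches the Push/Mean/Variance/StandardDeviation
//usage above and the NaN checks elsewhere in this file; the real class may differ.
public class RunningStat
{
    private long n = 0;
    private double mean = 0, m2 = 0; //m2 accumulates the sum of squared deviations from the running mean

    /// <summary>Add a value to the running statistics (Welford's online update).</summary>
    public void Push(double x)
    {
        ++n;
        double delta = x - mean;
        mean += delta / n;
        m2 += delta * (x - mean);
    }

    public double Mean { get { return mean; } }
    /// <summary>Sample variance (n-1 denominator); NaN until at least two values have been pushed.</summary>
    public double Variance { get { return (n > 1) ? m2 / (n - 1) : double.NaN; } }
    public double StandardDeviation { get { return Math.Sqrt(Variance); } }
}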
/// <summary>
/// Correlate two tables using the K nearest neighbours.
/// NOTE: the X value is the base location, so the neighbours are looked up in Y.
/// TODO: do you need to weight the neighbours differently to the central value?
/// There are various ways of doing this. Here I'm using a neighbour weight of 0.5, but you could use centroid distances instead.
/// </summary>
/// <param name="areas">Area keys for the X and Y data arrays</param>
/// <param name="X">Data values of first table</param>
/// <param name="Y">Data values of second table (must match X spatially)</param>
/// <returns>The weighted correlation coefficient I</returns>
public double Correlate(string[] areas, double[] X, double[] Y)
{
    //go through each value of X, look up the K nearest neighbours in Y and correlate
    //Basically, this is a copy of Correlation.SpatialBivariateMoranI but with the K nearest neighbours added

    //compute some stats on the X and Y sequences that we're going to need
    RunningStat rsx = new RunningStat();
    foreach (double value in X) rsx.Push(value);
    RunningStat rsy = new RunningStat();
    foreach (double value in Y) rsy.Push(value);
    double MeanX = rsx.Mean, SDX = rsx.StandardDeviation;
    double MeanY = rsy.Mean, SDY = rsy.StandardDeviation;

    double Sum = 0;
    double S0 = 0; //sum of all weights
    //System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
    for (int i = 0; i < X.Length; i++)
    {
        //do the central location first
        double W = 1.0;
        Sum += ((Y[i] - MeanY) / SDY) * W * ((X[i] - MeanX) / SDX);
        S0 += W;
        //now the K neighbours
        W = 0.5;
        string[] KNs = Neighbours[areas[i]]; //K nearest neighbour area keys around area i
        for (int j = 0; j < K; j++)
        {
            //look up the jth neighbour's position in the data arrays by its area key
            //(a Dictionary<string,int> built once would be faster than IndexOf here)
            int n = Array.IndexOf(areas, KNs[j]);
            if (n < 0) continue; //neighbour key not present in the data arrays
            Sum += ((Y[n] - MeanY) / SDY) * W * ((X[i] - MeanX) / SDX);
            S0 += W;
        }
    }
    double I = Sum / S0;
    return I;
}
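//Neighbours and K are class members that aren't defined in this file; the indexer usage above implies
//Neighbours maps an area key to an array of neighbouring area keys. Below is a minimal sketch of how that
//lookup might be built from the same centroid array used by SpatialBivariateMoranI (uses System.Linq).
//BuildNeighbours itself is illustrative only - the real code may derive neighbours from polygon adjacency.
private void BuildNeighbours(string[] areas, double[,] Centroids)
{
    Neighbours = new Dictionary<string, string[]>();
    for (int i = 0; i < areas.Length; i++)
    {
        //sort every other area by squared centroid distance from area i and keep the closest K
        string[] nearest = Enumerable.Range(0, areas.Length)
            .Where(j => j != i)
            .OrderBy(j =>
            {
                double dx = Centroids[i, 0] - Centroids[j, 0];
                double dy = Centroids[i, 1] - Centroids[j, 1];
                return dx * dx + dy * dy;
            })
            .Take(K)
            .Select(j => areas[j])
            .ToArray();
        Neighbours[areas[i]] = nearest;
    }
}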
//private void ZeroDeltaWeights()
//{
//    for (int y = 0; y < OutputDimension; y++)
//    {
//        for (int x = 0; x < OutputDimension; x++)
//        {
//            for (int j = 0; j < InputNeurons; j++)
//            {
//                deltaW[x, y, j] = 0;
//            }
//        }
//    }
//}

/// <summary>
/// Add all delta weights on to the weights at the end of a training epoch.
/// </summary>
//private double UpdateDeltaWeights()
//{
//    double Sum = 0;
//    for (int y = 0; y < OutputDimension; y++)
//    {
//        for (int x = 0; x < OutputDimension; x++)
//        {
//            for (int j = 0; j < InputNeurons; j++)
//            {
//                Sum += Math.Abs(deltaW[x, y, j]);
//                W[x, y, j] += deltaW[x, y, j];
//            }
//        }
//    }
//    return Sum;
//}

/// <summary>
/// Go through the whole training set and sum the root mean square errors for every presented pattern.
/// This number is likely to be very big: 2558 datasets times 7201 areas = 18,420,158 values.
/// As an alternative, you could look at the stats of the worst and best case.
/// Divide this answer by datasets*areas to get the average error, which is more useful.
/// POST: places errors into eAll (RMS over all datasets), eMin (RMS best dataset) and eMax (RMS worst dataset)
/// </summary>
/// <param name="Matrix">Training data: one row per dataset, each containing InputNeurons values (one per area)</param>
/// <returns>The raw error sum over all presented patterns</returns>
public double CalculateError(List<double[]> Matrix)
{
    double e = 0;
    eMin = double.MaxValue;
    eMax = 0;
    for (int i = 0; i < Matrix.Count; i++)
    {
        RunningStat rsx = new RunningStat();
        foreach (double value in Matrix[i]) rsx.Push(value);
        double MeanX = rsx.Mean, SDX = rsx.StandardDeviation;
        if (double.IsNaN(MeanX) || double.IsNaN(SDX) || (SDX == 0))
        {
            //System.Diagnostics.Debug.WriteLine("Skipping "+VariableNamesIndex[i]);
            continue;
        }
        //standardise the input pattern (z-scores), same method as correlation and KNN
        double[] X = new double[InputNeurons];
        for (int j = 0; j < InputNeurons; j++) X[j] = (Matrix[i][j] - MeanX) / SDX;
        Forward(X); //find the winning output neuron (WinX, WinY) for this pattern
        double Sum = 0;
        for (int j = 0; j < InputNeurons; j++)
        {
            Sum += (W[WinX, WinY, j] - X[j]) * (W[WinX, WinY, j] - X[j]);
        }
        Sum = Math.Sqrt(Sum);
        e += Sum;
        if (Sum < eMin) eMin = Sum;
        if (Sum > eMax) eMax = Sum;
    }
    eAll = e / (Matrix.Count * InputNeurons); //NOTE: datasets skipped above still count in this average
    eMin /= InputNeurons;
    eMax /= InputNeurons;
    return e; //this is the raw error sum
}
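//Forward is called above but not defined in this file. Below is a minimal sketch, assuming the standard
//Kohonen self-organising map winner search over the class members W[x, y, j], WinX, WinY, OutputDimension
//and InputNeurons that appear elsewhere in this file; the real implementation may differ.
/// <summary>Find the output neuron whose weight vector is closest (squared Euclidean distance) to the input pattern X.</summary>
private void Forward(double[] X)
{
    double best = double.MaxValue;
    for (int y = 0; y < OutputDimension; y++)
    {
        for (int x = 0; x < OutputDimension; x++)
        {
            //squared distance between this neuron's weight vector and the input
            double d = 0;
            for (int j = 0; j < InputNeurons; j++)
            {
                double diff = W[x, y, j] - X[j];
                d += diff * diff;
            }
            if (d < best) { best = d; WinX = x; WinY = y; }
        }
    }
}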
/// <summary>
/// Train on all the data and get the result out. Takes in all the data as a matrix of input values.
/// NOTE: the geographic lookup between areas and rows in Matrix is only needed by the output function after
/// the weights have been created - geography is not needed for training.
/// </summary>
/// <param name="ImageDirectory">Directory containing matrix.bin and varnamesindex.bin, and where the weights are saved</param>
public void Process(string ImageDirectory)
{
    List<double[]> Matrix;
    List<string> VariableNamesIndex;
    BinaryFormatter formatter = new BinaryFormatter();
    //load existing matrix (for speed), copied from Datastore.ProcessKNearestNeighbourCorrelate
    using (FileStream reader = new FileStream(Path.Combine(ImageDirectory, "matrix.bin"), FileMode.Open))
    {
        Matrix = (List<double[]>)formatter.Deserialize(reader);
    }
    using (FileStream reader = new FileStream(Path.Combine(ImageDirectory, "varnamesindex.bin"), FileMode.Open))
    {
        VariableNamesIndex = (List<string>)formatter.Deserialize(reader);
    }

    //TODO: several times through the training set with modification of the learning rate and neighbourhood
    //now do the training
    Epoch = 0;
    double e = 0;
    double DatasetsAreas = Matrix.Count * InputNeurons; //(not currently used)
    do
    {
        //learning rate decays linearly from 1.0 to a floor of 0.1; neighbourhood distance from 4.0 to 0.5
        //if (Epoch < 2) { LearnRate = 0.85; Distance = 4.0; }
        //else if (Epoch < 4) { LearnRate = 0.5; Distance = 3.0; }
        //else if (Epoch < 6) { LearnRate = 0.1; Distance = 2.0; }
        //else if (Epoch < 8) { LearnRate = 0.1; Distance = 1.0; }
        //else { LearnRate = 10.0 / (float)Epoch; Distance = 0.5; }
        double LearnRate = 1.0 - (((double)Epoch + 1.0) / 10000.0);
        if (LearnRate < 0.1) LearnRate = 0.1;
        double Distance = 4.0 - (((double)Epoch + 1.0) / 1000.0);
        if (Distance < 0.5) Distance = 0.5;

        //ZeroDeltaWeights();
        //for (int i = 0; i < Matrix.Count; i++)
        //{
        int i = trainingRnd.Next(0, Matrix.Count); //pick a random pattern to apply
        //System.Diagnostics.Debug.WriteLine("Applying: " + VariableNamesIndex[i]);

        //normalise the input here - sd and mean, same method as correlation and KNN
        RunningStat rsx = new RunningStat();
        foreach (double value in Matrix[i]) rsx.Push(value);
        double MeanX = rsx.Mean, SDX = rsx.StandardDeviation;
        if (double.IsNaN(MeanX) || double.IsNaN(SDX) || (SDX == 0))
        {
            //System.Diagnostics.Debug.WriteLine("Skipping "+VariableNamesIndex[i]);
            continue;
        }
        double[] X = new double[InputNeurons];
        for (int j = 0; j < InputNeurons; j++) X[j] = (Matrix[i][j] - MeanX) / SDX;

        //back propagate (NOTE: this error value isn't used - the stopping test uses eAll from CalculateError)
        double deltaE = Backward(LearnRate, Distance, X);
        //System.Diagnostics.Debug.WriteLine("e=" + e + " Mean=" + MeanX + " SDX=" + SDX);
        //}
        //now all the patterns have been presented, add the delta weights onto the weights and calculate the change
        //double deltaSum = UpdateDeltaWeights();

        //periodically present all the patterns and recalculate the error
        if (Epoch % 100 == 0)
        {
            e = CalculateError(Matrix); //e = total error over all datasets and areas
            System.Diagnostics.Debug.WriteLine("Epoch: " + Epoch + " LearnRate=" + LearnRate + " Dist=" + Distance
                + " Error: " + e + " eAll: " + eAll + " eMin: " + eMin + " eMax: " + eMax);
        }
        if (Epoch % 1000 == 0) SaveWeights(Path.Combine(ImageDirectory, "kohonen_weights.bin"));
        ++Epoch;
    } while (eAll > 0.001);
    SaveWeights(Path.Combine(ImageDirectory, "kohonen_weights.bin"));

    //now output the results (the weights are maps) - need area keys
    //currently doing this outside the function due to the areakey problem
}
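//Backward is called above but not defined in this file. Below is a minimal sketch, assuming the classic
//Kohonen update: find the winning neuron with Forward, then move every neuron within the neighbourhood
//Distance of the winner towards the input, scaled by LearnRate. The return value is taken to be the total
//absolute weight change. The real implementation may differ (e.g. a Gaussian neighbourhood falloff).
private double Backward(double LearnRate, double Distance, double[] X)
{
    Forward(X); //sets WinX, WinY to the best-matching neuron for this pattern
    double Sum = 0;
    for (int y = 0; y < OutputDimension; y++)
    {
        for (int x = 0; x < OutputDimension; x++)
        {
            //only neurons within the neighbourhood radius of the winner get updated
            double dx = x - WinX, dy = y - WinY;
            if (Math.Sqrt(dx * dx + dy * dy) > Distance) continue;
            for (int j = 0; j < InputNeurons; j++)
            {
                double delta = LearnRate * (X[j] - W[x, y, j]);
                W[x, y, j] += delta;
                Sum += Math.Abs(delta);
            }
        }
    }
    return Sum;
}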