/// <summary>
/// Convenience overload for exactly two float inputs and one integer input.
/// </summary>
/// <remarks>
/// The scalar arguments are staged in the scratch arrays <c>f_tmp_arr</c> and
/// <c>i_tmp_arr</c> (declared outside this method) and then forwarded to the
/// array-based <c>GetTiles</c> overload with counts 2 and 1.
/// NOTE(review): because the scratch arrays are shared, overlapping calls would
/// presumably overwrite each other's staged values - confirm before using
/// this from more than one thread.
/// </remarks>
/// <param name="tiles">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="nt">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="ct">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="f1">First float input; stored in <c>f_tmp_arr[0]</c>.</param>
/// <param name="f2">Second float input; stored in <c>f_tmp_arr[1]</c>.</param>
/// <param name="h1">Integer input; stored in <c>i_tmp_arr[0]</c>.</param>
public void GetTiles2(int [] tiles, int nt, collision_table ct, float f1, float f2, int h1)
{
    // Stage the scalar inputs in the scratch buffers.
    f_tmp_arr[0] = f1;
    f_tmp_arr[1] = f2;
    i_tmp_arr[0] = h1;

    // Two floats, one int.
    GetTiles(tiles, nt, ct, f_tmp_arr, 2, i_tmp_arr, 1);
}
/// <summary>
/// Convenience overload taking a caller-supplied float array plus exactly
/// three integer inputs.
/// </summary>
/// <remarks>
/// The three integers are staged in the scratch array <c>i_tmp_arr</c>
/// (declared outside this method) and forwarded, with an integer count of 3,
/// to the array-based <c>GetTiles</c> overload.
/// </remarks>
/// <param name="tiles">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="nt">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="ct">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="floats">Float inputs; forwarded unchanged.</param>
/// <param name="nf">Float count; forwarded unchanged.</param>
/// <param name="h1">Stored in <c>i_tmp_arr[0]</c>.</param>
/// <param name="h2">Stored in <c>i_tmp_arr[1]</c>.</param>
/// <param name="h3">Stored in <c>i_tmp_arr[2]</c>.</param>
public void GetTiles(int [] tiles, int nt, collision_table ct, float [] floats, int nf, int h1, int h2, int h3)
{
    // Stage the three integer inputs in the scratch buffer.
    i_tmp_arr[0] = h1;
    i_tmp_arr[1] = h2;
    i_tmp_arr[2] = h3;

    GetTiles(tiles, nt, ct, floats, nf, i_tmp_arr, 3);
}
/// <summary>
/// Convenience overload for exactly one float input and two integer inputs.
/// </summary>
/// <remarks>
/// The scalars are staged in the scratch arrays <c>f_tmp_arr</c> and
/// <c>i_tmp_arr</c> (declared outside this method) and forwarded to the
/// array-based <c>GetTiles</c> overload with counts 1 and 2.
/// </remarks>
/// <param name="tiles">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="nt">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="ct">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="f1">Float input; stored in <c>f_tmp_arr[0]</c>.</param>
/// <param name="h1">Stored in <c>i_tmp_arr[0]</c>.</param>
/// <param name="h2">Stored in <c>i_tmp_arr[1]</c>.</param>
public void GetTiles1(int [] tiles, int nt, collision_table ct, float f1, int h1, int h2)
{
    // Stage the scalar inputs in the scratch buffers.
    f_tmp_arr[0] = f1;
    i_tmp_arr[0] = h1;
    i_tmp_arr[1] = h2;

    // One float, two ints.
    GetTiles(tiles, nt, ct, f_tmp_arr, 1, i_tmp_arr, 2);
}
/// <summary>
/// Serializes a collision table to disk with <c>BinaryFormatter</c>.
/// </summary>
/// <remarks>
/// The file is created, or truncated if it already exists
/// (<c>FileMode.Create</c>), and opened for exclusive write access.
/// Exceptions from opening the file or from serialization propagate to the
/// caller; the confirmation line is printed only after a successful write.
/// </remarks>
/// <param name="ct">The collision table to persist.</param>
/// <param name="filename">Path of the file to write.</param>
public static void SaveFile(collision_table ct, string filename)
{
    // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data.
    // It is kept here because changing the serializer would change the on-disk
    // format that LoadFile reads.
    IFormatter bf = new BinaryFormatter();

    // 'using' releases the file handle even when Serialize throws; the
    // previous explicit Close() was skipped on that path and leaked the handle.
    using (Stream stream = new FileStream(filename, FileMode.Create, FileAccess.Write, FileShare.None))
    {
        bf.Serialize(stream, ct);
    }

    Console.WriteLine("Collision data saves");
}
/// <summary>
/// Computes one hashed tile index per tiling for a point described by float
/// and integer variables, optionally wrapping individual float dimensions.
/// </summary>
/// <param name="tiles">Output array; entries 0 .. num_tilings-1 receive the tile indices.</param>
/// <param name="num_tilings">Number of tilings, i.e. number of indices written to <paramref name="tiles"/>.</param>
/// <param name="ctable">Collision table handed to <c>hash</c> for every tiling.</param>
/// <param name="floats">Float variables; each is scaled by <paramref name="num_tilings"/> and floored.</param>
/// <param name="num_floats">Number of entries of <paramref name="floats"/> to use.</param>
/// <param name="wrap_widths">Per-float wrap width; 0 disables wrapping for that dimension.</param>
/// <param name="ints">Integer variables appended to the hashed coordinates.</param>
/// <param name="num_ints">Number of entries of <paramref name="ints"/> to use.</param>
public void GetTilesWrap(
    int [] tiles,
    int num_tilings,
    collision_table ctable,
    float [] floats,
    int num_floats,
    int [] wrap_widths,
    int [] ints,
    int num_ints)
{
    int [] quantized = new int[max_num_vars];
    int [] offsets = new int[max_num_vars];
    int [] wrapSpans = new int[max_num_vars];

    // Layout: [0 .. num_floats-1] float coordinates, [num_floats] tiling index,
    // [num_floats+1 ..] the integer variables.
    int [] coords = new int[max_num_vars * 2 + 1];
    int coordCount = num_floats + num_ints + 1;

    // The integer variables are the same for every tiling, so copy them once.
    for (int k = 0; k < num_ints; k++)
    {
        coords[num_floats + 1 + k] = ints[k];
    }

    // Quantize the floats to integers (from here on a tile is num_tilings units wide).
    for (int k = 0; k < num_floats; k++)
    {
        quantized[k] = (int)Math.Floor(floats[k] * num_tilings);
        offsets[k] = 0;
        wrapSpans[k] = wrap_widths[k] * num_tilings;
    }

    for (int tiling = 0; tiling < num_tilings; tiling++)
    {
        // Declared outside the inner loop because its final value is the slot
        // that receives the tiling index below.
        int dim;
        for (dim = 0; dim < num_floats; dim++)
        {
            // Snap to the corner of the active tile in this tiling.
            int snapped = quantized[dim] - mod(quantized[dim] - offsets[dim], num_tilings);

            if (wrap_widths[dim] != 0)
            {
                snapped = mod(snapped, wrapSpans[dim]);
            }

            coords[dim] = snapped;

            // Displacement of the next tiling along this dimension.
            offsets[dim] += 1 + (2 * dim);
        }

        // Include the tiling number so that each tiling hashes differently.
        coords[dim] = tiling;

        tiles[tiling] = hash(coords, coordCount, ctable);
    }
}
/// <summary>
/// Maps an array of integers to a slot index in the collision table.
/// </summary>
/// <remarks>
/// A primary hash selects the slot and a second hash produces a check value
/// that is stored in <c>ct.data</c> to detect collisions. A slot holding -1 is
/// treated as empty. When <c>ct.safe</c> is 0 a collision is only counted and
/// the primary slot is returned anyway; otherwise slots are probed with an odd
/// step until the check value or an empty slot is found. If more than
/// <c>ct.m</c> probes are needed the process prints "Out of Memory" and exits
/// with code 0.
/// </remarks>
/// <param name="ints">Values to hash.</param>
/// <param name="num_ints">Number of entries of <paramref name="ints"/> to use.</param>
/// <param name="ct">Collision table; its counters and data may be updated.</param>
/// <returns>The slot index chosen for the input.</returns>
int hash(int [] ints, int num_ints, collision_table ct)
{
    ct.calls++;

    int slot = hash_UNH(ints, num_ints, ct.m, 449);
    int check = hash_UNH(ints, num_ints, max_longInt, 457);

    // Slot already belongs to this input.
    if (check == (int)ct.data[slot])
    {
        ct.clearhits++;
        return slot;
    }

    // Slot is empty: claim it.
    if (ct.data[slot] == -1)
    {
        ct.clearhits++;
        ct.data[slot] = check;
        return slot;
    }

    // Unsafe mode: record the collision and share the slot.
    if (ct.safe == 0)
    {
        ct.collisions++;
        return slot;
    }

    // Safe mode: probe with an odd step derived from a third hash.
    int step = 1 + 2 * hash_UNH(ints, num_ints, (max_longInt) / 4, 449);

    for (int probes = 1; probes > 0; probes++)
    {
        ct.collisions++;
        slot = (slot + step) % (ct.m);

        if (probes > ct.m)
        {
            Console.WriteLine("\nOut of Memory");
            Environment.Exit(0);
        }

        if (check == ct.data[slot])
        {
            break;
        }

        if (ct.data[slot] == -1)
        {
            ct.data[slot] = check;
            break;
        }
    }

    return slot;
}
/// <summary>
/// Loads a collision table previously written with <c>BinaryFormatter</c>.
/// </summary>
/// <remarks>
/// Exceptions from opening the file or from deserialization propagate to the
/// caller.
/// </remarks>
/// <param name="fileToLoad">Path of the file to read.</param>
/// <returns>
/// The deserialized collision table, or <c>null</c> when
/// <paramref name="fileToLoad"/> does not exist.
/// </returns>
public static collision_table LoadFile(string fileToLoad)
{
    if (!File.Exists(fileToLoad))
    {
        return null;
    }

    // NOTE(review): BinaryFormatter.Deserialize is unsafe on untrusted input and
    // the type is obsolete. Only load files this application wrote itself, and
    // consider migrating the format.
    IFormatter bf = new BinaryFormatter();
    collision_table ct;

    // 'using' releases the file handle even when Deserialize throws; the
    // previous explicit Close() was skipped on that path and leaked the handle.
    using (Stream stream = new FileStream(fileToLoad, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        ct = (collision_table)bf.Deserialize(stream);
    }

    // The previous message here was copied from the save path ("saves").
    Console.WriteLine("Collision data loaded");
    Console.WriteLine("example data retrieved is {0}", ct.m);
    return ct;
}
/// <summary>
/// Convenience overload for exactly one float input and no integer inputs.
/// </summary>
/// <remarks>
/// The float is staged in the scratch array <c>f_tmp_arr</c> (declared outside
/// this method). <c>i_tmp_arr</c> is passed with a count of 0.
/// </remarks>
/// <param name="tiles">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="nt">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="ct">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="f1">Float input; stored in <c>f_tmp_arr[0]</c>.</param>
public void GetTiles1(int [] tiles, int nt, collision_table ct, float f1)
{
    f_tmp_arr[0] = f1;

    // One float, zero ints.
    GetTiles(tiles, nt, ct, f_tmp_arr, 1, i_tmp_arr, 0);
}
/// <summary>
/// Convenience overload taking a caller-supplied float array plus exactly one
/// integer input.
/// </summary>
/// <remarks>
/// The integer is staged in the scratch array <c>i_tmp_arr</c> (declared
/// outside this method) and forwarded with an integer count of 1.
/// </remarks>
/// <param name="tiles">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="nt">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="ct">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="floats">Float inputs; forwarded unchanged.</param>
/// <param name="nf">Float count; forwarded unchanged.</param>
/// <param name="h1">Integer input; stored in <c>i_tmp_arr[0]</c>.</param>
public void GetTiles(int [] tiles, int nt, collision_table ct, float [] floats, int nf, int h1)
{
    i_tmp_arr[0] = h1;

    GetTiles(tiles, nt, ct, floats, nf, i_tmp_arr, 1);
}
/// <summary>
/// Convenience overload taking only float inputs.
/// </summary>
/// <remarks>
/// Forwards to the array-based <c>GetTiles</c> overload, passing the scratch
/// array <c>i_tmp_arr</c> with an integer count of 0.
/// </remarks>
/// <param name="tiles">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="nt">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="ct">Forwarded unchanged to <c>GetTiles</c>.</param>
/// <param name="floats">Float inputs; forwarded unchanged.</param>
/// <param name="nf">Float count; forwarded unchanged.</param>
public void GetTiles(int [] tiles, int nt, collision_table ct, float [] floats, int nf)
{
    // No integer inputs: pass the scratch array with a count of zero.
    GetTiles(tiles, nt, ct, floats, nf, i_tmp_arr, 0);
}
/// <summary>
/// Builds a Q-learning agent: allocates its working arrays, reads the learning
/// parameters from "RLConfig.xml", and either loads saved weights or zeroes them.
/// </summary>
/// <param name="numFeatures">Number of features; passed to the base constructor and used to size the tile arrays.</param>
/// <param name="numActions">Number of actions; passed to the base constructor and used to size the per-action arrays.</param>
/// <param name="bLearn">Stored in <c>bLearning</c>.</param>
/// <param name="rewards">Stored in <c>lastReward</c>.</param>
/// <param name="widths">Per-feature tile widths; the first <c>getNumFeatures()</c> entries are copied into <c>tileWidths</c>.</param>
/// <param name="loadWeightsFile">Passed to <c>loadWeights</c> when <c>config.transfer</c> is 1.</param>
/// <param name="saveWeightsFile">Stored in <c>weightsFile</c>.</param>
public Q_LearningAgent(int numFeatures, int numActions, bool bLearn, double rewards, double [] widths, string loadWeightsFile, string saveWeightsFile)
    : base(numFeatures, numActions)
{
    this.numActions = numActions;
    this.numFeatures = numFeatures;
    numTiles = numFeatures * numDimTiles;

    tileWidths = new double[numFeatures];
    Q = new double[numActions];

    // Allocation of the weight table is disabled here:
    //weights = new double[numActions,numTiles];
    //weightsRaw=new double[numActions,numTiles];
    //weights = weightsRaw;
    // NOTE(review): the non-transfer branch below indexes 'weights' without
    // allocating it in this constructor, so it presumably has to be allocated
    // elsewhere (e.g. by the base class) - confirm.

    traces = new double[numActions, numTiles];
    traceAll = new double[numTiles];
    // number of tiles in each tiling
    tile = new int[numFeatures, numDimTiles];
    actionTiles = new int[numActions, rl_memory_Size];
    tiless = new Tile[numFeatures, numDimTiles];
    nonzeroTraces = new int[rl_max_nonzero_Traces];
    nonzeroTracesInverse = new int[rl_memory_Size];
    currentState = new int[numActions, numDimTiles];
    previousState = new int[numActions, numDimTiles];

    weightsFile = saveWeightsFile;
    bLearning = bLearn;
    lastReward = rewards;

    for (int f = 0; f < getNumFeatures(); f++)
    {
        tileWidths[f] = widths[f];
    }

    // Learning parameters come from the XML configuration file.
    config = RLConfig.Load("RLConfig.xml");
    alpha = config.alpha;
    gamma = config.gamma;
    lambda = config.lambda;
    epsilon = config.epsilon;
    minimumTrace = config.traceability;

    // Disabled outer loop from an earlier version:
    //for (int i=0; i < rl_memory_Size;i++)
    // yet to do ... each Q(s,a) should represent
    for (int a = 0; a < numActions; a++)
    {
        Q[a] = 0;
    }

    epochNum = 0;
    lastAction = -1;
    numNonzeroTraces = 0;

    // Author's note (Sabre): the weights for all tiles per action are
    // initialised here. Still to check: does this leave the weights at zero
    // every time a SarsaAgent object is created? If so, move this
    // initialisation into the start-episode method.
    if (config.transfer == 1)
    {
        weights = loadWeights(loadWeightsFile);

        // Traces are not loaded yet; they start from zero.
        // TODO: explore saving and loading them together with the weights.
        for (int a = 0; a < numActions; a++)
        {
            for (int t = 0; t < numTiles; t++)
            {
                traces[a, t] = 0;
            }
        }
    }
    else
    {
        for (int a = 0; a < numActions; a++)
        {
            for (int t = 0; t < numTiles; t++)
            {
                weights[a, t] = 0;
                traces[a, t] = 0;
            }
        }
    }

    // These two scratch arrays are not used anywhere else in this constructor.
    int [] tmp = new int[2];
    float [] tmpf = new float[2];

    colTab = new collision_table(rl_memory_Size, 1);

    // The instance is not stored; only the constructor call itself is kept.
    tiles obj_tiles = new tiles();
}