/*************************************************************************
Serializer: allocation

Reserves serializer entries for a decision forest: six scalar entries,
then the first forest.bufsize elements of forest.trees (delegated to
apserv.allocrealarray).

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void dfalloc(alglib.serializer s, decisionforest forest)
{
    // six scalar entries precede the tree buffer
    const int scalarentries = 6;
    for (int entry = 0; entry < scalarentries; entry++)
    {
        s.alloc_entry();
    }

    // tree storage: only the first BufSize elements are accounted for
    apserv.allocrealarray(s, forest.trees, forest.bufsize);
}
/*************************************************************************
Allocation of serializer: real matrix

Reserves two entries for the matrix dimensions plus one entry per element.
A negative N0 (N1) means "use the actual row (column) count of V".
*************************************************************************/
public static void allocrealmatrix(alglib.serializer s, double[,] v, int n0, int n1)
{
    int rowcount = n0 < 0 ? alglib.ap.rows(v) : n0;
    int colcount = n1 < 0 ? alglib.ap.cols(v) : n1;

    // dimensions
    s.alloc_entry();
    s.alloc_entry();

    // elements, row by row
    for (int r = 0; r < rowcount; r++)
    {
        for (int c = 0; c < colcount; c++)
        {
            s.alloc_entry();
        }
    }
}
/*************************************************************************
Unserialization: real matrix

Reads two integer dimensions followed by N0*N1 doubles in row-major order.
V is reset to a 0x0 matrix first and stays that way when either dimension
read from the stream is zero.
*************************************************************************/
public static void unserializerealmatrix(alglib.serializer s, ref double[,] v)
{
    // output is emptied before anything is read from the stream
    v = new double[0, 0];

    int rowcount = s.unserialize_int();
    int colcount = s.unserialize_int();
    if (rowcount == 0 || colcount == 0)
    {
        return;
    }

    v = new double[rowcount, colcount];
    for (int r = 0; r < rowcount; r++)
    {
        for (int c = 0; c < colcount; c++)
        {
            v[r, c] = s.unserialize_double();
        }
    }
}
/*************************************************************************
Unserialization: real array

Reads an integer length N followed by N doubles. V is reset to an empty
array first and stays empty when N is zero.
*************************************************************************/
public static void unserializerealarray(alglib.serializer s, ref double[] v)
{
    // output is emptied before anything is read from the stream
    v = new double[0];

    int count = s.unserialize_int();
    if (count == 0)
    {
        return;
    }

    v = new double[count];
    for (int pos = 0; pos < count; pos++)
    {
        v[pos] = s.unserialize_double();
    }
}
/*************************************************************************
Serialization: Integer array

Writes the element count followed by the first N elements of V.
A negative N means "use the full length of V".
*************************************************************************/
public static void serializeintegerarray(alglib.serializer s, int[] v, int n)
{
    int count = n < 0 ? alglib.ap.len(v) : n;

    s.serialize_int(count);
    for (int pos = 0; pos < count; pos++)
    {
        s.serialize_int(v[pos]);
    }
}
/*************************************************************************
Allocation of serializer: complex value

Reserves two serializer entries; the value V itself is not inspected.
*************************************************************************/
public static void alloccomplex(alglib.serializer s, complex v)
{
    for (int part = 0; part < 2; part++)
    {
        s.alloc_entry();
    }
}
/*************************************************************************
Unserialization: complex value

Reads two doubles from the stream and returns them as the X and Y fields
(in that order) of a complex value.
*************************************************************************/
public static complex unserializecomplex(alglib.serializer s)
{
    // stream order is X first, then Y
    double xpart = s.unserialize_double();
    double ypart = s.unserialize_double();

    complex value = 0;
    value.x = xpart;
    value.y = ypart;
    return value;
}
/*************************************************************************
Serializer: unserialization

Rebuilds Network from the stream. The stream layout read here is:
  * serialization code and version (both validated),
  * softmax flag,
  * integer array of layer sizes (2, 3 or 4 entries are accepted),
  * for every neuron of layers 1..last: function kind, threshold and one
    weight per neuron of the previous layer,
  * a pair of doubles per input, then a pair of doubles per output, which
    are passed to MLPSetInputScaling / MLPSetOutputScaling.

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void mlpunserialize(alglib.serializer s, multilayerperceptron network)
{
    int[] layersizes = new int[0];

    //
    // check correctness of header
    //
    int headercode = s.unserialize_int();
    alglib.ap.assert(headercode == scodes.getmlpserializationcode(), "MLPUnserialize: stream header corrupted");
    int version = s.unserialize_int();
    alglib.ap.assert(version == mlpfirstversion, "MLPUnserialize: stream header corrupted");

    //
    // Create network
    //
    bool issoftmax = s.unserialize_bool();
    apserv.unserializeintegerarray(s, ref layersizes);
    int layercount = alglib.ap.len(layersizes);
    alglib.ap.assert((layercount == 2 || layercount == 3) || layercount == 4, "MLPUnserialize: too many hidden layers!");
    int nin = layersizes[0];
    int nout = layersizes[layercount - 1];
    switch (layercount)
    {
        case 2:
            if (issoftmax)
            {
                mlpcreatec0(layersizes[0], layersizes[1], network);
            }
            else
            {
                mlpcreate0(layersizes[0], layersizes[1], network);
            }
            break;
        case 3:
            if (issoftmax)
            {
                mlpcreatec1(layersizes[0], layersizes[1], layersizes[2], network);
            }
            else
            {
                mlpcreate1(layersizes[0], layersizes[1], layersizes[2], network);
            }
            break;
        case 4:
            if (issoftmax)
            {
                mlpcreatec2(layersizes[0], layersizes[1], layersizes[2], layersizes[3], network);
            }
            else
            {
                mlpcreate2(layersizes[0], layersizes[1], layersizes[2], layersizes[3], network);
            }
            break;
    }

    //
    // Load neurons and weights
    //
    for (int layer = 1; layer < layercount; layer++)
    {
        for (int neuron = 0; neuron < layersizes[layer]; neuron++)
        {
            int fkind = s.unserialize_int();
            double threshold = s.unserialize_double();
            mlpsetneuroninfo(network, layer, neuron, fkind, threshold);

            // one weight per neuron of the previous layer
            for (int src = 0; src < layersizes[layer - 1]; src++)
            {
                double weight = s.unserialize_double();
                mlpsetweight(network, layer - 1, src, layer, neuron, weight);
            }
        }
    }

    //
    // Load standartizator
    //
    for (int input = 0; input < nin; input++)
    {
        double p0 = s.unserialize_double();
        double p1 = s.unserialize_double();
        mlpsetinputscaling(network, input, p0, p1);
    }
    for (int output = 0; output < nout; output++)
    {
        double p0 = s.unserialize_double();
        double p1 = s.unserialize_double();
        mlpsetoutputscaling(network, output, p0, p1);
    }
}
/*************************************************************************
Calculation of all types of errors at once for a subset or  full  dataset,
which can be represented in different formats.

THIS INTERNAL FUNCTION IS NOT INTENDED TO BE USED BY ALGLIB USERS!

Parameters as used by this function:
    DatasetType - 0 reads rows from DenseXY, 1 reads rows from SparseXY
    SubsetType  - 0 processes rows [Subset0,Subset1) directly,
                  1 processes rows Idx[Subset0..Subset1-1]
    Buf         - shared pool which supplies the MLPBuffers work area
    Rep         - receives RelCLSError, AvgCE, RMSError, AvgError and
                  AvgRelError

  -- ALGLIB --
     Copyright 26.07.2012 by Bochkanov Sergey
*************************************************************************/
public static void mlpallerrorsx(multilayerperceptron network, double[,] densexy, sparse.sparsematrix sparsexy, int datasetsize, int datasettype, int[] idx, int subset0, int subset1, int subsettype, alglib.smp.shared_pool buf, modelerrors rep)
{
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    int rowsize = 0;
    int len0 = 0;
    int len1 = 0;
    hpccores.mlpbuffers pbuf = null;
    modelerrors rep0 = new modelerrors();
    modelerrors rep1 = new modelerrors();

    alglib.ap.assert(datasetsize >= 0, "MLPAllErrorsX: SetSize<0");
    alglib.ap.assert(datasettype == 0 || datasettype == 1, "MLPAllErrorsX: DatasetType is incorrect");
    alglib.ap.assert(subsettype == 0 || subsettype == 1, "MLPAllErrorsX: SubsetType is incorrect");

    //
    // Determine network properties
    //
    mlpproperties(network, ref nin, ref nout, ref wcount);
    bool iscls = mlpissoftmax(network);

    //
    // Split problem.
    //
    // Splitting problem allows us to reduce  effect  of  single-precision
    // arithmetics (SSE-optimized version of MLPChunkedProcess uses single
    // precision  internally, but converts them to  double precision after
    // results are exported from HPC buffer to network). Small batches are
    // calculated in single precision, results are  aggregated  in  double
    // precision, and it allows us to avoid accumulation  of  errors  when
    // we process very large batches (tens of thousands of items).
    //
    // NOTE: it is important to use real arithmetics for ProblemCost
    //       because ProblemCost may be larger than MAXINT.
    //
    int subsetlen = subset1 - subset0;
    if (subsetlen >= 2 * microbatchsize && (double)(apserv.inttoreal(subsetlen) * apserv.inttoreal(wcount)) > (double)(gradbasecasecost))
    {
        apserv.splitlength(subsetlen, microbatchsize, ref len0, ref len1);
        mlpallerrorsx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0, subset0 + len0, subsettype, buf, rep0);
        mlpallerrorsx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0 + len0, subset1, subsettype, buf, rep1);

        // length-weighted merge of the two halves (RMS is merged via squares)
        double total = len0 + len1;
        rep.relclserror = (len0 * rep0.relclserror + len1 * rep1.relclserror) / total;
        rep.avgce = (len0 * rep0.avgce + len1 * rep1.avgce) / total;
        rep.rmserror = Math.Sqrt((len0 * math.sqr(rep0.rmserror) + len1 * math.sqr(rep1.rmserror)) / total);
        rep.avgerror = (len0 * rep0.avgerror + len1 * rep1.avgerror) / total;
        rep.avgrelerror = (len0 * rep0.avgrelerror + len1 * rep1.avgrelerror) / total;
        return;
    }

    //
    // Retrieve and prepare
    //
    alglib.smp.ae_shared_pool_retrieve(buf, ref pbuf);
    if (iscls)
    {
        rowsize = nin + 1;
        bdss.dserrallocate(nout, ref pbuf.tmp0);
    }
    else
    {
        rowsize = nin + nout;
        bdss.dserrallocate(-nout, ref pbuf.tmp0);
    }

    //
    // Processing
    //
    hpccores.hpcpreparechunkedgradient(network.weights, wcount, mlpntotal(network), nin, nout, pbuf);
    for (int cstart = subset0; cstart < subset1; cstart = cstart + pbuf.chunksize)
    {
        //
        // Determine size of current chunk and copy it to PBuf.XY
        //
        int csize = Math.Min(subset1, cstart + pbuf.chunksize) - cstart;
        for (int row = 0; row < csize; row++)
        {
            int srcidx = -1;
            if (subsettype == 0)
            {
                srcidx = cstart + row;
            }
            else if (subsettype == 1)
            {
                srcidx = idx[cstart + row];
            }
            alglib.ap.assert(srcidx >= 0, "MLPAllErrorsX: internal error");
            if (datasettype == 0)
            {
                for (int col = 0; col < rowsize; col++)
                {
                    pbuf.xy[row, col] = densexy[srcidx, col];
                }
            }
            if (datasettype == 1)
            {
                sparse.sparsegetrow(sparsexy, srcidx, ref pbuf.xyrow);
                for (int col = 0; col < rowsize; col++)
                {
                    pbuf.xy[row, col] = pbuf.xyrow[col];
                }
            }
        }

        //
        // Unpack XY and process (temporary code, to be replaced by chunked processing)
        //
        for (int row = 0; row < csize; row++)
        {
            for (int col = 0; col < rowsize; col++)
            {
                pbuf.xy2[row, col] = pbuf.xy[row, col];
            }
        }
        mlpchunkedprocess(network, pbuf.xy2, 0, csize, pbuf.batch4buf, pbuf.hpcbuf);
        for (int row = 0; row < csize; row++)
        {
            // inputs are read back from columns [0,NIn) of XY2
            for (int k = 0; k < nin; k++)
            {
                pbuf.x[k] = pbuf.xy2[row, k];
            }

            // values at columns [NIn,NIn+NOut) of XY2 are taken as network outputs
            for (int k = 0; k < nout; k++)
            {
                pbuf.y[k] = pbuf.xy2[row, nin + k];
            }

            // desired values come from the untouched copy in XY
            if (iscls)
            {
                pbuf.desiredy[0] = pbuf.xy[row, nin];
            }
            else
            {
                for (int k = 0; k < nout; k++)
                {
                    pbuf.desiredy[k] = pbuf.xy[row, nin + k];
                }
            }
            bdss.dserraccumulate(ref pbuf.tmp0, pbuf.y, pbuf.desiredy);
        }
    }
    bdss.dserrfinish(ref pbuf.tmp0);
    rep.relclserror = pbuf.tmp0[0];
    rep.avgce = pbuf.tmp0[1] / Math.Log(2);
    rep.rmserror = pbuf.tmp0[2];
    rep.avgerror = pbuf.tmp0[3];
    rep.avgrelerror = pbuf.tmp0[4];

    //
    // Recycle
    //
    alglib.smp.ae_shared_pool_recycle(buf, ref pbuf);
}
/*************************************************************************
Serializer: allocation

Reserves serializer entries for Network: three scalar entries, the layer
sizes array, then per neuron of layers 1..last two entries plus one entry
per neuron of the previous layer, and finally two entries per input and
two entries per output.

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void mlpalloc(alglib.serializer s, multilayerperceptron network)
{
    int fkind = 0;
    double threshold = 0;
    double v0 = 0;
    double v1 = 0;

    int[] sizes = network.hllayersizes;
    int layercount = alglib.ap.len(sizes);
    int nin = sizes[0];
    int nout = sizes[layercount - 1];

    // three scalar entries, then the layer sizes array
    s.alloc_entry();
    s.alloc_entry();
    s.alloc_entry();
    apserv.allocintegerarray(s, sizes, -1);

    // neurons: the getter is still invoked (its outputs are not used here)
    for (int layer = 1; layer < layercount; layer++)
    {
        for (int neuron = 0; neuron < sizes[layer]; neuron++)
        {
            mlpgetneuroninfo(network, layer, neuron, ref fkind, ref threshold);
            s.alloc_entry();
            s.alloc_entry();
            for (int src = 0; src < sizes[layer - 1]; src++)
            {
                s.alloc_entry();
            }
        }
    }

    // input scaling: two entries per input
    for (int input = 0; input < nin; input++)
    {
        mlpgetinputscaling(network, input, ref v0, ref v1);
        s.alloc_entry();
        s.alloc_entry();
    }

    // output scaling: two entries per output
    for (int output = 0; output < nout; output++)
    {
        mlpgetoutputscaling(network, output, ref v0, ref v1);
        s.alloc_entry();
        s.alloc_entry();
    }
}
/*************************************************************************
Serializer: serialization

Writes Network to the stream in this order: serialization code, version,
softmax flag, layer sizes array, then for every neuron of layers 1..last
its function kind, threshold and incoming weights, then the scaling pair
of every input and of every output.

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void mlpserialize(alglib.serializer s, multilayerperceptron network)
{
    int fkind = 0;
    double threshold = 0;
    double v0 = 0;
    double v1 = 0;

    int[] sizes = network.hllayersizes;
    int layercount = alglib.ap.len(sizes);
    int nin = sizes[0];
    int nout = sizes[layercount - 1];

    // header and architecture
    s.serialize_int(scodes.getmlpserializationcode());
    s.serialize_int(mlpfirstversion);
    s.serialize_bool(mlpissoftmax(network));
    apserv.serializeintegerarray(s, sizes, -1);

    // neurons and their incoming weights
    for (int layer = 1; layer < layercount; layer++)
    {
        for (int neuron = 0; neuron < sizes[layer]; neuron++)
        {
            mlpgetneuroninfo(network, layer, neuron, ref fkind, ref threshold);
            s.serialize_int(fkind);
            s.serialize_double(threshold);
            for (int src = 0; src < sizes[layer - 1]; src++)
            {
                double weight = mlpgetweight(network, layer - 1, src, layer, neuron);
                s.serialize_double(weight);
            }
        }
    }

    // input scaling
    for (int input = 0; input < nin; input++)
    {
        mlpgetinputscaling(network, input, ref v0, ref v1);
        s.serialize_double(v0);
        s.serialize_double(v1);
    }

    // output scaling
    for (int output = 0; output < nout; output++)
    {
        mlpgetoutputscaling(network, output, ref v0, ref v1);
        s.serialize_double(v0);
        s.serialize_double(v1);
    }
}
/*************************************************************************
Internal function which actually calculates batch gradient for a subset or
full dataset, which can be represented in different formats.

THIS FUNCTION IS NOT INTENDED TO BE USED BY ALGLIB USERS!

Parameters as used by this function:
    DatasetType - 0 reads rows from DenseXY, 1 reads rows from SparseXY
    SubsetType  - 0 processes rows [Subset0,Subset1) directly,
                  1 processes rows Idx[Subset0..Subset1-1]
    Buf         - shared pool which supplies the MLPBuffers work area
    GradBuf     - shared pool which supplies the SMLPGrad accumulator whose
                  F and G fields are updated

  -- ALGLIB --
     Copyright 26.07.2012 by Bochkanov Sergey
*************************************************************************/
public static void mlpgradbatchx(multilayerperceptron network, double[,] densexy, sparse.sparsematrix sparsexy, int datasetsize, int datasettype, int[] idx, int subset0, int subset1, int subsettype, alglib.smp.shared_pool buf, alglib.smp.shared_pool gradbuf)
{
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    int len0 = 0;
    int len1 = 0;
    hpccores.mlpbuffers pbuf = null;
    smlpgrad sgrad = null;

    alglib.ap.assert(datasetsize >= 0, "MLPGradBatchX: SetSize<0");
    alglib.ap.assert(datasettype == 0 || datasettype == 1, "MLPGradBatchX: DatasetType is incorrect");
    alglib.ap.assert(subsettype == 0 || subsettype == 1, "MLPGradBatchX: SubsetType is incorrect");

    //
    // Determine network and dataset properties
    //
    mlpproperties(network, ref nin, ref nout, ref wcount);
    int rowsize = mlpissoftmax(network) ? nin + 1 : nin + nout;

    //
    // Split problem.
    //
    // Splitting problem allows us to reduce  effect  of  single-precision
    // arithmetics (SSE-optimized version of MLPChunkedGradient uses single
    // precision  internally, but converts them to  double precision after
    // results are exported from HPC buffer to network). Small batches are
    // calculated in single precision, results are  aggregated  in  double
    // precision, and it allows us to avoid accumulation  of  errors  when
    // we process very large batches (tens of thousands of items).
    //
    // NOTE: it is important to use real arithmetics for ProblemCost
    //       because ProblemCost may be larger than MAXINT.
    //
    int subsetlen = subset1 - subset0;
    double problemcost = subsetlen;
    problemcost = problemcost * wcount;
    if (subsetlen >= 2 * microbatchsize && (double)(problemcost) > (double)(gradbasecasecost))
    {
        apserv.splitlength(subsetlen, microbatchsize, ref len0, ref len1);
        mlpgradbatchx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0, subset0 + len0, subsettype, buf, gradbuf);
        mlpgradbatchx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0 + len0, subset1, subsettype, buf, gradbuf);
        return;
    }

    //
    // Chunked processing
    //
    alglib.smp.ae_shared_pool_retrieve(gradbuf, ref sgrad);
    alglib.smp.ae_shared_pool_retrieve(buf, ref pbuf);
    hpccores.hpcpreparechunkedgradient(network.weights, wcount, mlpntotal(network), nin, nout, pbuf);
    for (int cstart = subset0; cstart < subset1; cstart = cstart + pbuf.chunksize)
    {
        //
        // Determine size of current chunk and copy it to PBuf.XY
        //
        int csize = Math.Min(subset1, cstart + pbuf.chunksize) - cstart;
        for (int row = 0; row < csize; row++)
        {
            int srcidx = -1;
            if (subsettype == 0)
            {
                srcidx = cstart + row;
            }
            else if (subsettype == 1)
            {
                srcidx = idx[cstart + row];
            }
            alglib.ap.assert(srcidx >= 0, "MLPGradBatchX: internal error");
            if (datasettype == 0)
            {
                for (int col = 0; col < rowsize; col++)
                {
                    pbuf.xy[row, col] = densexy[srcidx, col];
                }
            }
            if (datasettype == 1)
            {
                sparse.sparsegetrow(sparsexy, srcidx, ref pbuf.xyrow);
                for (int col = 0; col < rowsize; col++)
                {
                    pbuf.xy[row, col] = pbuf.xyrow[col];
                }
            }
        }

        //
        // Process chunk (line pointer is advanced by the loop header)
        //
        mlpchunkedgradient(network, pbuf.xy, 0, csize, pbuf.batch4buf, pbuf.hpcbuf, ref sgrad.f, false);
    }
    hpccores.hpcfinalizechunkedgradient(pbuf, sgrad.g);

    // buffers go back in the same order as the original code: Buf, then GradBuf
    alglib.smp.ae_shared_pool_recycle(buf, ref pbuf);
    alglib.smp.ae_shared_pool_recycle(gradbuf, ref sgrad);
}
/*************************************************************************
Serializer: unserialization

Validates the two-integer stream header (serialization code and version),
then fills Forest: NVars, NClasses, NTrees, BufSize and the Trees array,
in that order.

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void dfunserialize(alglib.serializer s, decisionforest forest)
{
    //
    // check correctness of header
    //
    int headercode = s.unserialize_int();
    alglib.ap.assert(headercode == scodes.getrdfserializationcode(), "DFUnserialize: stream header corrupted");
    int version = s.unserialize_int();
    alglib.ap.assert(version == dffirstversion, "DFUnserialize: stream header corrupted");

    //
    // Unserialize data
    //
    forest.nvars = s.unserialize_int();
    forest.nclasses = s.unserialize_int();
    forest.ntrees = s.unserialize_int();
    forest.bufsize = s.unserialize_int();
    apserv.unserializerealarray(s, ref forest.trees);
}
/*************************************************************************
Serializer: serialization

Writes Forest to the stream: serialization code, version, NVars, NClasses,
NTrees, BufSize, then the first BufSize elements of Trees.

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void dfserialize(alglib.serializer s, decisionforest forest)
{
    // header
    s.serialize_int(scodes.getrdfserializationcode());
    s.serialize_int(dffirstversion);

    // scalar fields
    s.serialize_int(forest.nvars);
    s.serialize_int(forest.nclasses);
    s.serialize_int(forest.ntrees);
    s.serialize_int(forest.bufsize);

    // tree storage, limited to BufSize elements
    apserv.serializerealarray(s, forest.trees, forest.bufsize);
}
/*************************************************************************
This function initializes temporaries needed for training session.

When the Sessions pool is not initialized yet, a seed session is prepared
with InitMLPTrnSession() and installed into the pool.

When the pool is already initialized, every recycled session is checked to
have the same architecture as NetworkTrained and its BestRMSError is reset
to MaxRealNumber.

The seed session object is created only on the path that needs it, and the
never-used DummySubset local was removed.
*************************************************************************/
private static void initmlptrnsessions(mlpbase.multilayerperceptron networktrained, bool randomizenetwork, mlptrainer trainer, alglib.smp.shared_pool sessions)
{
    if (!alglib.smp.ae_shared_pool_is_initialized(sessions))
    {
        //
        // Prepare session and seed pool
        //
        smlptrnsession seed = new smlptrnsession();
        initmlptrnsession(networktrained, randomizenetwork, trainer, seed);
        alglib.smp.ae_shared_pool_set_seed(sessions, seed);
        return;
    }

    //
    // Pool was already initialized.
    // Clear sessions stored in the pool.
    //
    smlptrnsession p = null;
    alglib.smp.ae_shared_pool_first_recycled(sessions, ref p);
    while (p != null)
    {
        alglib.ap.assert(mlpbase.mlpsamearchitecture(p.network, networktrained), "InitMLPTrnSessions: internal consistency error");
        p.bestrmserror = math.maxrealnumber;
        alglib.smp.ae_shared_pool_next_recycled(sessions, ref p);
    }
}
/*************************************************************************
Calculation of all types of errors

Processes rows Subset0..Subset1-1 (directly when SubsetType=0, through Idx
when SubsetType=1) of a dense (DatasetType=0) or sparse (DatasetType=1)
dataset with the ensemble and stores RelCLSError, AvgCE, RMSError,
AvgError and AvgRelError into Rep. Work buffers are taken from the Buf
pool and returned to it at the end.

The softmax flag is queried once before the loop and reused for every row;
the never-used Rep0/Rep1 temporaries were removed.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
public static void mlpeallerrorsx(mlpensemble ensemble, double[,] densexy, sparse.sparsematrix sparsexy, int datasetsize, int datasettype, int[] idx, int subset0, int subset1, int subsettype, alglib.smp.shared_pool buf, mlpbase.modelerrors rep)
{
    hpccores.mlpbuffers pbuf = null;

    //
    // Get network information
    //
    int nin = mlpbase.mlpgetinputscount(ensemble.network);
    int nout = mlpbase.mlpgetoutputscount(ensemble.network);
    bool iscls = mlpbase.mlpissoftmax(ensemble.network);

    //
    // Retrieve buffer, prepare, process data, recycle buffer
    //
    alglib.smp.ae_shared_pool_retrieve(buf, ref pbuf);
    if (iscls)
    {
        bdss.dserrallocate(nout, ref pbuf.tmp0);
    }
    else
    {
        bdss.dserrallocate(-nout, ref pbuf.tmp0);
    }
    apserv.rvectorsetlengthatleast(ref pbuf.x, nin);
    apserv.rvectorsetlengthatleast(ref pbuf.y, nout);
    apserv.rvectorsetlengthatleast(ref pbuf.desiredy, nout);
    for (int i = subset0; i <= subset1 - 1; i++)
    {
        // resolve dataset row for the I-th subset element
        int srcidx = -1;
        if (subsettype == 0)
        {
            srcidx = i;
        }
        if (subsettype == 1)
        {
            srcidx = idx[i];
        }
        alglib.ap.assert(srcidx >= 0, "MLPEAllErrorsX: internal error");

        // load inputs
        if (datasettype == 0)
        {
            for (int k = 0; k <= nin - 1; k++)
            {
                pbuf.x[k] = densexy[srcidx, k];
            }
        }
        if (datasettype == 1)
        {
            sparse.sparsegetrow(sparsexy, srcidx, ref pbuf.x);
        }
        mlpeprocess(ensemble, pbuf.x, ref pbuf.y);

        // load desired values: single value at column NIn for softmax
        // networks, NOut values starting at column NIn otherwise
        if (iscls)
        {
            if (datasettype == 0)
            {
                pbuf.desiredy[0] = densexy[srcidx, nin];
            }
            if (datasettype == 1)
            {
                pbuf.desiredy[0] = sparse.sparseget(sparsexy, srcidx, nin);
            }
        }
        else
        {
            if (datasettype == 0)
            {
                for (int k = 0; k <= nout - 1; k++)
                {
                    pbuf.desiredy[k] = densexy[srcidx, nin + k];
                }
            }
            if (datasettype == 1)
            {
                for (int k = 0; k <= nout - 1; k++)
                {
                    pbuf.desiredy[k] = sparse.sparseget(sparsexy, srcidx, nin + k);
                }
            }
        }
        bdss.dserraccumulate(ref pbuf.tmp0, pbuf.y, pbuf.desiredy);
    }
    bdss.dserrfinish(ref pbuf.tmp0);
    rep.relclserror = pbuf.tmp0[0];
    rep.avgce = pbuf.tmp0[1] / Math.Log(2);
    rep.rmserror = pbuf.tmp0[2];
    rep.avgerror = pbuf.tmp0[3];
    rep.avgrelerror = pbuf.tmp0[4];
    alglib.smp.ae_shared_pool_recycle(buf, ref pbuf);
}
/*************************************************************************
This function initializes temporaries needed for training session.

When the Sessions pool is not initialized yet, a seed session is prepared
with InitMLPETrnSession() and installed into the pool; otherwise nothing
is done. The seed object is created only when it is actually needed.
*************************************************************************/
private static void initmlpetrnsessions(mlpbase.multilayerperceptron individualnetwork, mlptrainer trainer, alglib.smp.shared_pool sessions)
{
    if (alglib.smp.ae_shared_pool_is_initialized(sessions))
    {
        // pool already seeded, nothing to prepare
        return;
    }

    mlpetrnsession seed = new mlpetrnsession();
    initmlpetrnsession(individualnetwork, trainer, seed);
    alglib.smp.ae_shared_pool_set_seed(sessions, seed);
}
/*************************************************************************
Serializer: allocation

Reserves serializer entries for an ensemble: three scalar entries, the
Weights, ColumnMeans and ColumnSigmas arrays (full length each), and the
entries required by the embedded network (via MLPAlloc).

  -- ALGLIB --
     Copyright 19.10.2011 by Bochkanov Sergey
*************************************************************************/
public static void mlpealloc(alglib.serializer s, mlpensemble ensemble)
{
    // three scalar entries
    for (int entry = 0; entry < 3; entry++)
    {
        s.alloc_entry();
    }

    // ensemble-level arrays; -1 requests the full array length
    apserv.allocrealarray(s, ensemble.weights, -1);
    apserv.allocrealarray(s, ensemble.columnmeans, -1);
    apserv.allocrealarray(s, ensemble.columnsigmas, -1);

    // embedded network
    mlpbase.mlpalloc(s, ensemble.network);
}
/*************************************************************************
Serialization: complex value

Writes the X field of V followed by its Y field, each as a double.
*************************************************************************/
public static void serializecomplex(alglib.serializer s, complex v)
{
    double xpart = v.x;
    double ypart = v.y;

    // stream order is X first, then Y
    s.serialize_double(xpart);
    s.serialize_double(ypart);
}
/*************************************************************************
Serializer: serialization

Writes an ensemble to the stream: serialization code, version,
EnsembleSize, the Weights, ColumnMeans and ColumnSigmas arrays (full
length each), then the embedded network (via MLPSerialize).

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void mlpeserialize(alglib.serializer s, mlpensemble ensemble)
{
    // header
    s.serialize_int(scodes.getmlpeserializationcode());
    s.serialize_int(mlpefirstversion);

    // ensemble-level data; -1 requests the full array length
    s.serialize_int(ensemble.ensemblesize);
    apserv.serializerealarray(s, ensemble.weights, -1);
    apserv.serializerealarray(s, ensemble.columnmeans, -1);
    apserv.serializerealarray(s, ensemble.columnsigmas, -1);

    // embedded network
    mlpbase.mlpserialize(s, ensemble.network);
}
/*************************************************************************
Serialization: real array

Writes the element count followed by the first N elements of V.
A negative N means "use the full length of V".
*************************************************************************/
public static void serializerealarray(alglib.serializer s, double[] v, int n)
{
    int count = n < 0 ? alglib.ap.len(v) : n;

    s.serialize_int(count);
    for (int pos = 0; pos < count; pos++)
    {
        s.serialize_double(v[pos]);
    }
}
/*************************************************************************
Serializer: unserialization

Validates the two-integer stream header (serialization code and version),
then restores EnsembleSize, the Weights, ColumnMeans and ColumnSigmas
arrays and the embedded network. Finally the Y buffer is reallocated with
one element per network output.

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void mlpeunserialize(alglib.serializer s, mlpensemble ensemble)
{
    //
    // check correctness of header
    //
    int headercode = s.unserialize_int();
    alglib.ap.assert(headercode == scodes.getmlpeserializationcode(), "MLPEUnserialize: stream header corrupted");
    int version = s.unserialize_int();
    alglib.ap.assert(version == mlpefirstversion, "MLPEUnserialize: stream header corrupted");

    //
    // Create network
    //
    ensemble.ensemblesize = s.unserialize_int();
    apserv.unserializerealarray(s, ref ensemble.weights);
    apserv.unserializerealarray(s, ref ensemble.columnmeans);
    apserv.unserializerealarray(s, ref ensemble.columnsigmas);
    mlpbase.mlpunserialize(s, ensemble.network);

    //
    // Allocate temporaries
    //
    int outputscount = mlpbase.mlpgetoutputscount(ensemble.network);
    ensemble.y = new double[outputscount];
}
/*************************************************************************
Allocation of serializer: Integer array

Reserves one entry for the element count plus one entry per element.
A negative N means "use the full length of V".
*************************************************************************/
public static void allocintegerarray(alglib.serializer s, int[] v, int n)
{
    int count = n < 0 ? alglib.ap.len(v) : n;

    // length entry
    s.alloc_entry();

    // one entry per element
    for (int pos = 0; pos < count; pos++)
    {
        s.alloc_entry();
    }
}
/*************************************************************************
Internal subroutine for parallelization function MLPFoldCV.

The range of folds [Fold,DFold) is halved recursively until it contains a
single fold. For that fold the network is trained on all points which do
NOT belong to it, and network outputs for the points which DO belong to it
are written to the corresponding rows of CVY.

INPUT PARAMETERS:
    S         -   trainer object;
    RowSize   -   row size(eitherNIn+NOut or NIn+1);
    NRestarts -   number of restarts(>=0);
    Folds     -   cross-validation set;
    Fold      -   the number of first cross-validation(>=0);
    DFold     -   the number of second cross-validation(>=Fold+1);
    CVY       -   parameter which stores  the result is returned by network,
                  training on I-th cross-validation set.
                  It has to be preallocated.
    PoolDataCV-   parameter for parallelization.

NOTE: There are no checks on the parameters correctness.

  -- ALGLIB --
     Copyright 25.09.2012 by Bochkanov Sergey
*************************************************************************/
private static void mthreadcv(mlptrainer s, int rowsize, int nrestarts, int[] folds, int fold, int dfold, double[,] cvy, alglib.smp.shared_pool pooldatacv)
{
    if (fold != dfold - 1)
    {
        // more than one fold left: split the range in two halves
        alglib.ap.assert(fold < dfold - 1, "MThreadCV: internal error(Fold>DFold-1).");
        int middle = (fold + dfold) / 2;
        mthreadcv(s, rowsize, nrestarts, folds, fold, middle, cvy, pooldatacv);
        mthreadcv(s, rowsize, nrestarts, folds, middle, dfold, cvy, pooldatacv);
        return;
    }

    //
    // Separate set
    //
    mlpparallelizationcv datacv = null;
    alglib.smp.ae_shared_pool_retrieve(pooldatacv, ref datacv);
    datacv.subsetsize = 0;
    for (int point = 0; point < s.npoints; point++)
    {
        if (folds[point] == fold)
        {
            continue;
        }
        datacv.subset[datacv.subsetsize] = point;
        datacv.subsetsize = datacv.subsetsize + 1;
    }

    //
    // Train on CV training set
    //
    mlptrainnetworkx(s, nrestarts, -1, datacv.subset, datacv.subsetsize, datacv.subset, 0, datacv.network, datacv.rep, true, datacv.trnpool);
    datacv.ngrad = datacv.ngrad + datacv.rep.ngrad;

    //
    // Estimate error using CV test set
    //
    for (int point = 0; point < s.npoints; point++)
    {
        if (folds[point] != fold)
        {
            continue;
        }
        if (s.datatype == 0)
        {
            for (int col = 0; col < rowsize; col++)
            {
                datacv.xyrow[col] = s.densexy[point, col];
            }
        }
        if (s.datatype == 1)
        {
            sparse.sparsegetrow(s.sparsexy, point, ref datacv.xyrow);
        }
        mlpbase.mlpprocess(datacv.network, datacv.xyrow, ref datacv.y);
        for (int col = 0; col < s.nout; col++)
        {
            cvy[point, col] = datacv.y[col];
        }
    }
    alglib.smp.ae_shared_pool_recycle(pooldatacv, ref datacv);
}
/*************************************************************************
Unserialization: Integer array

Reads an integer length N followed by N integers. V is reset to an empty
array first and stays empty when N is zero.
*************************************************************************/
public static void unserializeintegerarray(alglib.serializer s, ref int[] v)
{
    // output is emptied before anything is read from the stream
    v = new int[0];

    int count = s.unserialize_int();
    if (count == 0)
    {
        return;
    }

    v = new int[count];
    for (int pos = 0; pos < count; pos++)
    {
        v[pos] = s.unserialize_int();
    }
}
/*************************************************************************
This function trains neural network passed to this function, using current
dataset (one which was passed to MLPSetDataset() or MLPSetSparseDataset())
and current training settings. Training  from  NRestarts  random  starting
positions is performed, best network is chosen.

This function is intended to be used internally. It may be used in several
settings:
* training with ValSubsetSize=0, corresponds  to  "normal"  training  with
  termination  criteria  based on S.MaxIts (steps count) and S.WStep (step
  size). Training sample is given by TrnSubset/TrnSubsetSize.
* training with ValSubsetSize>0, corresponds to  early  stopping  training
  with additional MaxIts/WStep stopping criteria. Training sample is given
  by TrnSubset/TrnSubsetSize, validation sample  is  given  by  ValSubset/
  ValSubsetSize.

The function is recursive and works in three modes:
* IsRootCall=True: parameters are validated, the Sessions pool is prepared,
  the function calls itself with IsRootCall=False, then the best parameters
  found among recycled sessions are imported into Network and error metrics
  on the training subset are written to Rep.
* IsRootCall=False, NRestarts>=2: the restarts are divided into two halves
  which are processed by two recursive calls; NGrad/NHess/NCholesky of the
  halves are summed into Rep.
* IsRootCall=False, NRestarts=0 or 1: one training round is performed on a
  session retrieved from the pool, and the session's BestParameters/
  BestRMSError are updated when the round gives a smaller RMS error.

  -- ALGLIB --
     Copyright 13.08.2012 by Bochkanov Sergey
*************************************************************************/
private static void mlptrainnetworkx(mlptrainer s, int nrestarts, int algokind, int[] trnsubset, int trnsubsetsize, int[] valsubset, int valsubsetsize, mlpbase.multilayerperceptron network, mlpreport rep, bool isrootcall, alglib.smp.shared_pool sessions)
{
    mlpbase.modelerrors modrep = new mlpbase.modelerrors();
    double eval = 0;
    double ebest = 0;
    int ngradbatch = 0;
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    int pcount = 0;
    int itbest = 0;
    int itcnt = 0;
    int ntype = 0;
    int ttype = 0;
    bool rndstart = new bool();
    int i = 0;
    int nr0 = 0;
    int nr1 = 0;
    mlpreport rep0 = new mlpreport();
    mlpreport rep1 = new mlpreport();
    bool randomizenetwork = new bool();
    double bestrmserror = 0;
    smlptrnsession psession = null;
    int i_ = 0;

    mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount);

    //
    // Process root call
    //
    if (isrootcall)
    {
        //
        // Check correctness of parameters
        //
        alglib.ap.assert(algokind == 0 || algokind == -1, "MLPTrainNetworkX: unexpected AlgoKind");
        alglib.ap.assert(s.npoints >= 0, "MLPTrainNetworkX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)");

        // TType is derived from the trainer (S.RCPar), NType from the network
        // (softmax or not); the two must agree
        if (s.rcpar)
        {
            ttype = 0;
        }
        else
        {
            ttype = 1;
        }
        if (!mlpbase.mlpissoftmax(network))
        {
            ntype = 0;
        }
        else
        {
            ntype = 1;
        }
        alglib.ap.assert(ntype == ttype, "MLPTrainNetworkX: internal error - type of the training network is not similar to network type in trainer object");
        alglib.ap.assert(s.nin == nin, "MLPTrainNetworkX: internal error - number of inputs in trainer is not equal to number of inputs in the training network.");
        alglib.ap.assert(s.nout == nout, "MLPTrainNetworkX: internal error - number of outputs in trainer is not equal to number of outputs in the training network.");
        alglib.ap.assert(nrestarts >= 0, "MLPTrainNetworkX: internal error - NRestarts<0.");
        alglib.ap.assert(alglib.ap.len(trnsubset) >= trnsubsetsize, "MLPTrainNetworkX: internal error - parameter TrnSubsetSize more than input subset size(Length(TrnSubset)<TrnSubsetSize)");
        for (i = 0; i <= trnsubsetsize - 1; i++)
        {
            alglib.ap.assert(trnsubset[i] >= 0 && trnsubset[i] <= s.npoints - 1, "MLPTrainNetworkX: internal error - parameter TrnSubset contains incorrect index(TrnSubset[I]<0 or TrnSubset[I]>S.NPoints-1)");
        }
        alglib.ap.assert(alglib.ap.len(valsubset) >= valsubsetsize, "MLPTrainNetworkX: internal error - parameter ValSubsetSize more than input subset size(Length(ValSubset)<ValSubsetSize)");
        for (i = 0; i <= valsubsetsize - 1; i++)
        {
            alglib.ap.assert(valsubset[i] >= 0 && valsubset[i] <= s.npoints - 1, "MLPTrainNetworkX: internal error - parameter ValSubset contains incorrect index(ValSubset[I]<0 or ValSubset[I]>S.NPoints-1)");
        }

        //
        // Train
        //
        randomizenetwork = nrestarts > 0;
        initmlptrnsessions(network, randomizenetwork, s, sessions);
        mlptrainnetworkx(s, nrestarts, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, rep, false, sessions);

        //
        // Choose best network
        //
        // All recycled sessions are scanned; parameters of the session with
        // the smallest BestRMSError are imported into Network.
        //
        bestrmserror = math.maxrealnumber;
        alglib.smp.ae_shared_pool_first_recycled(sessions, ref psession);
        while (psession != null)
        {
            if ((double)(psession.bestrmserror) < (double)(bestrmserror))
            {
                mlpbase.mlpimporttunableparameters(network, psession.bestparameters);
                bestrmserror = psession.bestrmserror;
            }
            alglib.smp.ae_shared_pool_next_recycled(sessions, ref psession);
        }

        //
        // Calculate errors
        //
        if (s.datatype == 0)
        {
            mlpbase.mlpallerrorssubset(network, s.densexy, s.npoints, trnsubset, trnsubsetsize, modrep);
        }
        if (s.datatype == 1)
        {
            mlpbase.mlpallerrorssparsesubset(network, s.sparsexy, s.npoints, trnsubset, trnsubsetsize, modrep);
        }
        rep.relclserror = modrep.relclserror;
        rep.avgce = modrep.avgce;
        rep.rmserror = modrep.rmserror;
        rep.avgerror = modrep.avgerror;
        rep.avgrelerror = modrep.avgrelerror;

        //
        // Done
        //
        return;
    }

    //
    // Split problem, if we have more than 1 restart
    //
    if (nrestarts >= 2)
    {
        //
        // Divide problem with NRestarts into two: NR0 and NR1.
        //
        nr0 = nrestarts / 2;
        nr1 = nrestarts - nr0;
        mlptrainnetworkx(s, nr0, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, rep0, false, sessions);
        mlptrainnetworkx(s, nr1, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, rep1, false, sessions);

        //
        // Aggregate results
        //
        rep.ngrad = rep0.ngrad + rep1.ngrad;
        rep.nhess = rep0.nhess + rep1.nhess;
        rep.ncholesky = rep0.ncholesky + rep1.ncholesky;

        //
        // Done :)
        //
        return;
    }

    //
    // Execution with NRestarts=1 or NRestarts=0:
    // * NRestarts=1 means that network is restarted from random position
    // * NRestarts=0 means that network is not randomized
    //
    alglib.ap.assert(nrestarts == 0 || nrestarts == 1, "MLPTrainNetworkX: internal error");
    rep.ngrad = 0;
    rep.nhess = 0;
    rep.ncholesky = 0;
    alglib.smp.ae_shared_pool_retrieve(sessions, ref psession);
    if (((s.datatype == 0 || s.datatype == 1) && s.npoints > 0) && trnsubsetsize != 0)
    {
        //
        // Train network using combination of early stopping and step-size
        // and step-count based criteria. Network state with best value of
        // validation set error is stored in WBuf0. When validation set is
        // zero, most recent state of network is stored.
        //
        rndstart = nrestarts != 0;
        ngradbatch = 0;
        eval = 0;
        ebest = 0;
        itbest = 0;
        itcnt = 0;
        mlpstarttrainingx(s, rndstart, algokind, trnsubset, trnsubsetsize, psession);
        if (s.datatype == 0)
        {
            ebest = mlpbase.mlperrorsubset(psession.network, s.densexy, s.npoints, valsubset, valsubsetsize);
        }
        if (s.datatype == 1)
        {
            ebest = mlpbase.mlperrorsparsesubset(psession.network, s.sparsexy, s.npoints, valsubset, valsubsetsize);
        }

        // initial weights are the first "best" snapshot
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            psession.wbuf0[i_] = psession.network.weights[i_];
        }
        while (mlpcontinuetrainingx(s, trnsubset, trnsubsetsize, ref ngradbatch, psession))
        {
            if (s.datatype == 0)
            {
                eval = mlpbase.mlperrorsubset(psession.network, s.densexy, s.npoints, valsubset, valsubsetsize);
            }
            if (s.datatype == 1)
            {
                eval = mlpbase.mlperrorsparsesubset(psession.network, s.sparsexy, s.npoints, valsubset, valsubsetsize);
            }

            // snapshot is refreshed when validation error did not grow, or
            // unconditionally when there is no validation subset
            if ((double)(eval) <= (double)(ebest) || valsubsetsize == 0)
            {
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    psession.wbuf0[i_] = psession.network.weights[i_];
                }
                ebest = eval;
                itbest = itcnt;
            }

            // stop once more than 30 iterations were made and the iteration
            // counter exceeds 1.5x the index of the best iteration
            if (itcnt > 30 && (double)(itcnt) > (double)(1.5 * itbest))
            {
                break;
            }
            itcnt = itcnt + 1;
        }

        // restore the stored snapshot into the session network
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            psession.network.weights[i_] = psession.wbuf0[i_];
        }
        rep.ngrad = ngradbatch;
    }
    else
    {
        // nothing to train on: all weights are set to zero
        for (i = 0; i <= wcount - 1; i++)
        {
            psession.network.weights[i] = 0;
        }
    }

    //
    // Evaluate network performance and update PSession.BestParameters/BestRMSError
    // (if needed).
    //
    if (s.datatype == 0)
    {
        mlpbase.mlpallerrorssubset(psession.network, s.densexy, s.npoints, trnsubset, trnsubsetsize, modrep);
    }
    if (s.datatype == 1)
    {
        mlpbase.mlpallerrorssparsesubset(psession.network, s.sparsexy, s.npoints, trnsubset, trnsubsetsize, modrep);
    }
    if ((double)(modrep.rmserror) < (double)(psession.bestrmserror))
    {
        mlpbase.mlpexporttunableparameters(psession.network, ref psession.bestparameters, ref pcount);
        psession.bestrmserror = modrep.rmserror;
    }

    //
    // Move session back to pool
    //
    alglib.smp.ae_shared_pool_recycle(sessions, ref psession);
}
/*************************************************************************
Serialization: real matrix

Writes the two matrix dimensions to the serializer, followed by the
elements of the matrix in row-by-row order.

INPUT PARAMETERS:
    S   -   serializer which receives the data
    V   -   matrix whose elements are written
    N0  -   number of rows to write; a negative value means that the
            row count is taken from alglib.ap.rows(V)
    N1  -   number of columns to write; a negative value means that the
            column count is taken from alglib.ap.cols(V)
*************************************************************************/
public static void serializerealmatrix(alglib.serializer s, double[,] v, int n0, int n1)
{
    // Negative sizes are placeholders for "use the actual matrix size".
    int rowcount = n0 < 0 ? alglib.ap.rows(v) : n0;
    int colcount = n1 < 0 ? alglib.ap.cols(v) : n1;

    // Dimensions first, then the payload.
    s.serialize_int(rowcount);
    s.serialize_int(colcount);
    for (int row = 0; row < rowcount; row++)
    {
        for (int col = 0; col < colcount; col++)
        {
            s.serialize_double(v[row, col]);
        }
    }
}
/*************************************************************************
Helper for MLPTrainEnsembleX(): overwrites the storage of ensemble members
with indexes in [Idx0,Idx1).

For every member all WCount weights and all PCount column means are set
to zero, and all PCount column sigmas are set to SigmaValue.
*************************************************************************/
private static void resetensembleslots(mlpe.mlpensemble ensemble, int idx0, int idx1, int wcount, int pcount, double sigmavalue)
{
    for (int member = idx0; member < idx1; member++)
    {
        for (int w = 0; w < wcount; w++)
        {
            ensemble.weights[member * wcount + w] = 0.0;
        }
        for (int p = 0; p < pcount; p++)
        {
            ensemble.columnmeans[member * pcount + p] = 0.0;
            ensemble.columnsigmas[member * pcount + p] = sigmavalue;
        }
    }
}

/*************************************************************************
This function trains the neural network ensemble passed to it using the
current dataset and the early stopping training algorithm. Each early
stopping round performs NRestarts random restarts (thus,
EnsembleSize*NRestarts training rounds are performed in total).

INPUT PARAMETERS:
    S               -   trainer object holding the dataset
    Ensemble        -   ensemble whose members are trained
    Idx0, Idx1      -   members with indexes in half-interval [Idx0,Idx1)
                        are processed by this call
    NRestarts       -   number of restarts; non-positive values are
                        replaced by 1
    TrainingMethod  -   how the dataset is divided for each member:
                        * 0 - every point goes to the training subset when
                              math.randomreal() is below 0.66, otherwise
                              to the validation subset; the split is
                              redrawn until both subsets are non-empty
                        * 1 - training subset is filled with S.NPoints
                              indexes drawn by hqrnd.hqrnduniformi(),
                              validation subset is empty
    NGrad           -   gradient counter; a leaf call adds the counts
                        reported by its training runs, a splitting call
                        stores the sum of counters of its two halves
    IsRootCall      -   True for the outermost call, which prepares the
                        session pool and clears the output storage before
                        re-entering itself in non-root mode
    ESessions       -   shared pool of ensemble training sessions

  -- ALGLIB --
     Copyright 22.08.2012 by Bochkanov Sergey
*************************************************************************/
private static void mlptrainensemblex(mlptrainer s, mlpe.mlpensemble ensemble, int idx0, int idx1, int nrestarts, int trainingmethod, apserv.sinteger ngrad, bool isrootcall, alglib.smp.shared_pool esessions)
{
    int nin = mlpbase.mlpgetinputscount(ensemble.network);
    int nout = mlpbase.mlpgetoutputscount(ensemble.network);
    int wcount = mlpbase.mlpgetweightscount(ensemble.network);
    int pcount = mlpbase.mlpissoftmax(ensemble.network) ? nin : nin + nout;
    if (nrestarts <= 0)
    {
        nrestarts = 1;
    }

    //
    // Degenerate case: fewer than two points, nothing to train on.
    // Members receive zero weights, zero means and unit sigmas.
    //
    if (s.npoints < 2)
    {
        resetensembleslots(ensemble, idx0, idx1, wcount, pcount, 1.0);
        return;
    }

    //
    // Root call:
    // * prepare the pool of training sessions
    // * fill the ensemble by zeros (helps to detect errors)
    // * repeat the call in non-root mode and exit
    //
    if (isrootcall)
    {
        initmlpetrnsessions(ensemble.network, s, esessions);
        resetensembleslots(ensemble, idx0, idx1, wcount, pcount, 0.0);
        mlptrainensemblex(s, ensemble, idx0, idx1, nrestarts, trainingmethod, ngrad, false, esessions);
        return;
    }

    //
    // Two or more members: split the range into halves, process each
    // half recursively and combine the gradient counters.
    //
    if (idx1 - idx0 >= 2)
    {
        int mid = idx0 + (idx1 - idx0) / 2;
        apserv.sinteger ngradlo = new apserv.sinteger();
        apserv.sinteger ngradhi = new apserv.sinteger();
        ngradlo.val = 0;
        ngradhi.val = 0;
        mlptrainensemblex(s, ensemble, idx0, mid, nrestarts, trainingmethod, ngradlo, false, esessions);
        mlptrainensemblex(s, ensemble, mid, idx1, nrestarts, trainingmethod, ngradhi, false, esessions);
        ngrad.val = ngradlo.val + ngradhi.val;
        return;
    }

    //
    // Leaf: borrow a session from the pool and train members one by one.
    //
    mlpetrnsession session = null;
    alglib.smp.ae_shared_pool_retrieve(esessions, ref session);

    hqrnd.hqrndstate rs = new hqrnd.hqrndstate();
    hqrnd.hqrndrandomize(rs);
    for (int member = idx0; member < idx1; member++)
    {
        //
        // Build training/validation subsets for this member
        //
        int trncount = 0;
        int valcount = 0;
        switch (trainingmethod)
        {
            case 0:
                do
                {
                    trncount = 0;
                    valcount = 0;
                    for (int pt = 0; pt < s.npoints; pt++)
                    {
                        if (math.randomreal() < 0.66)
                        {
                            // point goes to the training subset
                            session.trnsubset[trncount] = pt;
                            trncount++;
                        }
                        else
                        {
                            // point goes to the validation subset
                            session.valsubset[valcount] = pt;
                            valcount++;
                        }
                    }
                }
                while (trncount == 0 || valcount == 0);
                break;

            case 1:
                valcount = 0;
                trncount = s.npoints;
                for (int pt = 0; pt < s.npoints; pt++)
                {
                    session.trnsubset[pt] = hqrnd.hqrnduniformi(rs, s.npoints);
                }
                break;
        }

        //
        // Train the session network on the subsets built above
        //
        mlptrainnetworkx(s, nrestarts, -1, session.trnsubset, trncount, session.valsubset, valcount, session.network, session.mlprep, true, session.mlpsessions);
        ngrad.val += session.mlprep.ngrad;

        //
        // Copy the trained network into the member's slot of the ensemble
        //
        int woffs = member * wcount;
        for (int w = 0; w < wcount; w++)
        {
            ensemble.weights[woffs + w] = session.network.weights[w];
        }
        int poffs = member * pcount;
        for (int p = 0; p < pcount; p++)
        {
            ensemble.columnmeans[poffs + p] = session.network.columnmeans[p];
        }
        for (int p = 0; p < pcount; p++)
        {
            ensemble.columnsigmas[poffs + p] = session.network.columnsigmas[p];
        }
    }

    //
    // Return the session to the pool
    //
    alglib.smp.ae_shared_pool_recycle(esessions, ref session);
}
/*************************************************************************
Recursive part of RankData(): either splits the problem into two
subproblems or calls the basecase code, depending on problem complexity.

INPUT PARAMETERS:
    XY          -   array[NPoints,NFeatures], dataset
    I0          -   index of the first row to process
    I1          -   index of the past-the-last row to process; this
                    function processes half-interval [I0,I1).
    NFeatures   -   number of features
    IsCentered  -   whether ranks are centered or not:
                    * True  - ranks are centered in such a way that their
                              within-row sum is zero
                    * False - ranks are not centered
    Pool        -   shared pool which holds temporary buffers
                    (APBuffers structure)
    BasecaseCost-   minimum cost of the problem which will be split

OUTPUT PARAMETERS:
    XY          -   data in [I0,I1) are replaced by their within-row
                    ranks; ranking starts from 0, ends at NFeatures-1

  -- ALGLIB --
     Copyright 18.04.2013 by Bochkanov Sergey
*************************************************************************/
private static void rankdatarec(double[,] xy, int i0, int i1, int nfeatures, bool iscentered, alglib.smp.shared_pool pool, int basecasecost)
{
    alglib.ap.assert(i1 >= i0, "RankDataRec: internal error");

    //
    // Cost estimate: rows * features * log2(features). The problem is
    // split only when it has at least two rows and the estimate is
    // strictly above the basecase threshold.
    //
    int rowcount = i1 - i0;
    double cost = apserv.inttoreal(rowcount) * apserv.inttoreal(nfeatures) * apserv.logbase2(nfeatures);
    bool shouldsplit = rowcount >= 2 && cost > basecasecost;

    if (!shouldsplit)
    {
        //
        // Small problem: take two buffers from the pool, run the serial
        // code, hand the buffers back.
        //
        apserv.apbuffers bufa = null;
        apserv.apbuffers bufb = null;
        alglib.smp.ae_shared_pool_retrieve(pool, ref bufa);
        alglib.smp.ae_shared_pool_retrieve(pool, ref bufb);
        rankdatabasecase(xy, i0, i1, nfeatures, iscentered, bufa, bufb);
        alglib.smp.ae_shared_pool_recycle(pool, ref bufa);
        alglib.smp.ae_shared_pool_recycle(pool, ref bufb);
        return;
    }

    //
    // Large problem: process [I0,Mid) and [Mid,I1) separately
    //
    int mid = (i1 + i0) / 2;
    rankdatarec(xy, i0, mid, nfeatures, iscentered, pool, basecasecost);
    rankdatarec(xy, mid, i1, nfeatures, iscentered, pool, basecasecost);
}
/*************************************************************************
Serializer: unserialization

Reads an RBF model from the serializer stream. The stream is expected to
begin with two integers - the RBF serialization code and the format
version - which are checked by assertions before anything else is read.

INPUT PARAMETERS:
    S       -   serializer to read from
    Model   -   model object which is (re)initialized by RBFCreate() and
                then filled with the unserialized fields

  -- ALGLIB --
     Copyright 02.02.2012 by Bochkanov Sergey
*************************************************************************/
public static void rbfunserialize(alglib.serializer s, rbfmodel model)
{
    //
    // Header: serialization code, then version
    //
    int streamcode = s.unserialize_int();
    alglib.ap.assert(streamcode == scodes.getrbfserializationcode(), "RBFUnserialize: stream header corrupted");
    int streamversion = s.unserialize_int();
    alglib.ap.assert(streamversion == rbffirstversion, "RBFUnserialize: stream header corrupted");

    //
    // Primary model parameters.
    //
    // RBFCreate() must be called because some internal fields are NOT
    // stored in the stream and need initialization.
    //
    int nx = s.unserialize_int();
    int ny = s.unserialize_int();
    rbfcreate(nx, ny, model);

    //
    // Remaining fields, in the order they appear in the stream
    //
    model.nc = s.unserialize_int();
    model.nl = s.unserialize_int();
    nearestneighbor.kdtreeunserialize(s, model.tree);
    apserv.unserializerealmatrix(s, ref model.xc);
    apserv.unserializerealmatrix(s, ref model.wr);
    model.rmax = s.unserialize_double();
    apserv.unserializerealmatrix(s, ref model.v);
}