/*************************************************************************
Serializer: unserialization

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void dfunserialize(alglib.serializer s, decisionforest forest)
{
    int i0 = 0;
    int i1 = 0;

    //
    // check correctness of header
    //
    i0 = s.unserialize_int();
    alglib.ap.assert(i0 == scodes.getrdfserializationcode(), "DFUnserialize: stream header corrupted");
    i1 = s.unserialize_int();
    alglib.ap.assert(i1 == dffirstversion, "DFUnserialize: stream header corrupted");

    //
    // Unserialize data
    //
    forest.nvars = s.unserialize_int();
    forest.nclasses = s.unserialize_int();
    forest.ntrees = s.unserialize_int();
    forest.bufsize = s.unserialize_int();
    apserv.unserializerealarray(s, ref forest.trees);
}
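/*************************************************************************
Illustrative sketch (not part of ALGLIB): every unserializer in this
section begins with the same guard -- read a format code and a version
number, and fail before touching any payload. The standalone demo below
restates that pattern with plain .NET BinaryReader/BinaryWriter; the
FORMATCODE and FIRSTVERSION constants are invented for illustration and
are not ALGLIB's actual wire format.
*************************************************************************/
public static class headercheckdemo
{
    private const int FORMATCODE = 1234;   // hypothetical stream tag
    private const int FIRSTVERSION = 0;    // hypothetical format version

    public static void rundemo()
    {
        var ms = new System.IO.MemoryStream();
        var w = new System.IO.BinaryWriter(ms);
        w.Write(FORMATCODE);
        w.Write(FIRSTVERSION);

        ms.Position = 0;
        var r = new System.IO.BinaryReader(ms);

        // Mirrors the asserts in dfunserialize: reject the stream early
        if (r.ReadInt32() != FORMATCODE)
            throw new System.IO.InvalidDataException("stream header corrupted");
        if (r.ReadInt32() != FIRSTVERSION)
            throw new System.IO.InvalidDataException("unsupported format version");
        System.Console.WriteLine("header OK");
    }
}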
/*************************************************************************
Unserialization: integer array
*************************************************************************/
public static void unserializeintegerarray(alglib.serializer s, ref int[] v)
{
    int n = 0;
    int i = 0;
    int t = 0;

    v = new int[0];
    n = s.unserialize_int();
    if (n == 0)
    {
        return;
    }
    v = new int[n];
    for (i = 0; i <= n - 1; i++)
    {
        t = s.unserialize_int();
        v[i] = t;
    }
}
/*************************************************************************
Serialization: real matrix
*************************************************************************/
public static void serializerealmatrix(alglib.serializer s, double[,] v, int n0, int n1)
{
    int i = 0;
    int j = 0;

    if (n0 < 0)
    {
        n0 = alglib.ap.rows(v);
    }
    if (n1 < 0)
    {
        n1 = alglib.ap.cols(v);
    }
    s.serialize_int(n0);
    s.serialize_int(n1);
    for (i = 0; i <= n0 - 1; i++)
    {
        for (j = 0; j <= n1 - 1; j++)
        {
            s.serialize_double(v[i, j]);
        }
    }
}
/*************************************************************************
Serialization: real array
*************************************************************************/
public static void serializerealarray(alglib.serializer s, double[] v, int n)
{
    int i = 0;

    if (n < 0)
    {
        n = alglib.ap.len(v);
    }
    s.serialize_int(n);
    for (i = 0; i <= n - 1; i++)
    {
        s.serialize_double(v[i]);
    }
}
/*************************************************************************
Allocation of serializer: Integer array
*************************************************************************/
public static void allocintegerarray(alglib.serializer s, int[] v, int n)
{
    int i = 0;

    if (n < 0)
    {
        n = alglib.ap.len(v);
    }
    s.alloc_entry();
    for (i = 0; i <= n - 1; i++)
    {
        s.alloc_entry();
    }
}
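/*************************************************************************
Illustrative sketch (not part of ALGLIB): the alloc* routines write
nothing -- they only count entries so the serializer can pre-size its
output before the real serialization pass. The tinyserializer type below
is invented to show that two-pass idea in isolation; it is not ALGLIB's
serializer and its method names are hypothetical.
*************************************************************************/
public class tinyserializer
{
    private bool counting;
    public int entries;
    public System.Collections.Generic.List<string> output =
        new System.Collections.Generic.List<string>();

    public void startalloc() { counting = true; entries = 0; }
    public void startwrite() { counting = false; output.Capacity = entries; } // pre-sized from pass 1

    public void serializeint(int v)
    {
        if (counting)
            entries = entries + 1;        // alloc pass: count only, like alloc_entry()
        else
            output.Add(v.ToString());     // write pass: emit the value
    }

    public static void rundemo()
    {
        int[] data = { 3, 1, 4, 1, 5 };
        tinyserializer s = new tinyserializer();

        s.startalloc();                   // pass 1: mirrors allocintegerarray
        s.serializeint(data.Length);
        foreach (int x in data) s.serializeint(x);

        s.startwrite();                   // pass 2: mirrors serializeintegerarray
        s.serializeint(data.Length);
        foreach (int x in data) s.serializeint(x);

        System.Console.WriteLine(s.entries + " entries -> " + string.Join(",", s.output));
    }
}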
/*************************************************************************
Serializer: unserialization

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void kdtreeunserialize(alglib.serializer s, kdtree tree)
{
    int i0 = 0;
    int i1 = 0;

    //
    // check correctness of header
    //
    i0 = s.unserialize_int();
    alglib.ap.assert(i0 == scodes.getkdtreeserializationcode(), "KDTreeUnserialize: stream header corrupted");
    i1 = s.unserialize_int();
    alglib.ap.assert(i1 == kdtreefirstversion, "KDTreeUnserialize: stream header corrupted");

    //
    // Unserialize data
    //
    tree.n = s.unserialize_int();
    tree.nx = s.unserialize_int();
    tree.ny = s.unserialize_int();
    tree.normtype = s.unserialize_int();
    apserv.unserializerealmatrix(s, ref tree.xy);
    apserv.unserializeintegerarray(s, ref tree.tags);
    apserv.unserializerealarray(s, ref tree.boxmin);
    apserv.unserializerealarray(s, ref tree.boxmax);
    apserv.unserializeintegerarray(s, ref tree.nodes);
    apserv.unserializerealarray(s, ref tree.splits);
    kdtreealloctemporaries(tree, tree.n, tree.nx, tree.ny);
}
/*************************************************************************
Serialization: complex value
*************************************************************************/
public static void serializecomplex(alglib.serializer s, complex v)
{
    s.serialize_double(v.x);
    s.serialize_double(v.y);
}
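/*************************************************************************
Illustrative sketch (not part of ALGLIB): a complex value is stored as two
consecutive doubles (real part, then imaginary part), matching
serializecomplex/unserializecomplex. BinaryWriter stands in for ALGLIB's
serializer here purely for illustration.
*************************************************************************/
public static class complexroundtripdemo
{
    public static void rundemo()
    {
        var v = new System.Numerics.Complex(1.5, -2.25);
        var ms = new System.IO.MemoryStream();
        var w = new System.IO.BinaryWriter(ms);
        w.Write(v.Real);       // v.x in the ALGLIB code
        w.Write(v.Imaginary);  // v.y in the ALGLIB code

        ms.Position = 0;
        var r = new System.IO.BinaryReader(ms);
        var back = new System.Numerics.Complex(r.ReadDouble(), r.ReadDouble());
        System.Console.WriteLine(back == v); // True
    }
}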
/*************************************************************************
Calculation of all types of errors

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
public static void mlpeallerrorsx(mlpensemble ensemble,
    double[,] densexy,
    sparse.sparsematrix sparsexy,
    int datasetsize,
    int datasettype,
    int[] idx,
    int subset0,
    int subset1,
    int subsettype,
    alglib.smp.shared_pool buf,
    mlpbase.modelerrors rep)
{
    int i = 0;
    int j = 0;
    int nin = 0;
    int nout = 0;
    bool iscls = new bool();
    int srcidx = 0;
    hpccores.mlpbuffers pbuf = null;
    mlpbase.modelerrors rep0 = new mlpbase.modelerrors();
    mlpbase.modelerrors rep1 = new mlpbase.modelerrors();
    int i_ = 0;
    int i1_ = 0;

    //
    // Get network information
    //
    nin = mlpbase.mlpgetinputscount(ensemble.network);
    nout = mlpbase.mlpgetoutputscount(ensemble.network);
    iscls = mlpbase.mlpissoftmax(ensemble.network);

    //
    // Retrieve buffer, prepare, process data, recycle buffer
    //
    alglib.smp.ae_shared_pool_retrieve(buf, ref pbuf);
    if (iscls)
    {
        bdss.dserrallocate(nout, ref pbuf.tmp0);
    }
    else
    {
        bdss.dserrallocate(-nout, ref pbuf.tmp0);
    }
    apserv.rvectorsetlengthatleast(ref pbuf.x, nin);
    apserv.rvectorsetlengthatleast(ref pbuf.y, nout);
    apserv.rvectorsetlengthatleast(ref pbuf.desiredy, nout);
    for (i = subset0; i <= subset1 - 1; i++)
    {
        srcidx = -1;
        if (subsettype == 0)
        {
            srcidx = i;
        }
        if (subsettype == 1)
        {
            srcidx = idx[i];
        }
        alglib.ap.assert(srcidx >= 0, "MLPEAllErrorsX: internal error");
        if (datasettype == 0)
        {
            for (i_ = 0; i_ <= nin - 1; i_++)
            {
                pbuf.x[i_] = densexy[srcidx, i_];
            }
        }
        if (datasettype == 1)
        {
            sparse.sparsegetrow(sparsexy, srcidx, ref pbuf.x);
        }
        mlpeprocess(ensemble, pbuf.x, ref pbuf.y);
        if (mlpbase.mlpissoftmax(ensemble.network))
        {
            if (datasettype == 0)
            {
                pbuf.desiredy[0] = densexy[srcidx, nin];
            }
            if (datasettype == 1)
            {
                pbuf.desiredy[0] = sparse.sparseget(sparsexy, srcidx, nin);
            }
        }
        else
        {
            if (datasettype == 0)
            {
                i1_ = (nin) - (0);
                for (i_ = 0; i_ <= nout - 1; i_++)
                {
                    pbuf.desiredy[i_] = densexy[srcidx, i_ + i1_];
                }
            }
            if (datasettype == 1)
            {
                for (j = 0; j <= nout - 1; j++)
                {
                    pbuf.desiredy[j] = sparse.sparseget(sparsexy, srcidx, nin + j);
                }
            }
        }
        bdss.dserraccumulate(ref pbuf.tmp0, pbuf.y, pbuf.desiredy);
    }
    bdss.dserrfinish(ref pbuf.tmp0);
    rep.relclserror = pbuf.tmp0[0];
    rep.avgce = pbuf.tmp0[1] / Math.Log(2);
    rep.rmserror = pbuf.tmp0[2];
    rep.avgerror = pbuf.tmp0[3];
    rep.avgrelerror = pbuf.tmp0[4];
    alglib.smp.ae_shared_pool_recycle(buf, ref pbuf);
}
/*************************************************************************
Serializer: allocation

  -- ALGLIB --
     Copyright 19.10.2011 by Bochkanov Sergey
*************************************************************************/
public static void mlpealloc(alglib.serializer s, mlpensemble ensemble)
{
    s.alloc_entry();
    s.alloc_entry();
    s.alloc_entry();
    apserv.allocrealarray(s, ensemble.weights, -1);
    apserv.allocrealarray(s, ensemble.columnmeans, -1);
    apserv.allocrealarray(s, ensemble.columnsigmas, -1);
    mlpbase.mlpalloc(s, ensemble.network);
}
/*************************************************************************
Serializer: unserialization

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void mlpunserialize(alglib.serializer s, multilayerperceptron network)
{
    int i0 = 0;
    int i1 = 0;
    int i = 0;
    int j = 0;
    int k = 0;
    int fkind = 0;
    double threshold = 0;
    double v0 = 0;
    double v1 = 0;
    int nin = 0;
    int nout = 0;
    bool issoftmax = new bool();
    int[] layersizes = new int[0];

    //
    // check correctness of header
    //
    i0 = s.unserialize_int();
    alglib.ap.assert(i0 == scodes.getmlpserializationcode(), "MLPUnserialize: stream header corrupted");
    i1 = s.unserialize_int();
    alglib.ap.assert(i1 == mlpfirstversion, "MLPUnserialize: stream header corrupted");

    //
    // Create network
    //
    issoftmax = s.unserialize_bool();
    apserv.unserializeintegerarray(s, ref layersizes);
    alglib.ap.assert((alglib.ap.len(layersizes) == 2 || alglib.ap.len(layersizes) == 3) || alglib.ap.len(layersizes) == 4, "MLPUnserialize: too many hidden layers!");
    nin = layersizes[0];
    nout = layersizes[alglib.ap.len(layersizes) - 1];
    if (alglib.ap.len(layersizes) == 2)
    {
        if (issoftmax)
        {
            mlpcreatec0(layersizes[0], layersizes[1], network);
        }
        else
        {
            mlpcreate0(layersizes[0], layersizes[1], network);
        }
    }
    if (alglib.ap.len(layersizes) == 3)
    {
        if (issoftmax)
        {
            mlpcreatec1(layersizes[0], layersizes[1], layersizes[2], network);
        }
        else
        {
            mlpcreate1(layersizes[0], layersizes[1], layersizes[2], network);
        }
    }
    if (alglib.ap.len(layersizes) == 4)
    {
        if (issoftmax)
        {
            mlpcreatec2(layersizes[0], layersizes[1], layersizes[2], layersizes[3], network);
        }
        else
        {
            mlpcreate2(layersizes[0], layersizes[1], layersizes[2], layersizes[3], network);
        }
    }

    //
    // Load neurons and weights
    //
    for (i = 1; i <= alglib.ap.len(layersizes) - 1; i++)
    {
        for (j = 0; j <= layersizes[i] - 1; j++)
        {
            fkind = s.unserialize_int();
            threshold = s.unserialize_double();
            mlpsetneuroninfo(network, i, j, fkind, threshold);
            for (k = 0; k <= layersizes[i - 1] - 1; k++)
            {
                v0 = s.unserialize_double();
                mlpsetweight(network, i - 1, k, i, j, v0);
            }
        }
    }

    //
    // Load standardizer
    //
    for (j = 0; j <= nin - 1; j++)
    {
        v0 = s.unserialize_double();
        v1 = s.unserialize_double();
        mlpsetinputscaling(network, j, v0, v1);
    }
    for (j = 0; j <= nout - 1; j++)
    {
        v0 = s.unserialize_double();
        v1 = s.unserialize_double();
        mlpsetoutputscaling(network, j, v0, v1);
    }
}
public static void mlpallerrorsx(multilayerperceptron network,
    double[,] densexy,
    sparse.sparsematrix sparsexy,
    int datasetsize,
    int datasettype,
    int[] idx,
    int subset0,
    int subset1,
    int subsettype,
    alglib.smp.shared_pool buf,
    modelerrors rep)
{
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    int rowsize = 0;
    bool iscls = new bool();
    int srcidx = 0;
    int cstart = 0;
    int csize = 0;
    int j = 0;
    hpccores.mlpbuffers pbuf = null;
    int len0 = 0;
    int len1 = 0;
    modelerrors rep0 = new modelerrors();
    modelerrors rep1 = new modelerrors();
    int i_ = 0;
    int i1_ = 0;

    alglib.ap.assert(datasetsize >= 0, "MLPAllErrorsX: SetSize<0");
    alglib.ap.assert(datasettype == 0 || datasettype == 1, "MLPAllErrorsX: DatasetType is incorrect");
    alglib.ap.assert(subsettype == 0 || subsettype == 1, "MLPAllErrorsX: SubsetType is incorrect");

    //
    // Determine network properties
    //
    mlpproperties(network, ref nin, ref nout, ref wcount);
    iscls = mlpissoftmax(network);

    //
    // Split problem.
    //
    // Splitting the problem allows us to reduce the effect of single-precision
    // arithmetic (the SSE-optimized version of MLPChunkedProcess uses single
    // precision internally, converting back to double precision after results
    // are exported from the HPC buffer to the network). Small batches are
    // calculated in single precision and results are aggregated in double
    // precision, which avoids accumulation of errors when we process very
    // large batches (tens of thousands of items).
    //
    // NOTE: it is important to use real arithmetic for ProblemCost
    //       because ProblemCost may be larger than MAXINT.
    //
    if (subset1 - subset0 >= 2 * microbatchsize && (double)(apserv.inttoreal(subset1 - subset0) * apserv.inttoreal(wcount)) > (double)(gradbasecasecost))
    {
        apserv.splitlength(subset1 - subset0, microbatchsize, ref len0, ref len1);
        mlpallerrorsx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0, subset0 + len0, subsettype, buf, rep0);
        mlpallerrorsx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0 + len0, subset1, subsettype, buf, rep1);
        rep.relclserror = (len0 * rep0.relclserror + len1 * rep1.relclserror) / (len0 + len1);
        rep.avgce = (len0 * rep0.avgce + len1 * rep1.avgce) / (len0 + len1);
        rep.rmserror = Math.Sqrt((len0 * math.sqr(rep0.rmserror) + len1 * math.sqr(rep1.rmserror)) / (len0 + len1));
        rep.avgerror = (len0 * rep0.avgerror + len1 * rep1.avgerror) / (len0 + len1);
        rep.avgrelerror = (len0 * rep0.avgrelerror + len1 * rep1.avgrelerror) / (len0 + len1);
        return;
    }

    //
    // Retrieve and prepare
    //
    alglib.smp.ae_shared_pool_retrieve(buf, ref pbuf);
    if (iscls)
    {
        rowsize = nin + 1;
        bdss.dserrallocate(nout, ref pbuf.tmp0);
    }
    else
    {
        rowsize = nin + nout;
        bdss.dserrallocate(-nout, ref pbuf.tmp0);
    }

    //
    // Processing
    //
    hpccores.hpcpreparechunkedgradient(network.weights, wcount, mlpntotal(network), nin, nout, pbuf);
    cstart = subset0;
    while (cstart < subset1)
    {
        //
        // Determine size of current chunk and copy it to PBuf.XY
        //
        csize = Math.Min(subset1, cstart + pbuf.chunksize) - cstart;
        for (j = 0; j <= csize - 1; j++)
        {
            srcidx = -1;
            if (subsettype == 0)
            {
                srcidx = cstart + j;
            }
            if (subsettype == 1)
            {
                srcidx = idx[cstart + j];
            }
            alglib.ap.assert(srcidx >= 0, "MLPAllErrorsX: internal error");
            if (datasettype == 0)
            {
                for (i_ = 0; i_ <= rowsize - 1; i_++)
                {
                    pbuf.xy[j, i_] = densexy[srcidx, i_];
                }
            }
            if (datasettype == 1)
            {
                sparse.sparsegetrow(sparsexy, srcidx, ref pbuf.xyrow);
                for (i_ = 0; i_ <= rowsize - 1; i_++)
                {
                    pbuf.xy[j, i_] = pbuf.xyrow[i_];
                }
            }
        }

        //
        // Unpack XY and process (temporary code, to be replaced by chunked processing)
        //
        for (j = 0; j <= csize - 1; j++)
        {
            for (i_ = 0; i_ <= rowsize - 1; i_++)
            {
                pbuf.xy2[j, i_] = pbuf.xy[j, i_];
            }
        }
        mlpchunkedprocess(network, pbuf.xy2, 0, csize, pbuf.batch4buf, pbuf.hpcbuf);
        for (j = 0; j <= csize - 1; j++)
        {
            for (i_ = 0; i_ <= nin - 1; i_++)
            {
                pbuf.x[i_] = pbuf.xy2[j, i_];
            }
            i1_ = (nin) - (0);
            for (i_ = 0; i_ <= nout - 1; i_++)
            {
                pbuf.y[i_] = pbuf.xy2[j, i_ + i1_];
            }
            if (iscls)
            {
                pbuf.desiredy[0] = pbuf.xy[j, nin];
            }
            else
            {
                i1_ = (nin) - (0);
                for (i_ = 0; i_ <= nout - 1; i_++)
                {
                    pbuf.desiredy[i_] = pbuf.xy[j, i_ + i1_];
                }
            }
            bdss.dserraccumulate(ref pbuf.tmp0, pbuf.y, pbuf.desiredy);
        }

        //
        // Process chunk and advance line pointer
        //
        cstart = cstart + pbuf.chunksize;
    }
    bdss.dserrfinish(ref pbuf.tmp0);
    rep.relclserror = pbuf.tmp0[0];
    rep.avgce = pbuf.tmp0[1] / Math.Log(2);
    rep.rmserror = pbuf.tmp0[2];
    rep.avgerror = pbuf.tmp0[3];
    rep.avgrelerror = pbuf.tmp0[4];

    //
    // Recycle
    //
    alglib.smp.ae_shared_pool_recycle(buf, ref pbuf);
}
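/*************************************************************************
Illustrative sketch (not part of ALGLIB): after the recursive split above,
per-half error reports are merged with size-weighted averages. Mean-style
metrics combine linearly, but RMS must be combined through its square,
exactly as done for rep.rmserror. The helper below restates that
arithmetic in isolation.
*************************************************************************/
public static class erroraggregationdemo
{
    // Combine metrics of two micro-batches of sizes len0 and len1
    public static void combine(int len0, double avg0, double rms0,
        int len1, double avg1, double rms1,
        out double avg, out double rms)
    {
        avg = (len0 * avg0 + len1 * avg1) / (len0 + len1);
        rms = System.Math.Sqrt((len0 * rms0 * rms0 + len1 * rms1 * rms1) / (len0 + len1));
    }

    public static void rundemo()
    {
        double avg;
        double rms;
        combine(100, 0.10, 0.20, 300, 0.30, 0.40, out avg, out rms);
        System.Console.WriteLine("avg=" + avg + " rms=" + rms); // avg=0.25, rms~0.3606
    }
}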
/*************************************************************************
Serializer: serialization

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void mlpserialize(alglib.serializer s, multilayerperceptron network)
{
    int i = 0;
    int j = 0;
    int k = 0;
    int fkind = 0;
    double threshold = 0;
    double v0 = 0;
    double v1 = 0;
    int nin = 0;
    int nout = 0;

    nin = network.hllayersizes[0];
    nout = network.hllayersizes[alglib.ap.len(network.hllayersizes) - 1];
    s.serialize_int(scodes.getmlpserializationcode());
    s.serialize_int(mlpfirstversion);
    s.serialize_bool(mlpissoftmax(network));
    apserv.serializeintegerarray(s, network.hllayersizes, -1);
    for (i = 1; i <= alglib.ap.len(network.hllayersizes) - 1; i++)
    {
        for (j = 0; j <= network.hllayersizes[i] - 1; j++)
        {
            mlpgetneuroninfo(network, i, j, ref fkind, ref threshold);
            s.serialize_int(fkind);
            s.serialize_double(threshold);
            for (k = 0; k <= network.hllayersizes[i - 1] - 1; k++)
            {
                s.serialize_double(mlpgetweight(network, i - 1, k, i, j));
            }
        }
    }
    for (j = 0; j <= nin - 1; j++)
    {
        mlpgetinputscaling(network, j, ref v0, ref v1);
        s.serialize_double(v0);
        s.serialize_double(v1);
    }
    for (j = 0; j <= nout - 1; j++)
    {
        mlpgetoutputscaling(network, j, ref v0, ref v1);
        s.serialize_double(v0);
        s.serialize_double(v1);
    }
}
/*************************************************************************
Serializer: allocation

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void mlpalloc(alglib.serializer s, multilayerperceptron network)
{
    int i = 0;
    int j = 0;
    int k = 0;
    int fkind = 0;
    double threshold = 0;
    double v0 = 0;
    double v1 = 0;
    int nin = 0;
    int nout = 0;

    nin = network.hllayersizes[0];
    nout = network.hllayersizes[alglib.ap.len(network.hllayersizes) - 1];
    s.alloc_entry();
    s.alloc_entry();
    s.alloc_entry();
    apserv.allocintegerarray(s, network.hllayersizes, -1);
    for (i = 1; i <= alglib.ap.len(network.hllayersizes) - 1; i++)
    {
        for (j = 0; j <= network.hllayersizes[i] - 1; j++)
        {
            mlpgetneuroninfo(network, i, j, ref fkind, ref threshold);
            s.alloc_entry();
            s.alloc_entry();
            for (k = 0; k <= network.hllayersizes[i - 1] - 1; k++)
            {
                s.alloc_entry();
            }
        }
    }
    for (j = 0; j <= nin - 1; j++)
    {
        mlpgetinputscaling(network, j, ref v0, ref v1);
        s.alloc_entry();
        s.alloc_entry();
    }
    for (j = 0; j <= nout - 1; j++)
    {
        mlpgetoutputscaling(network, j, ref v0, ref v1);
        s.alloc_entry();
        s.alloc_entry();
    }
}
public static void mlpgradbatchx(multilayerperceptron network,
    double[,] densexy,
    sparse.sparsematrix sparsexy,
    int datasetsize,
    int datasettype,
    int[] idx,
    int subset0,
    int subset1,
    int subsettype,
    alglib.smp.shared_pool buf,
    alglib.smp.shared_pool gradbuf)
{
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    int rowsize = 0;
    int srcidx = 0;
    int cstart = 0;
    int csize = 0;
    int j = 0;
    double problemcost = 0;
    hpccores.mlpbuffers buf2 = null;
    int len0 = 0;
    int len1 = 0;
    hpccores.mlpbuffers pbuf = null;
    smlpgrad sgrad = null;
    int i_ = 0;

    alglib.ap.assert(datasetsize >= 0, "MLPGradBatchX: SetSize<0");
    alglib.ap.assert(datasettype == 0 || datasettype == 1, "MLPGradBatchX: DatasetType is incorrect");
    alglib.ap.assert(subsettype == 0 || subsettype == 1, "MLPGradBatchX: SubsetType is incorrect");

    //
    // Determine network and dataset properties
    //
    mlpproperties(network, ref nin, ref nout, ref wcount);
    if (mlpissoftmax(network))
    {
        rowsize = nin + 1;
    }
    else
    {
        rowsize = nin + nout;
    }

    //
    // Split problem.
    //
    // Splitting the problem allows us to reduce the effect of single-precision
    // arithmetic (the SSE-optimized version of MLPChunkedGradient uses single
    // precision internally, converting back to double precision after results
    // are exported from the HPC buffer to the network). Small batches are
    // calculated in single precision and results are aggregated in double
    // precision, which avoids accumulation of errors when we process very
    // large batches (tens of thousands of items).
    //
    // NOTE: it is important to use real arithmetic for ProblemCost
    //       because ProblemCost may be larger than MAXINT.
    //
    problemcost = subset1 - subset0;
    problemcost = problemcost * wcount;
    if (subset1 - subset0 >= 2 * microbatchsize && (double)(problemcost) > (double)(gradbasecasecost))
    {
        apserv.splitlength(subset1 - subset0, microbatchsize, ref len0, ref len1);
        mlpgradbatchx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0, subset0 + len0, subsettype, buf, gradbuf);
        mlpgradbatchx(network, densexy, sparsexy, datasetsize, datasettype, idx, subset0 + len0, subset1, subsettype, buf, gradbuf);
        return;
    }

    //
    // Chunked processing
    //
    alglib.smp.ae_shared_pool_retrieve(gradbuf, ref sgrad);
    alglib.smp.ae_shared_pool_retrieve(buf, ref pbuf);
    hpccores.hpcpreparechunkedgradient(network.weights, wcount, mlpntotal(network), nin, nout, pbuf);
    cstart = subset0;
    while (cstart < subset1)
    {
        //
        // Determine size of current chunk and copy it to PBuf.XY
        //
        csize = Math.Min(subset1, cstart + pbuf.chunksize) - cstart;
        for (j = 0; j <= csize - 1; j++)
        {
            srcidx = -1;
            if (subsettype == 0)
            {
                srcidx = cstart + j;
            }
            if (subsettype == 1)
            {
                srcidx = idx[cstart + j];
            }
            alglib.ap.assert(srcidx >= 0, "MLPGradBatchX: internal error");
            if (datasettype == 0)
            {
                for (i_ = 0; i_ <= rowsize - 1; i_++)
                {
                    pbuf.xy[j, i_] = densexy[srcidx, i_];
                }
            }
            if (datasettype == 1)
            {
                sparse.sparsegetrow(sparsexy, srcidx, ref pbuf.xyrow);
                for (i_ = 0; i_ <= rowsize - 1; i_++)
                {
                    pbuf.xy[j, i_] = pbuf.xyrow[i_];
                }
            }
        }

        //
        // Process chunk and advance line pointer
        //
        mlpchunkedgradient(network, pbuf.xy, 0, csize, pbuf.batch4buf, pbuf.hpcbuf, ref sgrad.f, false);
        cstart = cstart + pbuf.chunksize;
    }
    hpccores.hpcfinalizechunkedgradient(pbuf, sgrad.g);
    alglib.smp.ae_shared_pool_recycle(buf, ref pbuf);
    alglib.smp.ae_shared_pool_recycle(gradbuf, ref sgrad);
}
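/*************************************************************************
Illustrative sketch (not part of ALGLIB): the NOTE in the two batch
routines above exists because ProblemCost = batch size * weight count can
exceed Int32.MaxValue. C# integer multiplication silently wraps in an
unchecked context, so the product must be formed in double precision
before comparing against gradbasecasecost. The numbers below are made up
to demonstrate the failure mode.
*************************************************************************/
public static class problemcostdemo
{
    public static void rundemo()
    {
        int batchsize = 100000;
        int wcount = 50000;

        int badcost = unchecked(batchsize * wcount);  // wraps around: 705032704
        double goodcost = (double)batchsize * wcount; // 5000000000, as intended

        System.Console.WriteLine(badcost + " vs " + goodcost);
    }
}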
/*************************************************************************
Serializer: allocation

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void kdtreealloc(alglib.serializer s, kdtree tree)
{
    //
    // Header
    //
    s.alloc_entry();
    s.alloc_entry();

    //
    // Data
    //
    s.alloc_entry();
    s.alloc_entry();
    s.alloc_entry();
    s.alloc_entry();
    apserv.allocrealmatrix(s, tree.xy, -1, -1);
    apserv.allocintegerarray(s, tree.tags, -1);
    apserv.allocrealarray(s, tree.boxmin, -1);
    apserv.allocrealarray(s, tree.boxmax, -1);
    apserv.allocintegerarray(s, tree.nodes, -1);
    apserv.allocrealarray(s, tree.splits, -1);
}
/*************************************************************************
Serializer: serialization

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void mlpeserialize(alglib.serializer s, mlpensemble ensemble)
{
    s.serialize_int(scodes.getmlpeserializationcode());
    s.serialize_int(mlpefirstversion);
    s.serialize_int(ensemble.ensemblesize);
    apserv.serializerealarray(s, ensemble.weights, -1);
    apserv.serializerealarray(s, ensemble.columnmeans, -1);
    apserv.serializerealarray(s, ensemble.columnsigmas, -1);
    mlpbase.mlpserialize(s, ensemble.network);
}
/*************************************************************************
Serializer: serialization

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void kdtreeserialize(alglib.serializer s, kdtree tree)
{
    //
    // Header
    //
    s.serialize_int(scodes.getkdtreeserializationcode());
    s.serialize_int(kdtreefirstversion);

    //
    // Data
    //
    s.serialize_int(tree.n);
    s.serialize_int(tree.nx);
    s.serialize_int(tree.ny);
    s.serialize_int(tree.normtype);
    apserv.serializerealmatrix(s, tree.xy, -1, -1);
    apserv.serializeintegerarray(s, tree.tags, -1);
    apserv.serializerealarray(s, tree.boxmin, -1);
    apserv.serializerealarray(s, tree.boxmax, -1);
    apserv.serializeintegerarray(s, tree.nodes, -1);
    apserv.serializerealarray(s, tree.splits, -1);
}
/*************************************************************************
Serializer: unserialization

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void mlpeunserialize(alglib.serializer s, mlpensemble ensemble)
{
    int i0 = 0;
    int i1 = 0;

    //
    // check correctness of header
    //
    i0 = s.unserialize_int();
    alglib.ap.assert(i0 == scodes.getmlpeserializationcode(), "MLPEUnserialize: stream header corrupted");
    i1 = s.unserialize_int();
    alglib.ap.assert(i1 == mlpefirstversion, "MLPEUnserialize: stream header corrupted");

    //
    // Create network
    //
    ensemble.ensemblesize = s.unserialize_int();
    apserv.unserializerealarray(s, ref ensemble.weights);
    apserv.unserializerealarray(s, ref ensemble.columnmeans);
    apserv.unserializerealarray(s, ref ensemble.columnsigmas);
    mlpbase.mlpunserialize(s, ensemble.network);

    //
    // Allocate temporaries
    //
    ensemble.y = new double[mlpbase.mlpgetoutputscount(ensemble.network)];
}
/*************************************************************************
Allocation of serializer: complex value
*************************************************************************/
public static void alloccomplex(alglib.serializer s, complex v)
{
    s.alloc_entry();
    s.alloc_entry();
}
private static void mthreadcv(mlptrainer s,
    int rowsize,
    int nrestarts,
    int[] folds,
    int fold,
    int dfold,
    double[,] cvy,
    alglib.smp.shared_pool pooldatacv)
{
    mlpparallelizationcv datacv = null;
    int i = 0;
    int i_ = 0;

    if (fold == dfold - 1)
    {
        //
        // Separate set
        //
        alglib.smp.ae_shared_pool_retrieve(pooldatacv, ref datacv);
        datacv.subsetsize = 0;
        for (i = 0; i <= s.npoints - 1; i++)
        {
            if (folds[i] != fold)
            {
                datacv.subset[datacv.subsetsize] = i;
                datacv.subsetsize = datacv.subsetsize + 1;
            }
        }

        //
        // Train on CV training set
        //
        mlptrainnetworkx(s, nrestarts, -1, datacv.subset, datacv.subsetsize, datacv.subset, 0, datacv.network, datacv.rep, true, datacv.trnpool);
        datacv.ngrad = datacv.ngrad + datacv.rep.ngrad;

        //
        // Estimate error using CV test set
        //
        for (i = 0; i <= s.npoints - 1; i++)
        {
            if (folds[i] == fold)
            {
                if (s.datatype == 0)
                {
                    for (i_ = 0; i_ <= rowsize - 1; i_++)
                    {
                        datacv.xyrow[i_] = s.densexy[i, i_];
                    }
                }
                if (s.datatype == 1)
                {
                    sparse.sparsegetrow(s.sparsexy, i, ref datacv.xyrow);
                }
                mlpbase.mlpprocess(datacv.network, datacv.xyrow, ref datacv.y);
                for (i_ = 0; i_ <= s.nout - 1; i_++)
                {
                    cvy[i, i_] = datacv.y[i_];
                }
            }
        }
        alglib.smp.ae_shared_pool_recycle(pooldatacv, ref datacv);
    }
    else
    {
        alglib.ap.assert(fold < dfold - 1, "MThreadCV: internal error(Fold>DFold-1).");
        mthreadcv(s, rowsize, nrestarts, folds, fold, (fold + dfold) / 2, cvy, pooldatacv);
        mthreadcv(s, rowsize, nrestarts, folds, (fold + dfold) / 2, dfold, cvy, pooldatacv);
    }
}
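/*************************************************************************
Illustrative sketch (not part of ALGLIB): mthreadcv recurses on the
half-open fold range [Fold, DFold), bisecting at (fold+dfold)/2 until a
single fold remains; in the real code the two halves are natural targets
for parallel workers. The sketch below reproduces only the recursion
shape.
*************************************************************************/
public static class foldsplitdemo
{
    public static void processfolds(int fold, int dfold)
    {
        if (fold == dfold - 1)
        {
            System.Console.WriteLine("train/evaluate fold " + fold); // leaf: one CV fold
            return;
        }
        int mid = (fold + dfold) / 2;
        processfolds(fold, mid);   // first half of the range
        processfolds(mid, dfold);  // second half of the range
    }

    public static void rundemo()
    {
        processfolds(0, 5); // visits folds 0..4 exactly once
    }
}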
/*************************************************************************
Unserialization: complex value
*************************************************************************/
public static complex unserializecomplex(alglib.serializer s)
{
    complex result = 0;

    result.x = s.unserialize_double();
    result.y = s.unserialize_double();
    return result;
}
private static void mlptrainnetworkx(mlptrainer s,
    int nrestarts,
    int algokind,
    int[] trnsubset,
    int trnsubsetsize,
    int[] valsubset,
    int valsubsetsize,
    mlpbase.multilayerperceptron network,
    mlpreport rep,
    bool isrootcall,
    alglib.smp.shared_pool sessions)
{
    mlpbase.modelerrors modrep = new mlpbase.modelerrors();
    double eval = 0;
    double ebest = 0;
    int ngradbatch = 0;
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    int pcount = 0;
    int itbest = 0;
    int itcnt = 0;
    int ntype = 0;
    int ttype = 0;
    bool rndstart = new bool();
    int i = 0;
    int nr0 = 0;
    int nr1 = 0;
    mlpreport rep0 = new mlpreport();
    mlpreport rep1 = new mlpreport();
    bool randomizenetwork = new bool();
    double bestrmserror = 0;
    smlptrnsession psession = null;
    int i_ = 0;

    mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount);

    //
    // Process root call
    //
    if (isrootcall)
    {
        //
        // Check correctness of parameters
        //
        alglib.ap.assert(algokind == 0 || algokind == -1, "MLPTrainNetworkX: unexpected AlgoKind");
        alglib.ap.assert(s.npoints >= 0, "MLPTrainNetworkX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)");
        if (s.rcpar)
        {
            ttype = 0;
        }
        else
        {
            ttype = 1;
        }
        if (!mlpbase.mlpissoftmax(network))
        {
            ntype = 0;
        }
        else
        {
            ntype = 1;
        }
        alglib.ap.assert(ntype == ttype, "MLPTrainNetworkX: internal error - type of the training network is not similar to network type in trainer object");
        alglib.ap.assert(s.nin == nin, "MLPTrainNetworkX: internal error - number of inputs in trainer is not equal to number of inputs in the training network.");
        alglib.ap.assert(s.nout == nout, "MLPTrainNetworkX: internal error - number of outputs in trainer is not equal to number of outputs in the training network.");
        alglib.ap.assert(nrestarts >= 0, "MLPTrainNetworkX: internal error - NRestarts<0.");
        alglib.ap.assert(alglib.ap.len(trnsubset) >= trnsubsetsize, "MLPTrainNetworkX: internal error - parameter TrnSubsetSize more than input subset size(Length(TrnSubset)<TrnSubsetSize)");
        for (i = 0; i <= trnsubsetsize - 1; i++)
        {
            alglib.ap.assert(trnsubset[i] >= 0 && trnsubset[i] <= s.npoints - 1, "MLPTrainNetworkX: internal error - parameter TrnSubset contains incorrect index(TrnSubset[I]<0 or TrnSubset[I]>S.NPoints-1)");
        }
        alglib.ap.assert(alglib.ap.len(valsubset) >= valsubsetsize, "MLPTrainNetworkX: internal error - parameter ValSubsetSize more than input subset size(Length(ValSubset)<ValSubsetSize)");
        for (i = 0; i <= valsubsetsize - 1; i++)
        {
            alglib.ap.assert(valsubset[i] >= 0 && valsubset[i] <= s.npoints - 1, "MLPTrainNetworkX: internal error - parameter ValSubset contains incorrect index(ValSubset[I]<0 or ValSubset[I]>S.NPoints-1)");
        }

        //
        // Train
        //
        randomizenetwork = nrestarts > 0;
        initmlptrnsessions(network, randomizenetwork, s, sessions);
        mlptrainnetworkx(s, nrestarts, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, rep, false, sessions);

        //
        // Choose best network
        //
        bestrmserror = math.maxrealnumber;
        alglib.smp.ae_shared_pool_first_recycled(sessions, ref psession);
        while (psession != null)
        {
            if ((double)(psession.bestrmserror) < (double)(bestrmserror))
            {
                mlpbase.mlpimporttunableparameters(network, psession.bestparameters);
                bestrmserror = psession.bestrmserror;
            }
            alglib.smp.ae_shared_pool_next_recycled(sessions, ref psession);
        }

        //
        // Calculate errors
        //
        if (s.datatype == 0)
        {
            mlpbase.mlpallerrorssubset(network, s.densexy, s.npoints, trnsubset, trnsubsetsize, modrep);
        }
        if (s.datatype == 1)
        {
            mlpbase.mlpallerrorssparsesubset(network, s.sparsexy, s.npoints, trnsubset, trnsubsetsize, modrep);
        }
        rep.relclserror = modrep.relclserror;
        rep.avgce = modrep.avgce;
        rep.rmserror = modrep.rmserror;
        rep.avgerror = modrep.avgerror;
        rep.avgrelerror = modrep.avgrelerror;

        //
        // Done
        //
        return;
    }

    //
    // Split problem, if we have more than 1 restart
    //
    if (nrestarts >= 2)
    {
        //
        // Divide problem with NRestarts into two: NR0 and NR1.
        //
        nr0 = nrestarts / 2;
        nr1 = nrestarts - nr0;
        mlptrainnetworkx(s, nr0, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, rep0, false, sessions);
        mlptrainnetworkx(s, nr1, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, rep1, false, sessions);

        //
        // Aggregate results
        //
        rep.ngrad = rep0.ngrad + rep1.ngrad;
        rep.nhess = rep0.nhess + rep1.nhess;
        rep.ncholesky = rep0.ncholesky + rep1.ncholesky;

        //
        // Done :)
        //
        return;
    }

    //
    // Execution with NRestarts=1 or NRestarts=0:
    // * NRestarts=1 means that network is restarted from random position
    // * NRestarts=0 means that network is not randomized
    //
    alglib.ap.assert(nrestarts == 0 || nrestarts == 1, "MLPTrainNetworkX: internal error");
    rep.ngrad = 0;
    rep.nhess = 0;
    rep.ncholesky = 0;
    alglib.smp.ae_shared_pool_retrieve(sessions, ref psession);
    if (((s.datatype == 0 || s.datatype == 1) && s.npoints > 0) && trnsubsetsize != 0)
    {
        //
        // Train network using a combination of early stopping and step-size/
        // step-count based criteria. The network state with the best validation
        // set error is stored in WBuf0. When the validation set is empty, the
        // most recent state of the network is stored.
        //
        rndstart = nrestarts != 0;
        ngradbatch = 0;
        eval = 0;
        ebest = 0;
        itbest = 0;
        itcnt = 0;
        mlpstarttrainingx(s, rndstart, algokind, trnsubset, trnsubsetsize, psession);
        if (s.datatype == 0)
        {
            ebest = mlpbase.mlperrorsubset(psession.network, s.densexy, s.npoints, valsubset, valsubsetsize);
        }
        if (s.datatype == 1)
        {
            ebest = mlpbase.mlperrorsparsesubset(psession.network, s.sparsexy, s.npoints, valsubset, valsubsetsize);
        }
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            psession.wbuf0[i_] = psession.network.weights[i_];
        }
        while (mlpcontinuetrainingx(s, trnsubset, trnsubsetsize, ref ngradbatch, psession))
        {
            if (s.datatype == 0)
            {
                eval = mlpbase.mlperrorsubset(psession.network, s.densexy, s.npoints, valsubset, valsubsetsize);
            }
            if (s.datatype == 1)
            {
                eval = mlpbase.mlperrorsparsesubset(psession.network, s.sparsexy, s.npoints, valsubset, valsubsetsize);
            }
            if ((double)(eval) <= (double)(ebest) || valsubsetsize == 0)
            {
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    psession.wbuf0[i_] = psession.network.weights[i_];
                }
                ebest = eval;
                itbest = itcnt;
            }
            if (itcnt > 30 && (double)(itcnt) > (double)(1.5 * itbest))
            {
                break;
            }
            itcnt = itcnt + 1;
        }
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            psession.network.weights[i_] = psession.wbuf0[i_];
        }
        rep.ngrad = ngradbatch;
    }
    else
    {
        for (i = 0; i <= wcount - 1; i++)
        {
            psession.network.weights[i] = 0;
        }
    }

    //
    // Evaluate network performance and update PSession.BestParameters/BestRMSError
    // (if needed).
    //
    if (s.datatype == 0)
    {
        mlpbase.mlpallerrorssubset(psession.network, s.densexy, s.npoints, trnsubset, trnsubsetsize, modrep);
    }
    if (s.datatype == 1)
    {
        mlpbase.mlpallerrorssparsesubset(psession.network, s.sparsexy, s.npoints, trnsubset, trnsubsetsize, modrep);
    }
    if ((double)(modrep.rmserror) < (double)(psession.bestrmserror))
    {
        mlpbase.mlpexporttunableparameters(psession.network, ref psession.bestparameters, ref pcount);
        psession.bestrmserror = modrep.rmserror;
    }

    //
    // Move session back to pool
    //
    alglib.smp.ae_shared_pool_recycle(sessions, ref psession);
}
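/*************************************************************************
Illustrative sketch (not part of ALGLIB): the training loop above keeps
the weights with the best validation error seen so far and stops once
progress stalls -- after at least 30 iterations, and once the iteration
count exceeds 1.5x the iteration of the last improvement. The mock
validation errors below are synthetic; only the stopping rule is real.
*************************************************************************/
public static class earlystoppingdemo
{
    public static void rundemo()
    {
        var rng = new System.Random(42);
        double ebest = double.MaxValue;
        int itbest = 0;
        for (int itcnt = 0; itcnt < 100000; itcnt++) // hard cap for safety; the real loop has none
        {
            double eval = 1.0 / (1 + itcnt) + 0.05 * rng.NextDouble(); // mock validation error
            if (eval <= ebest)
            {
                ebest = eval;   // the real code copies the weights to WBuf0 here
                itbest = itcnt;
            }
            if (itcnt > 30 && itcnt > 1.5 * itbest)
            {
                System.Console.WriteLine("stopped at it=" + itcnt + ", best it=" + itbest);
                break;
            }
        }
    }
}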
/*************************************************************************
Unserialization: real array
*************************************************************************/
public static void unserializerealarray(alglib.serializer s, ref double[] v)
{
    int n = 0;
    int i = 0;
    double t = 0;

    v = new double[0];
    n = s.unserialize_int();
    if (n == 0)
    {
        return;
    }
    v = new double[n];
    for (i = 0; i <= n - 1; i++)
    {
        t = s.unserialize_double();
        v[i] = t;
    }
}
private static void mlptrainensemblex(mlptrainer s,
    mlpe.mlpensemble ensemble,
    int idx0,
    int idx1,
    int nrestarts,
    int trainingmethod,
    apserv.sinteger ngrad,
    bool isrootcall,
    alglib.smp.shared_pool esessions)
{
    int pcount = 0;
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    int i = 0;
    int j = 0;
    int k = 0;
    int trnsubsetsize = 0;
    int valsubsetsize = 0;
    int k0 = 0;
    apserv.sinteger ngrad0 = new apserv.sinteger();
    apserv.sinteger ngrad1 = new apserv.sinteger();
    mlpetrnsession psession = null;
    hqrnd.hqrndstate rs = new hqrnd.hqrndstate();
    int i_ = 0;
    int i1_ = 0;

    nin = mlpbase.mlpgetinputscount(ensemble.network);
    nout = mlpbase.mlpgetoutputscount(ensemble.network);
    wcount = mlpbase.mlpgetweightscount(ensemble.network);
    if (mlpbase.mlpissoftmax(ensemble.network))
    {
        pcount = nin;
    }
    else
    {
        pcount = nin + nout;
    }
    if (nrestarts <= 0)
    {
        nrestarts = 1;
    }

    //
    // Handle degenerate case
    //
    if (s.npoints < 2)
    {
        for (i = idx0; i <= idx1 - 1; i++)
        {
            for (j = 0; j <= wcount - 1; j++)
            {
                ensemble.weights[i * wcount + j] = 0.0;
            }
            for (j = 0; j <= pcount - 1; j++)
            {
                ensemble.columnmeans[i * pcount + j] = 0.0;
                ensemble.columnsigmas[i * pcount + j] = 1.0;
            }
        }
        return;
    }

    //
    // Process root call
    //
    if (isrootcall)
    {
        //
        // Prepare:
        // * prepare MLPETrnSessions
        // * fill ensemble with zeros (helps to detect errors)
        //
        initmlpetrnsessions(ensemble.network, s, esessions);
        for (i = idx0; i <= idx1 - 1; i++)
        {
            for (j = 0; j <= wcount - 1; j++)
            {
                ensemble.weights[i * wcount + j] = 0.0;
            }
            for (j = 0; j <= pcount - 1; j++)
            {
                ensemble.columnmeans[i * pcount + j] = 0.0;
                ensemble.columnsigmas[i * pcount + j] = 0.0;
            }
        }

        //
        // Train in non-root mode and exit
        //
        mlptrainensemblex(s, ensemble, idx0, idx1, nrestarts, trainingmethod, ngrad, false, esessions);
        return;
    }

    //
    // Split problem
    //
    if (idx1 - idx0 >= 2)
    {
        k0 = (idx1 - idx0) / 2;
        ngrad0.val = 0;
        ngrad1.val = 0;
        mlptrainensemblex(s, ensemble, idx0, idx0 + k0, nrestarts, trainingmethod, ngrad0, false, esessions);
        mlptrainensemblex(s, ensemble, idx0 + k0, idx1, nrestarts, trainingmethod, ngrad1, false, esessions);
        ngrad.val = ngrad0.val + ngrad1.val;
        return;
    }

    //
    // Retrieve and prepare session
    //
    alglib.smp.ae_shared_pool_retrieve(esessions, ref psession);

    //
    // Train
    //
    hqrnd.hqrndrandomize(rs);
    for (k = idx0; k <= idx1 - 1; k++)
    {
        //
        // Split set
        //
        trnsubsetsize = 0;
        valsubsetsize = 0;
        if (trainingmethod == 0)
        {
            do
            {
                trnsubsetsize = 0;
                valsubsetsize = 0;
                for (i = 0; i <= s.npoints - 1; i++)
                {
                    if ((double)(math.randomreal()) < (double)(0.66))
                    {
                        //
                        // Assign sample to training set
                        //
                        psession.trnsubset[trnsubsetsize] = i;
                        trnsubsetsize = trnsubsetsize + 1;
                    }
                    else
                    {
                        //
                        // Assign sample to validation set
                        //
                        psession.valsubset[valsubsetsize] = i;
                        valsubsetsize = valsubsetsize + 1;
                    }
                }
            }
            while (!(trnsubsetsize != 0 && valsubsetsize != 0));
        }
        if (trainingmethod == 1)
        {
            valsubsetsize = 0;
            trnsubsetsize = s.npoints;
            for (i = 0; i <= s.npoints - 1; i++)
            {
                psession.trnsubset[i] = hqrnd.hqrnduniformi(rs, s.npoints);
            }
        }

        //
        // Train
        //
        mlptrainnetworkx(s, nrestarts, -1, psession.trnsubset, trnsubsetsize, psession.valsubset, valsubsetsize, psession.network, psession.mlprep, true, psession.mlpsessions);
        ngrad.val = ngrad.val + psession.mlprep.ngrad;

        //
        // Save results
        //
        i1_ = (0) - (k * wcount);
        for (i_ = k * wcount; i_ <= (k + 1) * wcount - 1; i_++)
        {
            ensemble.weights[i_] = psession.network.weights[i_ + i1_];
        }
        i1_ = (0) - (k * pcount);
        for (i_ = k * pcount; i_ <= (k + 1) * pcount - 1; i_++)
        {
            ensemble.columnmeans[i_] = psession.network.columnmeans[i_ + i1_];
        }
        i1_ = (0) - (k * pcount);
        for (i_ = k * pcount; i_ <= (k + 1) * pcount - 1; i_++)
        {
            ensemble.columnsigmas[i_] = psession.network.columnsigmas[i_ + i1_];
        }
    }

    //
    // Recycle session
    //
    alglib.smp.ae_shared_pool_recycle(esessions, ref psession);
}
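/*************************************************************************
Illustrative sketch (not part of ALGLIB): the two subset strategies above
are (0) a random ~66/34 split into training and validation sets, retried
until both are non-empty, and (1) bagging -- NPoints draws with
replacement and no validation set. The sketch restates both with
System.Random instead of ALGLIB's hqrnd generator.
*************************************************************************/
public static class ensemblesubsetsdemo
{
    public static void rundemo()
    {
        var rng = new System.Random(7);
        int npoints = 10;

        // trainingmethod==1 (bagging): N draws with replacement
        int[] bootstrap = new int[npoints];
        for (int i = 0; i < npoints; i++)
            bootstrap[i] = rng.Next(npoints);
        System.Console.WriteLine("bootstrap: " + string.Join(",", bootstrap));

        // trainingmethod==0: random ~66/34 split (the real code retries
        // until both subsets are non-empty)
        var trn = new System.Collections.Generic.List<int>();
        var val = new System.Collections.Generic.List<int>();
        for (int i = 0; i < npoints; i++)
        {
            if (rng.NextDouble() < 0.66)
                trn.Add(i);
            else
                val.Add(i);
        }
        System.Console.WriteLine("train: " + string.Join(",", trn) + "  val: " + string.Join(",", val));
    }
}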
/*************************************************************************
Serialization: Integer array
*************************************************************************/
public static void serializeintegerarray(alglib.serializer s, int[] v, int n)
{
    int i = 0;

    if (n < 0)
    {
        n = alglib.ap.len(v);
    }
    s.serialize_int(n);
    for (i = 0; i <= n - 1; i++)
    {
        s.serialize_int(v[i]);
    }
}
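/*************************************************************************
Illustrative sketch (not part of ALGLIB): arrays are written as a length
prefix followed by the elements, so the unserializer needs no out-of-band
size information. BinaryWriter stands in for ALGLIB's serializer purely
for illustration.
*************************************************************************/
public static class arrayroundtripdemo
{
    public static void rundemo()
    {
        int[] v = { 10, 20, 30 };
        var ms = new System.IO.MemoryStream();
        var w = new System.IO.BinaryWriter(ms);
        w.Write(v.Length);                     // length prefix (n)
        foreach (int x in v) w.Write(x);       // n elements

        ms.Position = 0;
        var r = new System.IO.BinaryReader(ms);
        int[] back = new int[r.ReadInt32()];   // read n, then n elements
        for (int i = 0; i < back.Length; i++)
            back[i] = r.ReadInt32();
        System.Console.WriteLine(string.Join(",", back)); // 10,20,30
    }
}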
/*************************************************************************
This function initializes temporaries needed for training session.
*************************************************************************/
private static void initmlptrnsessions(mlpbase.multilayerperceptron networktrained,
    bool randomizenetwork,
    mlptrainer trainer,
    alglib.smp.shared_pool sessions)
{
    int[] dummysubset = new int[0];
    smlptrnsession t = new smlptrnsession();
    smlptrnsession p = null;

    if (alglib.smp.ae_shared_pool_is_initialized(sessions))
    {
        //
        // Pool was already initialized.
        // Clear sessions stored in the pool.
        //
        alglib.smp.ae_shared_pool_first_recycled(sessions, ref p);
        while (p != null)
        {
            alglib.ap.assert(mlpbase.mlpsamearchitecture(p.network, networktrained), "InitMLPTrnSessions: internal consistency error");
            p.bestrmserror = math.maxrealnumber;
            alglib.smp.ae_shared_pool_next_recycled(sessions, ref p);
        }
    }
    else
    {
        //
        // Prepare session and seed pool
        //
        initmlptrnsession(networktrained, randomizenetwork, trainer, t);
        alglib.smp.ae_shared_pool_set_seed(sessions, t);
    }
}
/*************************************************************************
Allocation of serializer: real matrix
*************************************************************************/
public static void allocrealmatrix(alglib.serializer s, double[,] v, int n0, int n1)
{
    int i = 0;
    int j = 0;

    if (n0 < 0)
    {
        n0 = alglib.ap.rows(v);
    }
    if (n1 < 0)
    {
        n1 = alglib.ap.cols(v);
    }
    s.alloc_entry();
    s.alloc_entry();
    for (i = 0; i <= n0 - 1; i++)
    {
        for (j = 0; j <= n1 - 1; j++)
        {
            s.alloc_entry();
        }
    }
}
/*************************************************************************
This function initializes temporaries needed for training session.
*************************************************************************/
private static void initmlpetrnsessions(mlpbase.multilayerperceptron individualnetwork,
    mlptrainer trainer,
    alglib.smp.shared_pool sessions)
{
    mlpetrnsession t = new mlpetrnsession();

    if (!alglib.smp.ae_shared_pool_is_initialized(sessions))
    {
        initmlpetrnsession(individualnetwork, trainer, t);
        alglib.smp.ae_shared_pool_set_seed(sessions, t);
    }
}
/*************************************************************************
Unserialization: real matrix
*************************************************************************/
public static void unserializerealmatrix(alglib.serializer s, ref double[,] v)
{
    int i = 0;
    int j = 0;
    int n0 = 0;
    int n1 = 0;
    double t = 0;

    v = new double[0, 0];
    n0 = s.unserialize_int();
    n1 = s.unserialize_int();
    if (n0 == 0 || n1 == 0)
    {
        return;
    }
    v = new double[n0, n1];
    for (i = 0; i <= n0 - 1; i++)
    {
        for (j = 0; j <= n1 - 1; j++)
        {
            t = s.unserialize_double();
            v[i, j] = t;
        }
    }
}
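/*************************************************************************
Illustrative sketch (not part of ALGLIB): matrices are written as two
dimension prefixes followed by the elements in row-major order, matching
serializerealmatrix/unserializerealmatrix above (which additionally
short-circuit to an empty matrix when either dimension is zero).
BinaryWriter again stands in for ALGLIB's serializer.
*************************************************************************/
public static class matrixroundtripdemo
{
    public static void rundemo()
    {
        double[,] m = { { 1, 2, 3 }, { 4, 5, 6 } };
        var ms = new System.IO.MemoryStream();
        var w = new System.IO.BinaryWriter(ms);
        w.Write(m.GetLength(0));               // n0 (rows)
        w.Write(m.GetLength(1));               // n1 (cols)
        for (int i = 0; i < m.GetLength(0); i++)
            for (int j = 0; j < m.GetLength(1); j++)
                w.Write(m[i, j]);              // row-major element order

        ms.Position = 0;
        var r = new System.IO.BinaryReader(ms);
        int n0 = r.ReadInt32();
        int n1 = r.ReadInt32();
        double[,] back = new double[n0, n1];
        for (int i = 0; i < n0; i++)
            for (int j = 0; j < n1; j++)
                back[i, j] = r.ReadDouble();
        System.Console.WriteLine(back[1, 2]); // 6
    }
}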
/*************************************************************************
Serializer: serialization

  -- ALGLIB --
     Copyright 14.03.2011 by Bochkanov Sergey
*************************************************************************/
public static void dfserialize(alglib.serializer s, decisionforest forest)
{
    s.serialize_int(scodes.getrdfserializationcode());
    s.serialize_int(dffirstversion);
    s.serialize_int(forest.nvars);
    s.serialize_int(forest.nclasses);
    s.serialize_int(forest.ntrees);
    s.serialize_int(forest.bufsize);
    apserv.serializerealarray(s, forest.trees, forest.bufsize);
}