public static Tensor[] Repeat(this Session session, Tensor x, int count)
{
    const string ActionName = "repeat";

    return session.RunOperation(
        ActionName,
        () =>
        {
            bool calculateGradient = session.CalculateGradients && x.CalculateGradient;

            Tensor[] ys = session.AllocateTensors(ActionName, count, x.Shape, calculateGradient);
            for (int i = 0; i < count; i++)
            {
                Vectors.Copy(x.Length, x.Weights, 0, ys[i].Weights, 0);
            }

#if !NOLEARNING
            if (calculateGradient)
            {
                session.Push(
                    ActionName,
                    () =>
                    {
                        float alpha = 1.0f / count;
                        for (int i = 0; i < count; i++)
                        {
                            Mathematics.AddProductC(x.Length, ys[i].Gradient, 0, alpha, x.Gradient, 0);
                        }
                    });

                // return a copy of the array; the calling method can replace its content;
                // our closure keeps the array, not its items
                return ys.ToArray();
            }
#endif

            return ys;
        });
}
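// A minimal usage sketch (illustrative only, not part of the library): it assumes a
// Session and an input tensor obtained elsewhere. Repeat fans one tensor out into
// `count` identical copies; on the backward pass each copy contributes 1/count of its
// gradient back into the source tensor. `RepeatExample` is a hypothetical name.
private static void RepeatExample(Session session, Tensor x)
{
    // three independent copies of x, each free to feed a separate branch of the graph
    Tensor[] branches = session.Repeat(x, 3);

    // after back-propagation, x.Gradient holds the average of the three branch gradients
}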
public bool Optimize(
    int numberOfVariables,
    float[] c,
    float[] p,
    int[] y,
    Func<int, int[], int, float[], float[]> q,
    out float[] solution,
    out float rho)
{
    // make copies, these arrays will be modified
    p = p.ToArray();
    y = y.ToArray();

    float[] temp = new float[numberOfVariables];
    float[] g = new float[numberOfVariables];      // gradient
    float[] gbar = new float[numberOfVariables];   // gradient, if we treat free variables as 0

    float[] qd = new float[numberOfVariables];
    for (int k = 0; k < qd.Length; k++)
    {
        qd[k] = q(k, new[] { k }, 1, temp)[0];
    }

    float[] qi = new float[numberOfVariables];
    float[] qj = new float[numberOfVariables];

    bool unshrink = false;

    // Lagrange multipliers
    float[] alpha = new float[numberOfVariables];

    // initialize alpha_status
    Status[] alphaStatus = new Status[numberOfVariables];
    for (int i = 0; i < numberOfVariables; i++)
    {
        UpdateAlphaStatus(i);
    }

    // initialize active set (for shrinking)
    int activeSize = numberOfVariables;
    int[] activeSet = Arrays.Indexes(numberOfVariables);

    // initialize index lookup vector
    int[] indices = Arrays.Indexes(numberOfVariables);

    // initialize gradient: g = p, gbar = 0
    Vectors.Copy(numberOfVariables, p, 0, g, 0);
    Vectors.Set(numberOfVariables, 0.0f, gbar, 0);

    for (int i = 0; i < numberOfVariables; i++)
    {
        if (!IsLowerBound(i))
        {
            q(i, indices, numberOfVariables, qi);

            // scalar form: g[j] += alpha[i] * qi[j]
            Mathematics.AddProductC(numberOfVariables, qi, 0, alpha[i], g, 0);

            if (IsUpperBound(i))
            {
                // scalar form: gbar[j] += c[i] * qi[j]
                Mathematics.AddProductC(numberOfVariables, qi, 0, c[i], gbar, 0);
            }
        }
    }

    // optimization step
    int iter = 0;
    int max_iter = MinMax.Max(10000000, numberOfVariables > int.MaxValue / 100 ? int.MaxValue : 100 * numberOfVariables);
    int counter = MinMax.Min(numberOfVariables, 1000) + 1;

    while (iter < max_iter)
    {
        // show progress and do shrinking
        if (--counter == 0)
        {
            counter = MinMax.Min(numberOfVariables, 1000);
            if (this.Shrinking)
            {
                Shrink();
            }

            Trace.WriteLine(".");
        }

        if (SelectWorkingSet(out int i, out int j) != 0)
        {
            // reconstruct the whole gradient
            ReconstructGradient();

            // reset active set size and check
            activeSize = numberOfVariables;
            Trace.WriteLine("*");

            if (SelectWorkingSet(out i, out j) != 0)
            {
                break;
            }
            else
            {
                counter = 1;    // do shrinking next iteration
            }
        }

        iter++;

        // update alpha[i] and alpha[j], handle bounds carefully
        q(i, indices, activeSize, qi);
        q(j, indices, activeSize, qj);

        float ci = c[i];
        float cj = c[j];

        float old_alpha_i = alpha[i];
        float old_alpha_j = alpha[j];

        if (y[i] != y[j])
        {
            float quad_coef = qd[i] + qd[j] + (2.0f * qi[j]);
            if (quad_coef <= 0)
            {
                quad_coef = TAU;
            }

            float delta = (-g[i] - g[j]) / quad_coef;
            float diff = alpha[i] - alpha[j];
            alpha[i] += delta;
            alpha[j] += delta;

            if (diff > 0)
            {
                if (alpha[j] < 0)
                {
                    alpha[j] = 0;
                    alpha[i] = diff;
                }
            }
            else
            {
                if (alpha[i] < 0)
                {
                    alpha[i] = 0;
                    alpha[j] = -diff;
                }
            }

            if (diff > ci - cj)
            {
                if (alpha[i] > ci)
                {
                    alpha[i] = ci;
                    alpha[j] = ci - diff;
                }
            }
            else
            {
                if (alpha[j] > cj)
                {
                    alpha[j] = cj;
                    alpha[i] = cj + diff;
                }
            }
        }
        else
        {
            float quad_coef = qd[i] + qd[j] - (2.0f * qi[j]);
            if (quad_coef <= 0)
            {
                quad_coef = TAU;
            }

            float delta = (g[i] - g[j]) / quad_coef;
            float sum = alpha[i] + alpha[j];
            alpha[i] -= delta;
            alpha[j] += delta;

            if (sum > ci)
            {
                if (alpha[i] > ci)
                {
                    alpha[i] = ci;
                    alpha[j] = sum - ci;
                }
            }
            else
            {
                if (alpha[j] < 0)
                {
                    alpha[j] = 0;
                    alpha[i] = sum;
                }
            }

            if (sum > cj)
            {
                if (alpha[j] > cj)
                {
                    alpha[j] = cj;
                    alpha[i] = sum - cj;
                }
            }
            else
            {
                if (alpha[i] < 0)
                {
                    alpha[i] = 0;
                    alpha[j] = sum;
                }
            }
        }

        // update G
        float delta_alpha_i = alpha[i] - old_alpha_i;
        float delta_alpha_j = alpha[j] - old_alpha_j;
        for (int k = 0; k < activeSize; k++)
        {
            g[k] += (qi[k] * delta_alpha_i) + (qj[k] * delta_alpha_j);
        }

        // update alpha_status and G_bar
        {
            bool ui = IsUpperBound(i);
            bool uj = IsUpperBound(j);
            UpdateAlphaStatus(i);
            UpdateAlphaStatus(j);

            if (ui != IsUpperBound(i))
            {
                q(i, indices, numberOfVariables, qi);

                // gbar -= ci * qi if i left the upper bound; gbar += ci * qi if it reached it
                Mathematics.AddProductC(numberOfVariables, qi, 0, ui ? -ci : ci, gbar, 0);
            }

            if (uj != IsUpperBound(j))
            {
                q(j, indices, numberOfVariables, qj);

                // gbar -= cj * qj if j left the upper bound; gbar += cj * qj if it reached it
                Mathematics.AddProductC(numberOfVariables, qj, 0, uj ? -cj : cj, gbar, 0);
            }
        }
    }

    if (iter >= max_iter)
    {
        if (activeSize < numberOfVariables)
        {
            // reconstruct the whole gradient to calculate the objective value
            ReconstructGradient();
            activeSize = numberOfVariables;
            Trace.WriteLine("*");
        }

        Trace.WriteLine("WARNING: reaching max number of iterations.");
    }

    // calculate rho
    rho = CalculateRho();

    // the objective value could be computed here as 0.5 * sum(alpha[i] * (g[i] + p[i])),
    // as LIBSVM reports it; this port does not use it

    // put back the solution
    solution = new float[numberOfVariables];
    for (int i = 0; i < numberOfVariables; i++)
    {
        solution[activeSet[i]] = alpha[i];
    }

    Trace.WriteLine(string.Format(
        CultureInfo.InvariantCulture,
        "optimization finished, #iter = {0}",
        iter));

    return iter < max_iter;

    // returns 1 if already optimal, returns 0 otherwise
    int SelectWorkingSet(out int out_i, out int out_j)
    {
        // return i,j such that
        // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
        // j: minimizes the decrease of the objective value
        //    (if the quadratic coefficient <= 0, replace it with tau)
        //    -y_j * grad(f)_j < -y_i * grad(f)_i, j in I_low(\alpha)
        float gmax = float.NegativeInfinity;
        float gmax2 = float.NegativeInfinity;
        int gmax_idx = -1;
        int gmin_idx = -1;
        float obj_diff_min = float.PositiveInfinity;

        for (int t = 0; t < activeSize; t++)
        {
            if (y[t] == +1)
            {
                if (!IsUpperBound(t))
                {
                    if (-g[t] >= gmax)
                    {
                        gmax = -g[t];
                        gmax_idx = t;
                    }
                }
            }
            else
            {
                if (!IsLowerBound(t))
                {
                    if (g[t] >= gmax)
                    {
                        gmax = g[t];
                        gmax_idx = t;
                    }
                }
            }
        }

        int i = gmax_idx;
        if (i != -1)
        {
            // Q_i is not accessed when i == -1 (gmax stays at -INF in that case)
            q(i, indices, activeSize, temp);
        }

        for (int j = 0; j < activeSize; j++)
        {
            if (y[j] == +1)
            {
                if (!IsLowerBound(j))
                {
                    float grad_diff = gmax + g[j];
                    if (g[j] >= gmax2)
                    {
                        gmax2 = g[j];
                    }

                    if (grad_diff > 0)
                    {
                        float obj_diff;
                        float quad_coef = qd[i] + qd[j] - (2.0f * y[i] * temp[j]);
                        if (quad_coef > 0)
                        {
                            obj_diff = -(grad_diff * grad_diff) / quad_coef;
                        }
                        else
                        {
                            obj_diff = -(grad_diff * grad_diff) / TAU;
                        }

                        if (obj_diff <= obj_diff_min)
                        {
                            gmin_idx = j;
                            obj_diff_min = obj_diff;
                        }
                    }
                }
            }
            else
            {
                if (!IsUpperBound(j))
                {
                    float grad_diff = gmax - g[j];
                    if (-g[j] >= gmax2)
                    {
                        gmax2 = -g[j];
                    }

                    if (grad_diff > 0)
                    {
                        float obj_diff;
                        float quad_coef = qd[i] + qd[j] + (2.0f * y[i] * temp[j]);
                        if (quad_coef > 0)
                        {
                            obj_diff = -(grad_diff * grad_diff) / quad_coef;
                        }
                        else
                        {
                            obj_diff = -(grad_diff * grad_diff) / TAU;
                        }

                        if (obj_diff <= obj_diff_min)
                        {
                            gmin_idx = j;
                            obj_diff_min = obj_diff;
                        }
                    }
                }
            }
        }

        if (gmax + gmax2 < this.Tolerance)
        {
            out_i = 0;
            out_j = 0;
            return 1;
        }

        out_i = gmax_idx;
        out_j = gmin_idx;
        return 0;
    }

    void Shrink()
    {
        float gmax1 = float.NegativeInfinity;   // max { -y_i * grad(f)_i | i in I_up(\alpha) }
        float gmax2 = float.NegativeInfinity;   // max { y_i * grad(f)_i | i in I_low(\alpha) }

        // find the maximal violating pair first
        for (int i = 0; i < activeSize; i++)
        {
            if (y[i] == 1)
            {
                if (!IsUpperBound(i))
                {
                    if (-g[i] >= gmax1)
                    {
                        gmax1 = -g[i];
                    }
                }

                if (!IsLowerBound(i))
                {
                    if (g[i] >= gmax2)
                    {
                        gmax2 = g[i];
                    }
                }
            }
            else
            {
                if (!IsUpperBound(i))
                {
                    if (-g[i] >= gmax2)
                    {
                        gmax2 = -g[i];
                    }
                }

                if (!IsLowerBound(i))
                {
                    if (g[i] >= gmax1)
                    {
                        gmax1 = g[i];
                    }
                }
            }
        }

        if (!unshrink && gmax1 + gmax2 <= this.Tolerance * 10)
        {
            unshrink = true;
            ReconstructGradient();
            activeSize = numberOfVariables;
            Trace.WriteLine("*");
        }

        for (int i = 0; i < activeSize; i++)
        {
            if (IsShrunk(i, gmax1, gmax2))
            {
                activeSize--;
                while (activeSize > i)
                {
                    if (!IsShrunk(activeSize, gmax1, gmax2))
                    {
                        SwapIndex(i, activeSize);
                        break;
                    }

                    activeSize--;
                }
            }
        }
    }

    void ReconstructGradient()
    {
        // reconstruct inactive elements of G from G_bar and free variables
        if (activeSize == numberOfVariables)
        {
            return;
        }

        // scalar form: g[j] = gbar[j] + p[j] for the inactive part
        Mathematics.Add(
            numberOfVariables - activeSize,
            gbar,
            activeSize,
            p,
            activeSize,
            g,
            activeSize);

        int freeCount = 0;
        for (int j = 0; j < activeSize; j++)
        {
            if (IsFree(j))
            {
                freeCount++;
            }
        }

        if (2 * freeCount < activeSize)
        {
            Trace.WriteLine("WARNING: using -h 0 may be faster");
        }

        if (freeCount * numberOfVariables > 2 * activeSize * (numberOfVariables - activeSize))
        {
            for (int i = activeSize; i < numberOfVariables; i++)
            {
                q(indices[i], indices, activeSize, temp);
                for (int j = 0; j < activeSize; j++)
                {
                    if (IsFree(j))
                    {
                        g[i] += alpha[j] * temp[j];
                    }
                }
            }
        }
        else
        {
            for (int i = 0; i < activeSize; i++)
            {
                if (IsFree(i))
                {
                    q(indices[i], indices, numberOfVariables, temp);

                    // scalar form: g[j] += alpha[i] * temp[j] for the inactive part
                    Mathematics.AddProductC(
                        numberOfVariables - activeSize,
                        temp,
                        activeSize,
                        alpha[i],
                        g,
                        activeSize);
                }
            }
        }
    }

    void SwapIndex(int i, int j)
    {
        Swap(indices);
        Swap(y);
        Swap(g);
        Swap(alphaStatus);
        Swap(alpha);
        Swap(p);
        Swap(activeSet);
        Swap(gbar);

        void Swap<T>(T[] array)
        {
            T t = array[i];
            array[i] = array[j];
            array[j] = t;
        }
    }
}
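// A minimal usage sketch (illustrative only, not part of the solver): Optimize runs
// LIBSVM-style SMO on the dual problem min 0.5 * a'Qa + p'a subject to 0 <= a[i] <= c[i].
// The q callback fills `result` with one row of Q restricted to the given index set;
// the linear kernel and the y_i * y_j * K(x_i, x_j) form of Q below follow LIBSVM's
// C-SVC and are assumptions made just for this example, as is the name OptimizeExample.
private bool OptimizeExample(float[][] samples, int[] labels, out float[] alpha, out float rho)
{
    int n = samples.Length;    // labels are expected to be +1 or -1

    float[] c = new float[n];  // box constraints (C-SVC upper bounds)
    float[] p = new float[n];  // linear term of the objective
    for (int i = 0; i < n; i++)
    {
        c[i] = 1.0f;
        p[i] = -1.0f;
    }

    // q(i, indices, length, result) fills result[0..length-1] with
    // y_i * y_j * K(x_i, x_j) for j = indices[0..length-1]
    float[] Q(int i, int[] indices, int length, float[] result)
    {
        for (int jj = 0; jj < length; jj++)
        {
            int j = indices[jj];
            float dot = 0.0f;
            for (int d = 0; d < samples[i].Length; d++)
            {
                dot += samples[i][d] * samples[j][d];
            }

            result[jj] = labels[i] * labels[j] * dot;
        }

        return result;
    }

    return this.Optimize(n, c, p, labels, Q, out alpha, out rho);
}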
public static Tensor LRN(
    this Session session,
    Tensor x,
    int kernelSize,
    float alpha,
    float beta,
    float k)
{
    const string ActionName = "lrn";

    return session.RunOperation(
        ActionName,
        () =>
        {
            bool calculateGradient = session.CalculateGradients && x.CalculateGradient;

            Tensor y = session.AllocateTensor(ActionName, x.Shape, calculateGradient);
            Tensor scale = session.AllocateTensor("lrn wsp", x.Shape, false);

            // 1. calculate scale
            // scale(i) = k + alpha / n * sum(x(j) ^ 2)
            // scale will be reused later in back-propagation
            // use the output as a temporary buffer
            Vectors.Square(x.Length, x.Weights, 0, scale.Weights, 0);
            NeuralOperations.LRNKernel(scale.Axes, scale.Strides, scale.Weights, y.Weights, kernelSize);
            scale.Set(k);
            scale.AddProductC(y, alpha / kernelSize);

            // 2. calculate the forward tensor
            // y(i) = x(i) * scale(i) ^ -beta
            Vectors.Pow(scale.Length, scale.Weights, 0, -beta, y.Weights, 0);
            y.Mul(x);

#if !NOLEARNING
            if (calculateGradient)
            {
                session.Push(
                    ActionName,
                    () =>
                    {
                        Tensor work = session.AllocateTensor("lrn wsp2", x.Shape, false);

                        // 1. calculate x(i) * sum(y(j) * dy(j) / scale(j))
                        // use dx as a temporary buffer
                        Mathematics.Mul(y.Length, y.Weights, 0, y.Gradient, 0, x.Gradient, 0);
                        Mathematics.Div(x.Length, scale.Weights, 0, x.Gradient, 0);
                        NeuralOperations.LRNKernel(x.Shape.Axes, x.Shape.Strides, x.Gradient, work.Weights, kernelSize);
                        work.Mul(x);

                        // 2. calculate scale(i) ^ -beta * dy(i)
                        Vectors.Pow(scale.Length, scale.Weights, 0, -beta, x.Gradient, 0);
                        Mathematics.Mul(x.Length, y.Gradient, 0, x.Gradient, 0);

                        // 3. calculate the final sum
                        Mathematics.AddProductC(x.Length, work.Weights, 0, -2.0f * alpha * beta / kernelSize, x.Gradient, 0);
                    });
            }
#endif

            return y;
        });
}
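// A minimal usage sketch (illustrative only): applies LRN with the hyper-parameters
// popularized by AlexNet (Krizhevsky et al., 2012). The Session and input tensor are
// assumed to come from elsewhere in this library; `LRNExample` is a hypothetical name.
private static Tensor LRNExample(Session session, Tensor x)
{
    // y(i) = x(i) * (k + alpha / kernelSize * sum of x(j)^2 over the local window)^(-beta)
    return session.LRN(x, kernelSize: 5, alpha: 0.0001f, beta: 0.75f, k: 2.0f);
}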