/// <summary>
/// Compute the L1-norm. L1-norm computation doesn't subtract the mean from the source values.
/// However, we subtract the mean here in case subMean is true (if subMean is false, mean is zero).
/// </summary>
private static Float L1Norm(Float[] values, int count, Float mean = 0)
{
    if (count == 0)
        return 0;
    return SseUtils.SumAbs(mean, values, 0, count);
}
/// <summary>
/// Compute the L2-norm. L2-norm computation doesn't subtract the mean from the source values.
/// However, we subtract the mean here in case subMean is true (if subMean is false, mean is zero).
/// </summary>
private static Float L2Norm(Float[] values, int count, Float mean = 0)
{
    if (count == 0)
        return 0;
    return MathUtils.Sqrt(SseUtils.SumSq(mean, values, 0, count));
}
/// <summary>
/// Compute the LInf-norm. LInf-norm computation doesn't subtract the mean from the source values.
/// However, we subtract the mean here in case subMean is true (if subMean is false, mean is zero).
/// </summary>
private static Float LInfNorm(Float[] values, int count, Float mean = 0)
{
    if (count == 0)
        return 0;
    return SseUtils.MaxAbsDiff(mean, values, count);
}
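// Worked example (a sketch, not part of the original source): for values = { 1, -2, 3 },
// count = 3, and mean = (1 - 2 + 3) / 3 = 2/3, these helpers compute norms of the
// mean-centered values:
//   L1Norm:   |1 - 2/3| + |-2 - 2/3| + |3 - 2/3| = 1/3 + 8/3 + 7/3 = 16/3
//   L2Norm:   sqrt((1/3)^2 + (8/3)^2 + (7/3)^2)  = sqrt(114/9) ≈ 3.56
//   LInfNorm: max(1/3, 8/3, 7/3)                 = 8/3
// With the default mean = 0, they reduce to the ordinary L1/L2/LInf norms.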
/// <summary>
/// Returns the sum of the elements in the array.
/// </summary>
public static Float Sum(Float[] a)
{
    if (a == null || a.Length == 0)
        return 0;
    return SseUtils.Sum(a, a.Length);
}
/// <summary>
/// Computes the mean of the first <paramref name="count"/> values of <paramref name="src"/>,
/// treated as the explicit entries of a vector of logical length <paramref name="length"/>:
/// the remaining entries are implicit zeros, so the sum is divided by length rather than count.
/// </summary>
private static Float Mean(Float[] src, int count, int length)
{
    if (length == 0 || count == 0)
        return 0;
    return SseUtils.Sum(src, 0, count) / length;
}
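// Worked example (a sketch): for a sparse vector of logical length 8 whose explicit
// entries are src = { 4, 2, 2 } (count = 3), Mean returns (4 + 2 + 2) / 8 = 1; the
// five missing entries are implicit zeros, which count toward the length but not the sum.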
/// <summary>
/// Matrix multiplication:
/// if (add)
///     dst += mat * src
/// else
///     dst = mat * src
/// </summary>
/// <param name="add">Whether to add to <paramref name="dst"/> instead of overwriting it</param>
/// <param name="mat">The multiplier matrix</param>
/// <param name="src">The source vector</param>
/// <param name="dst">The destination vector</param>
public static void MatTimesSrc(bool add, ICpuFullMatrix mat, ICpuVector src, ICpuVector dst)
{
    bool colMajor = typeof(TMatrix) == typeof(CpuAlignedMatrixCol);
    AssertCompatible(mat, src, dst);
    var m = A(mat);
    SseUtils.MatTimesSrc(colMajor, add, m.Items, A(src).Items, A(dst).Items, m.RunCnt);
}
/// <summary>
/// Adds a multiple of an array to a second array.
/// </summary>
/// <param name="src">Array to add</param>
/// <param name="dst">Array to add to</param>
/// <param name="c">Multiple</param>
public static void AddMult(Float[] src, Float[] dst, Float c)
{
    Contracts.Check(src.Length == dst.Length, "Arrays must have the same dimensionality.");
    if (c == 0)
        return;

    SseUtils.AddScale(c, src, dst, src.Length);
}
/// <summary>
/// Perform in-place vector addition <c><paramref name="dst"/> += <paramref name="src"/></c>.
/// </summary>
public static void Add(Float[] src, Float[] dst)
{
    Contracts.CheckValue(src, nameof(src));
    Contracts.CheckValue(dst, nameof(dst));
    Contracts.CheckParam(src.Length == dst.Length, nameof(dst), "Arrays must have the same dimensionality.");
    if (src.Length == 0)
        return;

    SseUtils.Add(src, dst, src.Length);
}
public static Float DotProduct(Float[] a, ref VBuffer<Float> b)
{
    Contracts.Check(Utils.Size(a) == b.Length, "Vectors must have the same dimensionality.");
    if (b.Count == 0)
        return 0;

    if (b.IsDense)
        return SseUtils.DotProductDense(a, b.Values, b.Length);
    return SseUtils.DotProductSparse(a, b.Values, b.Indices, b.Count);
}
private static Float L2DiffSquaredDense(Float[] valuesA, Float[] valuesB, int length)
{
    Contracts.AssertValueOrNull(valuesA);
    Contracts.AssertValueOrNull(valuesB);
    Contracts.Assert(0 <= length && length <= Utils.Size(valuesA));
    Contracts.Assert(0 <= length && length <= Utils.Size(valuesB));

    if (length == 0)
        return 0;
    return SseUtils.L2DistSquared(valuesA, valuesB, length);
}
/// <summary>
/// Multiplies arrays element-wise, dst *= a, and returns the result in <paramref name="dst"/> (Hadamard product).
/// </summary>
public static void MulElementWise(ref VBuffer<Float> a, ref VBuffer<Float> dst)
{
    Contracts.Check(a.Length == dst.Length, "Vectors must have the same dimensionality.");
    if (a.IsDense && dst.IsDense)
        SseUtils.MulElementWise(a.Values, dst.Values, dst.Values, a.Length);
    else
        VBufferUtils.ApplyWithEitherDefined(ref a, ref dst, (int ind, Float v1, ref Float v2) => { v2 *= v1; });
}
/// <summary>
/// Computes the dot product of two arrays, where <paramref name="offset"/> is treated as a's zero index.
/// </summary>
/// <param name="a">One array</param>
/// <param name="b">The second array (given as a VBuffer)</param>
/// <param name="offset">Offset in 'a'</param>
/// <returns>The dot product</returns>
public static Float DotProductWithOffset(ref VBuffer<Float> a, int offset, ref VBuffer<Float> b)
{
    Contracts.Check(0 <= offset && offset <= a.Length);
    Contracts.Check(b.Length <= a.Length - offset, "VBuffer b must be no longer than a.Length - offset.");

    if (a.Count == 0 || b.Count == 0)
        return 0;
    if (a.IsDense)
    {
        if (b.IsDense)
            return SseUtils.DotProductDense(a.Values, offset, b.Values, b.Length);
        return SseUtils.DotProductSparse(a.Values, offset, b.Values, b.Indices, b.Count);
    }
    else
    {
        Float result = 0;
        // Find the window of a's explicit entries whose indices fall in [offset, offset + b.Length).
        int aMin = Utils.FindIndexSorted(a.Indices, 0, a.Count, offset);
        int aLim = Utils.FindIndexSorted(a.Indices, 0, a.Count, offset + b.Length);
        if (b.IsDense)
        {
            for (int iA = aMin; iA < aLim; ++iA)
                result += a.Values[iA] * b.Values[a.Indices[iA] - offset];
            return result;
        }

        // Both sparse: merge-join the two sorted index lists.
        for (int iA = aMin, iB = 0; iA < aLim && iB < b.Count;)
        {
            int aIndex = a.Indices[iA];
            int bIndex = b.Indices[iB];
            int comp = (aIndex - offset) - bIndex;
            if (comp == 0)
                result += a.Values[iA++] * b.Values[iB++];
            else if (comp < 0)
                iA++;
            else
                iB++;
        }
        return result;
    }
}
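// Worked example for the sparse-'a' path above (a sketch): with a.Indices = { 1, 3, 5, 8 },
// offset = 3, and b.Length = 4, FindIndexSorted narrows 'a' to the window of entries whose
// indices lie in [3, 7), i.e. those at indices 3 and 5. If 'b' is dense, those two entries
// are multiplied against b.Values[0] and b.Values[2]; if 'b' is sparse, the final loop
// merge-joins the two sorted index lists (shifting a's indices by -offset) and accumulates
// products only where both vectors have an explicit entry.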
/// <summary>
/// Computes the dot product of two arrays, where <paramref name="offset"/> is treated as a's zero index.
/// </summary>
/// <param name="a">One array</param>
/// <param name="b">The second array (given as a VBuffer)</param>
/// <param name="offset">Offset in 'a'</param>
/// <returns>The dot product</returns>
public static Float DotProductWithOffset(Float[] a, int offset, ref VBuffer<Float> b)
{
    Contracts.Check(0 <= offset && offset <= a.Length);
    Contracts.Check(b.Length <= a.Length - offset, "VBuffer b must be no longer than a.Length - offset.");
    if (b.Count == 0)
        return 0;

    if (b.IsDense)
        return SseUtils.DotProductDense(a, offset, b.Values, b.Length);
    return SseUtils.DotProductSparse(a, offset, b.Values, b.Indices, b.Count);
}
/// <summary>
/// Multiplies the array by a real value.
/// </summary>
/// <param name="dst">The array</param>
/// <param name="c">Value to multiply vector with</param>
public static void ScaleBy(Float[] dst, Float c)
{
    if (c == 1)
        return;
    if (c != 0)
        SseUtils.Scale(c, dst, dst.Length);
    else
        Array.Clear(dst, 0, dst.Length);
}
/// <summary>
/// Compute the standard deviation.
/// We have two overloads of StdDev instead of one with <see cref="Nullable{Float}"/> mean for perf reasons.
/// </summary>
private static Float StdDev(Float[] values, int count, int length, Float mean)
{
    Contracts.Assert(0 <= count && count <= length);
    if (count == 0)
        return 0;
    Float sumSq = 0;
    if (count != length && mean != 0)
    {
        // Sparse representation: each implicit zero contributes mean^2 to the sum of squared deviations.
        Float meanSq = mean * mean;
        sumSq = (length - count) * meanSq;
    }
    sumSq += SseUtils.SumSq(mean, values, 0, count);
    return MathUtils.Sqrt(sumSq / length);
}
/// <summary>
/// Compute the standard deviation. When both subMean and useStd are true, we technically need to compute variance
/// based on centered values (i.e. after subtracting the mean). But since the centered
/// values' mean is approximately zero, we can use the variance of the non-centered values.
/// </summary>
private static Float StdDev(Float[] values, int count, int length)
{
    Contracts.Assert(0 <= count && count <= length);
    if (count == 0)
        return 0;
    // We need a mean to compute the variance.
    Float tmpMean = SseUtils.Sum(values, 0, count) / length;
    Float sumSq = 0;
    if (count != length && tmpMean != 0)
    {
        // Sparse representation: each implicit zero contributes tmpMean^2 to the sum of squared deviations.
        Float meanSq = tmpMean * tmpMean;
        sumSq = (length - count) * meanSq;
    }
    sumSq += SseUtils.SumSq(tmpMean, values, 0, count);
    return MathUtils.Sqrt(sumSq / length);
}
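// Worked example (a sketch): for a vector of logical length 4 stored sparsely as
// values = { 2, 2 } (count = 2), the mean is (2 + 2) / 4 = 1. The two implicit zeros each
// contribute (0 - 1)^2 = 1, which is exactly the (length - count) * meanSq term, and the
// explicit entries contribute (2 - 1)^2 + (2 - 1)^2 = 2 via SseUtils.SumSq, so
// StdDev = sqrt((2 + 2) / 4) = 1.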
private static Float L2DistSquaredHalfSparse(Float[] valuesA, int lengthA, Float[] valuesB, int[] indicesB, int countB)
{
    Contracts.AssertValueOrNull(valuesA);
    Contracts.AssertValueOrNull(valuesB);
    Contracts.AssertValueOrNull(indicesB);
    Contracts.Assert(0 <= lengthA && lengthA <= Utils.Size(valuesA));
    Contracts.Assert(0 <= countB && countB <= Utils.Size(indicesB));
    Contracts.Assert(countB <= Utils.Size(valuesB));

    var normA = SseUtils.SumSq(valuesA, 0, lengthA);
    if (countB == 0)
        return normA;
    var normB = SseUtils.SumSq(valuesB, 0, countB);
    var dotP = SseUtils.DotProductSparse(valuesA, valuesB, indicesB, countB);
    var res = normA + normB - 2 * dotP;
    return res < 0 ? 0 : res;
}
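// Note (a sketch of the identity used above): for dense 'a' and sparse 'b',
// ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * <a, b>, and <a, b> only needs b's explicit entries.
// Floating-point rounding can push the right-hand side slightly below zero when 'a' and 'b'
// are nearly equal, hence the final clamp to zero.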
/// <summary>
/// Adds a multiple of a <see cref="VBuffer{T}"/> to a <see cref="Float"/> array.
/// </summary>
/// <param name="src">Buffer to add</param>
/// <param name="dst">Array to add to</param>
/// <param name="c">Coefficient</param>
public static void AddMult(ref VBuffer<Float> src, Float[] dst, Float c)
{
    Contracts.CheckValue(dst, nameof(dst));
    Contracts.CheckParam(src.Length == dst.Length, nameof(dst), "Arrays must have the same dimensionality.");

    if (src.Count == 0 || c == 0)
        return;

    if (src.IsDense)
        SseUtils.AddScale(c, src.Values, dst, src.Count);
    else
    {
        for (int i = 0; i < src.Count; i++)
            dst[src.Indices[i]] += c * src.Values[i];
    }
}
public static Float DotProduct(ref VBuffer<Float> a, ref VBuffer<Float> b)
{
    Contracts.Check(a.Length == b.Length, "Vectors must have the same dimensionality.");
    if (a.Count == 0 || b.Count == 0)
        return 0;
    if (a.IsDense)
    {
        if (b.IsDense)
            return SseUtils.DotProductDense(a.Values, b.Values, a.Length);
        return SseUtils.DotProductSparse(a.Values, b.Values, b.Indices, b.Count);
    }

    if (b.IsDense)
        return SseUtils.DotProductSparse(b.Values, a.Values, a.Indices, a.Count);
    return DotProductSparse(a.Values, a.Indices, 0, a.Count, b.Values, b.Indices, 0, b.Count, 0);
}
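// Usage sketch (hypothetical values; assumes the VBuffer<Float> constructors taking
// (length, values) and (length, count, values, indices) from the same codebase):
//   var dense  = new VBuffer<Float>(4, new Float[] { 1, 2, 3, 4 });
//   var sparse = new VBuffer<Float>(4, 2, new Float[] { 5, 6 }, new int[] { 1, 3 });
//   Float dot = DotProduct(ref dense, ref sparse);  // 2*5 + 4*6 = 34, via DotProductSparse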
private static void FillValues(IExceptionContext ectx, ref VBuffer<Float> src, ref VBuffer<Float> dst, Float divisor, Float scale, Float offset = 0)
{
    int count = src.Count;
    int length = src.Length;
    ectx.Assert(Utils.Size(src.Values) >= count);
    ectx.Assert(divisor >= 0);

    if (count == 0)
    {
        dst = new VBuffer<Float>(length, 0, dst.Values, dst.Indices);
        return;
    }

    ectx.Assert(count > 0);
    ectx.Assert(length > 0);
    Float normScale = scale;
    if (divisor > 0)
        normScale /= divisor;

    // Don't normalize small values.
    if (normScale < MinScale)
        normScale = 1;

    if (offset == 0)
    {
        var dstValues = dst.Values;
        if (Utils.Size(dstValues) < count)
            dstValues = new Float[count];
        var dstIndices = dst.Indices;
        if (!src.IsDense)
        {
            if (Utils.Size(dstIndices) < count)
                dstIndices = new int[count];
            Array.Copy(src.Indices, dstIndices, count);
        }

        SseUtils.Scale(normScale, src.Values, dstValues, count);
        dst = new VBuffer<Float>(length, count, dstValues, dstIndices);
        return;
    }

    // Subtracting the mean requires a dense representation.
    src.CopyToDense(ref dst);

    if (normScale != 1)
        SseUtils.ScaleAdd(normScale, -offset, dst.Values, length);
    else
        SseUtils.Add(-offset, dst.Values, length);
}
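// Worked example (a sketch; assumes SseUtils.ScaleAdd(a, b, v, n) computes v[i] = a * (v[i] + b),
// as the mean-subtraction path above implies): with dense src = { 2, 4, 6 }, divisor = 2,
// scale = 1, and offset = 4, the vector is densified and mapped to
// 0.5 * ({ 2, 4, 6 } - 4) = { -1, 0, 1 }. With offset = 0, the (possibly sparse)
// representation is instead scaled in place, reusing src's indices.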
/// <summary>
/// Returns a dot product of dense vector 'a' starting from offset 'aOffset' and sparse vector 'b'
/// with first 'count' valid elements and their corresponding 'indices'.
/// </summary>
private static Float DotProduct(Float[] a, int aOffset, Float[] b, int[] indices, int count)
{
    Contracts.Assert(count <= indices.Length);
    return SseUtils.DotProductSparse(a, aOffset, b, indices, count);
}
public static void Add(float[] src, int[] indices, float[] dst, int dstOffset, int count) => SseUtils.Add(src, indices, dst, dstOffset, count);
public static float L2DistSquared(float[] a, float[] b, int count) => SseUtils.L2DistSquared(a, b, count);
public static float DotProductSparse(float[] a, int offset, float[] b, int[] indices, int count) => SseUtils.DotProductSparse(a, offset, b, indices, count);
public static float DotProductDense(float[] a, int offset, float[] b, int count) => SseUtils.DotProductDense(a, offset, b, count);
public static void MulElementWise(float[] src1, float[] src2, float[] dst, int count) => SseUtils.MulElementWise(src1, src2, dst, count);
public static float SumSq(float[] src, int count) => SseUtils.SumSq(src, count);
public static void Scale(float a, float[] dst, int count) => SseUtils.Scale(a, dst, count);
/// <inheritdoc/>
protected override void TrainWithoutLock(IProgressChannelProvider progress, FloatLabelCursor.Factory cursorFactory, IRandom rand,
    IdToIdxLookup idToIdx, int numThreads, DualsTableBase duals, Float[] biasReg, Float[] invariants, Float lambdaNInv,
    VBuffer<Float>[] weights, Float[] biasUnreg, VBuffer<Float>[] l1IntermediateWeights, Float[] l1IntermediateBias, Float[] featureNormSquared)
{
    Contracts.AssertValueOrNull(progress);
    Contracts.Assert(_args.L1Threshold.HasValue);
    Contracts.AssertValueOrNull(idToIdx);
    Contracts.AssertValueOrNull(invariants);
    Contracts.AssertValueOrNull(featureNormSquared);
    int weightArraySize = WeightArraySize;
    Contracts.Assert(weightArraySize == _numClasses);
    Contracts.Assert(Utils.Size(weights) == weightArraySize);
    Contracts.Assert(Utils.Size(biasReg) == weightArraySize);
    Contracts.Assert(Utils.Size(biasUnreg) == weightArraySize);

    int maxUpdateTrials = 2 * numThreads;
    var l1Threshold = _args.L1Threshold.Value;
    bool l1ThresholdZero = l1Threshold == 0;
    var lr = _args.BiasLearningRate * _args.L2Const.Value;

    var pch = progress != null ? progress.StartProgressChannel("Dual update") : null;
    using (pch)
    using (var cursor = _args.Shuffle ? cursorFactory.Create(rand) : cursorFactory.Create())
    {
        long rowCount = 0;
        if (pch != null)
            pch.SetHeader(new ProgressHeader("examples"), e => e.SetProgress(0, rowCount));

        Func<UInt128, long> getIndexFromId = GetIndexFromIdGetter(idToIdx);
        while (cursor.MoveNext())
        {
            long idx = getIndexFromId(cursor.Id);
            long dualIndexInitPos = idx * weightArraySize;
            var features = cursor.Features;
            var label = (int)cursor.Label;
            Float invariant;
            Float normSquared;
            if (invariants != null)
            {
                invariant = invariants[idx];
                Contracts.AssertValue(featureNormSquared);
                normSquared = featureNormSquared[idx];
            }
            else
            {
                normSquared = VectorUtils.NormSquared(features);
                if (_args.BiasLearningRate == 0)
                    normSquared += 1;

                invariant = _loss.ComputeDualUpdateInvariant(2 * normSquared * lambdaNInv * GetInstanceWeight(cursor));
            }

            // The output for the label class using current weights and bias.
            var labelOutput = WDot(ref features, ref weights[label], biasReg[label] + biasUnreg[label]);
            var instanceWeight = GetInstanceWeight(cursor);

            // This will be the new dual variable corresponding to the label class.
            Float labelDual = 0;

            // This will be used to update the weights and regularized bias corresponding to the label class.
            Float labelPrimalUpdate = 0;

            // This will be used to update the unregularized bias corresponding to the label class.
            Float labelAdjustment = 0;

            // Iterates through all classes.
            for (int iClass = 0; iClass < _numClasses; iClass++)
            {
                // Skip the dual/weights/bias update for the label class. It is taken care of at the end.
                if (iClass == label)
                    continue;

                // Loop trials for compare-and-swap updates of duals.
                // In general, concurrent update conflicts on the same dual variable are rare
                // if the data is shuffled.
                for (int numTrials = 0; numTrials < maxUpdateTrials; numTrials++)
                {
                    long dualIndex = iClass + dualIndexInitPos;
                    var dual = duals[dualIndex];
                    var output = labelOutput + labelPrimalUpdate * normSquared - WDot(ref features, ref weights[iClass], biasReg[iClass] + biasUnreg[iClass]);
                    var dualUpdate = _loss.DualUpdate(output, 1, dual, invariant, numThreads);

                    // The successive over-relaxation approach to adjust the sum of dual variables (biasReg) to zero.
                    // Reference for details: http://stat.rutgers.edu/home/tzhang/papers/ml02_dual.pdf, pp. 16-17.
                    var adjustment = l1ThresholdZero ? lr * biasReg[iClass] : lr * l1IntermediateBias[iClass];
                    dualUpdate -= adjustment;
                    bool success = false;
                    duals.ApplyAt(dualIndex, (long index, ref Float value) =>
                        { success = Interlocked.CompareExchange(ref value, dual + dualUpdate, dual) == dual; });

                    if (success)
                    {
                        // Note: dualConstraint[iClass] = lambdaNInv * (sum of duals[iClass])
                        var primalUpdate = dualUpdate * lambdaNInv * instanceWeight;
                        labelDual -= dual + dualUpdate;
                        labelPrimalUpdate += primalUpdate;
                        biasUnreg[iClass] += adjustment * lambdaNInv * instanceWeight;
                        labelAdjustment -= adjustment;

                        if (l1ThresholdZero)
                        {
                            VectorUtils.AddMult(ref features, weights[iClass].Values, -primalUpdate);
                            biasReg[iClass] -= primalUpdate;
                        }
                        else
                        {
                            // Iterative shrinkage-thresholding (aka soft-thresholding).
                            // Update v = denseWeights as if there were no L1.
                            // Thresholding: if |v[j]| < threshold, turn off weights[j].
                            // If not, shrink: w[j] = v[j] - sign(v[j]) * threshold.
                            l1IntermediateBias[iClass] -= primalUpdate;
                            if (_args.BiasLearningRate == 0)
                            {
                                biasReg[iClass] = Math.Abs(l1IntermediateBias[iClass]) - l1Threshold > 0.0
                                    ? l1IntermediateBias[iClass] - Math.Sign(l1IntermediateBias[iClass]) * l1Threshold
                                    : 0;
                            }

                            if (features.IsDense)
                                SseUtils.SdcaL1UpdateDense(-primalUpdate, features.Length, features.Values, l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values);
                            else if (features.Count > 0)
                                SseUtils.SdcaL1UpdateSparse(-primalUpdate, features.Length, features.Values, features.Indices, features.Count, l1Threshold, l1IntermediateWeights[iClass].Values, weights[iClass].Values);
                        }

                        break;
                    }
                }
            }

            // Updating with label class weights and dual variable.
            duals[label + dualIndexInitPos] = labelDual;
            biasUnreg[label] += labelAdjustment * lambdaNInv * instanceWeight;
            if (l1ThresholdZero)
            {
                VectorUtils.AddMult(ref features, weights[label].Values, labelPrimalUpdate);
                biasReg[label] += labelPrimalUpdate;
            }
            else
            {
                l1IntermediateBias[label] += labelPrimalUpdate;
                var intermediateBias = l1IntermediateBias[label];
                biasReg[label] = Math.Abs(intermediateBias) - l1Threshold > 0.0
                    ? intermediateBias - Math.Sign(intermediateBias) * l1Threshold
                    : 0;

                if (features.IsDense)
                    SseUtils.SdcaL1UpdateDense(labelPrimalUpdate, features.Length, features.Values, l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values);
                else if (features.Count > 0)
                    SseUtils.SdcaL1UpdateSparse(labelPrimalUpdate, features.Length, features.Values, features.Indices, features.Count, l1Threshold, l1IntermediateWeights[label].Values, weights[label].Values);
            }

            rowCount++;
        }
    }
}
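// Illustrative sketch (not part of the trainer): the soft-thresholding (shrinkage) rule
// that the bias updates above apply, and that SdcaL1UpdateDense/SdcaL1UpdateSparse apply
// per weight coordinate, written out as a scalar helper:
//
//   private static Float SoftThreshold(Float v, Float threshold)
//   {
//       // Inside the threshold band the weight is turned off; outside, it shrinks toward zero.
//       return Math.Abs(v) - threshold > 0.0
//           ? v - Math.Sign(v) * threshold
//           : 0;
//   }
//
// For example, with threshold = 0.5: SoftThreshold(1.2f, 0.5f) yields 0.7f, and
// SoftThreshold(-0.3f, 0.5f) yields 0.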
public static float SumAbs(float[] src, int offset, int count) => SseUtils.SumAbs(src, offset, count);