/// <summary>
/// Registers an aggregator that fits a straight line (y = alpha * x + b) through the
/// (<paramref name="sourceXcolumn"/>, <paramref name="sourceYcolumn"/>) pairs of each group
/// and writes the slope, the intercept and the group's row count to the aggregate.
/// </summary>
/// <remarks>
/// If any row of the group has no value in either source column, the aggregator returns
/// immediately and writes nothing for that group.
/// The slope is 0.0 when the group has exactly one row or when the absolute value of the
/// denominator is below 0.0001 (which includes an empty group).
/// The intercept is the single y value for a one-row group and null for an empty group.
/// </remarks>
/// <param name="op">The group-by operation the aggregator is added to.</param>
/// <param name="sourceXcolumn">Column read as the x value (as double).</param>
/// <param name="sourceYcolumn">Column read as the y value (as double).</param>
/// <param name="alphaColumn">Aggregate column receiving the slope.</param>
/// <param name="bColumn">Aggregate column receiving the intercept.</param>
/// <param name="countColumn">Aggregate column receiving <c>groupRows.Count</c>.</param>
/// <returns>Whatever <c>AddAggregator</c> returns for <paramref name="op"/>.</returns>
public static MemoryGroupByOperation AddLinearRegression(this MemoryGroupByOperation op, string sourceXcolumn, string sourceYcolumn, string alphaColumn, string bColumn, string countColumn)
{
    return op.AddAggregator((aggregate, groupRows) =>
    {
        var n = 0;
        var totalX = 0.0;
        var totalY = 0.0;
        var totalXSquared = 0.0;
        var totalXTimesY = 0.0;

        foreach (var row in groupRows)
        {
            // One incomplete row abandons the whole group: nothing is written to the aggregate.
            if (!(row.HasValue(sourceXcolumn) && row.HasValue(sourceYcolumn)))
            {
                return;
            }

            var x = row.GetAs<double>(sourceXcolumn);
            var y = row.GetAs<double>(sourceYcolumn);

            n++;
            totalX += x;
            totalXSquared += x * x;
            totalY += y;
            totalXTimesY += x * y;
        }

        var denominator = (n * totalXSquared) - (totalX * totalX);

        // A single point, or a (near-)zero denominator, yields a flat line.
        double? slope;
        if (n == 1 || Math.Abs(denominator) < 0.0001)
        {
            slope = 0.0;
        }
        else
        {
            slope = ((n * totalXTimesY) - (totalX * totalY)) / denominator;
        }

        double? intercept = null;
        if (n == 1)
        {
            intercept = totalY;
        }
        else if (n > 1)
        {
            // Means are taken over every row of the group.
            var meanX = totalX / n;
            var meanY = totalY / n;
            intercept = meanY - (slope * meanX);
        }

        aggregate[bColumn] = intercept;
        aggregate[alphaColumn] = slope;
        aggregate[countColumn] = groupRows.Count;
    });
}
/// <summary>
/// Adds an aggregator that stores the maximum of <paramref name="sourceColumn"/> across
/// the rows of each group. New value will be int.
/// </summary>
/// <param name="op">The group-by operation the aggregator is added to.</param>
/// <param name="sourceColumn">Column whose values are read via <c>GetAs(sourceColumn, 0)</c>.</param>
/// <param name="targetColumn">
/// Aggregate column receiving the maximum; when null, <paramref name="sourceColumn"/> is used.
/// </param>
/// <returns>Whatever <c>AddAggregator</c> returns for <paramref name="op"/>.</returns>
public static MemoryGroupByOperation AddIntMax(this MemoryGroupByOperation op, string sourceColumn, string targetColumn = null)
{
    // Resolve the output column once; neither parameter is reassigned afterwards.
    var outputColumn = targetColumn ?? sourceColumn;

    return op.AddAggregator((aggregate, groupRows) =>
        aggregate[outputColumn] = groupRows.Max(row => row.GetAs(sourceColumn, 0)));
}