Пример #1
0
    /// <summary>
    /// Registers an aggregator that fits a straight line (y = alpha * x + b) to the rows of each group
    /// using the closed-form least-squares sums.
    /// </summary>
    /// <param name="op">The group-by operation the aggregator is added to.</param>
    /// <param name="sourceXcolumn">Column read from each row as the x value (as <see cref="double"/>).</param>
    /// <param name="sourceYcolumn">Column read from each row as the y value (as <see cref="double"/>).</param>
    /// <param name="alphaColumn">Aggregate column that receives the slope.</param>
    /// <param name="bColumn">Aggregate column that receives the intercept.</param>
    /// <param name="countColumn">Aggregate column that receives <c>groupRows.Count</c>.</param>
    /// <returns>The value returned by <c>AddAggregator</c>.</returns>
    /// <remarks>
    /// If any row of the group has no value in either source column, the aggregator stops at that row
    /// and writes none of the three target columns.
    /// The slope is 0 when the group holds exactly one row or when the denominator's magnitude is below 0.0001.
    /// The intercept is the single y value for a one-row group and <c>null</c> for an empty group.
    /// </remarks>
    public static MemoryGroupByOperation AddLinearRegression(this MemoryGroupByOperation op, string sourceXcolumn, string sourceYcolumn, string alphaColumn, string bColumn, string countColumn)
    {
        return op.AddAggregator((aggregate, groupRows) =>
        {
            var n = 0;
            var totalX = 0.0;
            var totalY = 0.0;
            var totalXX = 0.0;
            var totalXY = 0.0;

            foreach (var row in groupRows)
            {
                // All-or-nothing: a single incomplete row cancels the whole aggregate.
                if (!(row.HasValue(sourceXcolumn) && row.HasValue(sourceYcolumn)))
                {
                    return;
                }

                var x = row.GetAs<double>(sourceXcolumn);
                var y = row.GetAs<double>(sourceYcolumn);

                n++;
                totalX += x;
                totalY += y;
                totalXX += x * x;
                totalXY += x * y;
            }

            // Denominator of the least-squares slope: n * sum(x^2) - sum(x)^2.
            var denominator = (n * totalXX) - (totalX * totalX);

            double? slope;
            if (n == 1 || Math.Abs(denominator) < 0.0001)
            {
                // A single point, or (nearly) identical x values, gives no usable slope.
                slope = 0.0;
            }
            else
            {
                slope = ((n * totalXY) - (totalX * totalY)) / denominator;
            }

            double? intercept = null;
            if (n > 1)
            {
                // The means are taken over every row of the group, since each row contributed to the sums.
                intercept = (totalY / n) - (slope * (totalX / n));
            }
            else if (n == 1)
            {
                intercept = totalY;
            }

            aggregate[bColumn] = intercept;
            aggregate[alphaColumn] = slope;
            aggregate[countColumn] = groupRows.Count;
        });
    }
Пример #2
0
 /// <summary>
 /// Registers an aggregator that stores the largest <see cref="int"/> value of a column within each group.
 /// </summary>
 /// <param name="op">The group-by operation the aggregator is added to.</param>
 /// <param name="sourceColumn">Column read from each row via <c>GetAs(sourceColumn, 0)</c>.</param>
 /// <param name="targetColumn">Aggregate column that receives the maximum; when <c>null</c>, <paramref name="sourceColumn"/> is used.</param>
 /// <returns>The value returned by <c>AddAggregator</c>.</returns>
 /// <remarks>
 /// The stored value is an int. NOTE(review): the second argument of <c>GetAs</c> is presumably the
 /// fallback used for rows without a value - confirm against the row type.
 /// </remarks>
 public static MemoryGroupByOperation AddIntMax(this MemoryGroupByOperation op, string sourceColumn, string targetColumn = null)
 {
     // Resolve the destination once; neither parameter changes after this point.
     var destinationColumn = targetColumn ?? sourceColumn;

     return op.AddAggregator((aggregate, groupRows) =>
         aggregate[destinationColumn] = groupRows.Max(row => row.GetAs(sourceColumn, 0)));
 }