/// <summary>
/// Builds one drop-slots column description per input column, based on per-slot counts.
/// A slot is dropped when its score is strictly less than <c>args.Count</c>; runs of
/// adjacent dropped slots are merged into a single range. Columns in which no slot is
/// dropped are left out of the returned list.
/// </summary>
/// <param name="args">Arguments providing the source column names and the count cut-off.</param>
/// <param name="size">Number of columns; must match the sizes of <paramref name="scores"/> and <c>args.Column</c>.</param>
/// <param name="scores">Per-column arrays of slot scores.</param>
/// <param name="selectedCount">On return, the number of retained (not dropped) slots for each column.</param>
/// <returns>The columns that have at least one range of slots to drop.</returns>
private static List<DropSlotsTransform.Column> CreateDropSlotsColumns(Arguments args, int size, long[][] scores, out int[] selectedCount)
{
    Contracts.Assert(size > 0);
    Contracts.Assert(Utils.Size(scores) == size);
    Contracts.AssertValue(args);
    Contracts.Assert(Utils.Size(args.Column) == size);

    selectedCount = new int[scores.Length];
    var result = new List<DropSlotsTransform.Column>();

    for (int colIndex = 0; colIndex < size; colIndex++)
    {
        var column = new DropSlotsTransform.Column { Source = args.Column[colIndex] };
        var slotScores = scores[colIndex];
        var dropped = new List<DropSlotsTransform.Range>();

        int kept = 0;
        // Index of the first slot of the run of dropped slots currently being scanned, or -1 if none.
        int runStart = -1;

        for (int slot = 0; slot < slotScores.Length; slot++)
        {
            if (slotScores[slot] < args.Count)
            {
                // Below the cut-off: open a run unless one is already open.
                if (runStart < 0)
                {
                    runStart = slot;
                }
                continue;
            }

            // A retained slot closes any open run of dropped slots.
            kept++;
            if (runStart >= 0)
            {
                dropped.Add(new DropSlotsTransform.Range { Min = runStart, Max = slot - 1 });
                runStart = -1;
            }
        }

        // A run that reaches the end of the column still has to be recorded.
        if (runStart >= 0)
        {
            dropped.Add(new DropSlotsTransform.Range { Min = runStart, Max = slotScores.Length - 1 });
        }

        selectedCount[colIndex] = kept;

        if (dropped.Count == 0)
        {
            continue;
        }

        column.Slots = dropped.ToArray();
        result.Add(column);
    }

    return result;
}
/// <summary>
/// Builds the drop-slots description for the feature column from a vector of scores.
/// A stored score is retained when its absolute value is above the threshold, or equal to
/// the threshold while the tie budget is not exhausted; every other stored score is dropped.
/// For a sparse score vector, the positions that are not stored are added as dropped ranges too.
/// </summary>
/// <param name="args">Arguments; exactly one of <c>Threshold</c> and <c>NumSlotsToKeep</c> must be set.</param>
/// <param name="scores">The score vector (dense or sparse).</param>
/// <param name="selectedCount">On return, the number of stored scores that were retained.</param>
/// <returns>
/// The column with its ranges of slots to drop, or <c>null</c> when there is nothing to drop.
/// When <paramref name="scores"/> has no stored values, the column carries a single default range.
/// </returns>
private static DropSlotsTransform.Column CreateDropSlotsColumn(Arguments args, ref VBuffer<Single> scores, out int selectedCount)
{
    // Not checking the scores.Length, because:
    // 1. If it's the same as the features column length, we should be constructing the right DropSlots arguments.
    // 2. If it's less, we assume that the rest of the scores are zero and we drop the slots.
    // 3. If it's greater, the drop slots ignores the ranges that are outside the valid range of indices for the column.
    Contracts.Assert(args.Threshold.HasValue != args.NumSlotsToKeep.HasValue);

    var column = new DropSlotsTransform.Column { Source = args.FeatureColumn };
    selectedCount = 0;

    // Degenerate case, dropping all slots.
    if (scores.Count == 0)
    {
        column.Slots = new[] { new DropSlotsTransform.Range() };
        return column;
    }

    float cutoff;
    int tiesLeft;
    if (!args.Threshold.HasValue)
    {
        Contracts.Assert(args.NumSlotsToKeep.HasValue);
        cutoff = ComputeThreshold(scores.Values, scores.Count, args.NumSlotsToKeep.Value, out tiesLeft);
    }
    else
    {
        cutoff = args.Threshold.Value;
        // With an explicit positive threshold every tie is retained; with a non-positive one, none.
        if (cutoff > 0)
        {
            tiesLeft = int.MaxValue;
        }
        else
        {
            tiesLeft = 0;
        }
    }

    int stored = scores.Count;
    var dropped = new List<DropSlotsTransform.Range>();

    // First pass: ranges are expressed as positions within the stored values.
    // runStart is the first position of the run of dropped values being scanned, or -1 if none.
    int runStart = -1;
    for (int pos = 0; pos < stored; pos++)
    {
        var magnitude = Math.Abs(scores.Values[pos]);

        bool keep = magnitude > cutoff;
        if (!keep && magnitude == cutoff && tiesLeft > 0)
        {
            tiesLeft--;
            keep = true;
        }

        if (!keep)
        {
            if (runStart < 0)
            {
                runStart = pos;
            }
            continue;
        }

        // A retained value closes any open run of dropped values.
        selectedCount++;
        if (runStart >= 0)
        {
            dropped.Add(new DropSlotsTransform.Range { Min = runStart, Max = pos - 1 });
            runStart = -1;
        }
    }

    if (runStart >= 0)
    {
        dropped.Add(new DropSlotsTransform.Range { Min = runStart, Max = stored - 1 });
    }

    if (!scores.IsDense)
    {
        // Second pass (sparse only): translate positions into slot indices and add the
        // slots that have no stored value.
        var indices = scores.Indices;
        int logicalLength = scores.Length;
        int valueRangeCount = dropped.Count;

        // gapCursor identifies the gap immediately before stored position gapCursor
        // (gapCursor == stored identifies the gap after the last stored value).
        int gapCursor = 0;

        for (int r = 0; r < valueRangeCount; r++)
        {
            var current = dropped[r];
            Contracts.Assert(current.Max != null);
            int first = current.Min;
            int last = current.Max.Value;
            Contracts.Assert(first <= last);
            Contracts.Assert(last < stored);

            // Widen the range so that it also swallows the gaps on both of its sides.
            current.Min = first == 0 ? 0 : indices[first - 1] + 1;
            current.Max = last == stored - 1 ? logicalLength - 1 : indices[last + 1] - 1;

            // Add the gaps before this range.
            while (gapCursor < first)
            {
                int gapStart = gapCursor == 0 ? 0 : indices[gapCursor - 1] + 1;
                int gapEnd = indices[gapCursor] - 1;
                if (gapStart <= gapEnd)
                {
                    dropped.Add(new DropSlotsTransform.Range { Min = gapStart, Max = gapEnd });
                }
                gapCursor++;
            }

            // The cursor restarts at `last`, so gaps already covered by the widened range
            // may be emitted again as overlapping ranges.
            // NOTE(review): confirm the consumer of Slots tolerates overlapping ranges.
            gapCursor = last;
        }

        // Add the gaps after the last range.
        while (gapCursor <= stored)
        {
            int gapStart = gapCursor == 0 ? 0 : indices[gapCursor - 1] + 1;
            int gapEnd = gapCursor == stored ? logicalLength - 1 : indices[gapCursor] - 1;
            if (gapStart <= gapEnd)
            {
                dropped.Add(new DropSlotsTransform.Range { Min = gapStart, Max = gapEnd });
            }
            gapCursor++;
        }

        // Remove all slots past scores.Length (open-ended range: Max is left unset).
        dropped.Add(new DropSlotsTransform.Range { Min = logicalLength });
    }

    if (dropped.Count == 0)
    {
        return null;
    }

    column.Slots = dropped.ToArray();
    return column;
}
/// <summary>
/// Builds one drop-slots column description per input column, based on a score threshold.
/// A slot is retained when its score is above <paramref name="threshold"/>, or equal to it
/// while <paramref name="tiedScoresToKeep"/> is still positive (each retained tie consumes one
/// unit of that budget, which is shared across all columns in order). All other slots are
/// dropped, with adjacent dropped slots merged into a single range. Columns in which no slot
/// is dropped are left out of the returned list.
/// </summary>
/// <param name="cols">Source column names, one per column.</param>
/// <param name="size">Number of columns; must match the sizes of <paramref name="cols"/> and <paramref name="scores"/>.</param>
/// <param name="scores">Per-column arrays of slot scores.</param>
/// <param name="threshold">Score cut-off; must be non-negative.</param>
/// <param name="tiedScoresToKeep">How many slots scoring exactly the threshold may be retained; must be 0 when the threshold is 0.</param>
/// <param name="selectedCount">On return, the number of retained slots for each column.</param>
/// <returns>The columns that have at least one range of slots to drop.</returns>
private static List<DropSlotsTransform.Column> CreateDropSlotsColumns(string[] cols, int size, Single[][] scores, Single threshold, int tiedScoresToKeep, out int[] selectedCount)
{
    Contracts.Assert(size > 0);
    Contracts.Assert(Utils.Size(scores) == size);
    Contracts.Assert(Utils.Size(cols) == size);
    Contracts.Assert(threshold > 0 || (threshold == 0 && tiedScoresToKeep == 0));

    var result = new List<DropSlotsTransform.Column>();
    selectedCount = new int[scores.Length];

    for (int colIndex = 0; colIndex < size; colIndex++)
    {
        var column = new DropSlotsTransform.Column { Source = cols[colIndex] };
        var slotScores = scores[colIndex];
        var dropped = new List<DropSlotsTransform.Range>();

        int kept = 0;
        // Index of the first slot of the run of dropped slots currently being scanned, or -1 if none.
        int runStart = -1;

        for (int slot = 0; slot < slotScores.Length; slot++)
        {
            var value = slotScores[slot];

            bool keep = value > threshold;
            if (!keep && value == threshold && tiedScoresToKeep > 0)
            {
                // A tie with the threshold is retained as long as the budget lasts.
                tiedScoresToKeep--;
                keep = true;
            }

            if (!keep)
            {
                if (runStart < 0)
                {
                    runStart = slot;
                }
                continue;
            }

            // A retained slot closes any open run of dropped slots.
            kept++;
            if (runStart >= 0)
            {
                dropped.Add(new DropSlotsTransform.Range { Min = runStart, Max = slot - 1 });
                runStart = -1;
            }
        }

        // A run that reaches the end of the column still has to be recorded.
        if (runStart >= 0)
        {
            dropped.Add(new DropSlotsTransform.Range { Min = runStart, Max = slotScores.Length - 1 });
        }

        selectedCount[colIndex] = kept;

        if (dropped.Count == 0)
        {
            continue;
        }

        column.Slots = dropped.ToArray();
        result.Add(column);
    }

    return result;
}