/// <summary>
        /// Builds the drop-slots specification for each input column from its per-slot scores.
        /// A slot is dropped when its score is strictly below <c>args.Count</c>; consecutive dropped
        /// slots are merged into a single inclusive range.
        /// </summary>
        /// <param name="args">Transform arguments; supplies the source columns and the cut-off <c>Count</c>.</param>
        /// <param name="size">Number of columns; asserted to equal the sizes of <paramref name="scores"/> and <c>args.Column</c>.</param>
        /// <param name="scores">One array of per-slot scores for each column.</param>
        /// <param name="selectedCount">Receives, for each column, the number of slots that are kept.</param>
        /// <returns>One entry for every column that has at least one slot to drop; other columns are omitted.</returns>
        private static List<DropSlotsTransform.Column> CreateDropSlotsColumns(Arguments args, int size, long[][] scores, out int[] selectedCount)
        {
            Contracts.Assert(size > 0);
            Contracts.Assert(Utils.Size(scores) == size);
            Contracts.AssertValue(args);
            Contracts.Assert(Utils.Size(args.Column) == size);

            selectedCount = new int[scores.Length];
            var result = new List<DropSlotsTransform.Column>();

            for (int colIndex = 0; colIndex < size; colIndex++)
            {
                var source = args.Column[colIndex];
                long[] slotScores = scores[colIndex];
                var dropped = new List<DropSlotsTransform.Range>();
                int kept = 0;

                int slot = 0;
                while (slot < slotScores.Length)
                {
                    if (slotScores[slot] < args.Count)
                    {
                        // Start of a run of dropped slots: advance to the first slot that is
                        // not dropped (or to the end) and record the whole run as one range.
                        int first = slot;
                        do
                        {
                            slot++;
                        }
                        while (slot < slotScores.Length && slotScores[slot] < args.Count);

                        dropped.Add(new DropSlotsTransform.Range { Min = first, Max = slot - 1 });
                    }
                    else
                    {
                        kept++;
                        slot++;
                    }
                }

                selectedCount[colIndex] = kept;

                // Columns that keep every slot need no drop-slots entry.
                if (dropped.Count > 0)
                {
                    result.Add(new DropSlotsTransform.Column { Source = source, Slots = dropped.ToArray() });
                }
            }

            return result;
        }
Esempio n. 2
0
        /// <summary>
        /// Builds the drop-slots specification for the feature column from a vector of per-slot scores.
        /// An explicit entry is kept when the absolute value of its score is strictly above the threshold,
        /// or equal to it while the tie budget is not exhausted; every other entry is dropped, and
        /// consecutive dropped entries are merged into one inclusive range.
        /// </summary>
        /// <param name="args">Arguments; exactly one of <c>Threshold</c> and <c>NumSlotsToKeep</c> is expected to be set.</param>
        /// <param name="scores">Score vector, read through its Count, Values, Indices, Length and IsDense members.</param>
        /// <param name="selectedCount">Receives the number of explicit entries that are kept.</param>
        /// <returns>
        /// The column specification, or <c>null</c> when no range was produced (only possible for a dense
        /// vector, because the sparse path always appends a trailing range).
        /// </returns>
        private static DropSlotsTransform.Column CreateDropSlotsColumn(Arguments args, ref VBuffer <Single> scores, out int selectedCount)
        {
            // Not checking the scores.Length, because:
            // 1. If it's the same as the features column length, we should be constructing the right DropSlots arguments.
            // 2. If it's less, we assume that the rest of the scores are zero and we drop the slots.
            // 3. If it's greater, the drop slots ignores the ranges that are outside the valid range of indices for the column.

            // Exactly one of the two selection modes must be specified.
            Contracts.Assert(args.Threshold.HasValue != args.NumSlotsToKeep.HasValue);
            var col = new DropSlotsTransform.Column();

            col.Source    = args.FeatureColumn;
            selectedCount = 0;

            // Degenerate case, dropping all slots: a single default-constructed range is used for this.
            if (scores.Count == 0)
            {
                var range = new DropSlotsTransform.Range();
                col.Slots = new DropSlotsTransform.Range[] { range };
                return(col);
            }

            // tiedScoresToKeep is the remaining budget of entries whose |score| equals the threshold
            // and that may still be kept.
            int   tiedScoresToKeep;
            float threshold;

            if (args.Threshold.HasValue)
            {
                // Explicit threshold: with a positive threshold the tie budget is int.MaxValue, so ties
                // are kept; otherwise the budget is 0 and entries equal to the threshold are dropped.
                threshold        = args.Threshold.Value;
                tiedScoresToKeep = threshold > 0 ? int.MaxValue : 0;
            }
            else
            {
                // ComputeThreshold is defined outside this block; presumably it derives the cut-off and
                // the tie budget from the requested number of slots to keep - TODO confirm.
                Contracts.Assert(args.NumSlotsToKeep.HasValue);
                threshold = ComputeThreshold(scores.Values, scores.Count, args.NumSlotsToKeep.Value, out tiedScoresToKeep);
            }

            // Ranges built below are expressed as positions into scores.Values (0 .. scores.Count - 1).
            var slots = new List <DropSlotsTransform.Range>();

            for (int i = 0; i < scores.Count; i++)
            {
                var score = Math.Abs(scores.Values[i]);
                if (score > threshold)
                {
                    selectedCount++;
                    continue;
                }
                if (score == threshold && tiedScoresToKeep > 0)
                {
                    tiedScoresToKeep--;
                    selectedCount++;
                    continue;
                }

                // Entry i is dropped: open a range and extend it over the following dropped entries.
                var range = new DropSlotsTransform.Range();
                range.Min = i;
                while (++i < scores.Count)
                {
                    score = Math.Abs(scores.Values[i]);
                    // The first kept entry ends the run. It is counted here because the outer loop's
                    // i++ steps past it without examining it again.
                    if (score > threshold)
                    {
                        selectedCount++;
                        break;
                    }
                    if (score == threshold && tiedScoresToKeep > 0)
                    {
                        tiedScoresToKeep--;
                        selectedCount++;
                        break;
                    }
                }
                // i is now either the kept entry that ended the run or scores.Count.
                range.Max = i - 1;
                slots.Add(range);
            }

            if (!scores.IsDense)
            {
                // Sparse vector: translate the ranges from positions in Values to slot indices via
                // scores.Indices, and also drop every slot that has no explicit entry.
                // ii is the next position in Values whose preceding gap has not been emitted yet.
                int ii    = 0;
                // Only the ranges that exist at this point are remapped; gap ranges appended inside
                // the loop are already expressed in slot indices.
                var count = slots.Count;
                for (int i = 0; i < count; i++)
                {
                    // NOTE(review): the in-place update below is only visible in the list if Range is
                    // a reference type - confirm.
                    var range = slots[i];
                    Contracts.Assert(range.Max != null);
                    var min = range.Min;
                    var max = range.Max.Value;
                    Contracts.Assert(min <= max);
                    Contracts.Assert(max < scores.Count);

                    // Widen the range so that it starts right after the previous explicit entry's slot
                    // (or at 0) and ends right before the next explicit entry's slot (or at the last slot).
                    range.Min = min == 0 ? 0 : scores.Indices[min - 1] + 1;
                    range.Max = max == scores.Count - 1 ? scores.Length - 1 : scores.Indices[max + 1] - 1;

                    // Add the gaps before this range.
                    for (; ii < min; ii++)
                    {
                        var gapMin = ii == 0 ? 0 : scores.Indices[ii - 1] + 1;
                        var gapMax = scores.Indices[ii] - 1;
                        // An empty gap (adjacent explicit entries) yields gapMin > gapMax and is skipped.
                        if (gapMin <= gapMax)
                        {
                            var gap = new DropSlotsTransform.Range();
                            gap.Min = gapMin;
                            gap.Max = gapMax;
                            slots.Add(gap);
                        }
                    }
                    // NOTE(review): resuming at max revisits gaps that already lie inside the widened
                    // range above, so some appended gap ranges overlap it; presumably the consumer
                    // tolerates overlapping ranges - confirm.
                    ii = max;
                }

                // Add the gaps after the last range.
                for (; ii <= scores.Count; ii++)
                {
                    var gapMin = ii == 0 ? 0 : scores.Indices[ii - 1] + 1;
                    // For ii == scores.Count the gap runs from after the last explicit entry to the last slot.
                    var gapMax = ii == scores.Count ? scores.Length - 1 : scores.Indices[ii] - 1;
                    if (gapMin <= gapMax)
                    {
                        var gap = new DropSlotsTransform.Range();
                        gap.Min = gapMin;
                        gap.Max = gapMax;
                        slots.Add(gap);
                    }
                }

                // Remove all slots past scores.Length.
                // Max is left unset here; presumably an unset Max means "to the end" - TODO confirm.
                var lastRange = new DropSlotsTransform.Range();
                lastRange.Min = scores.Length;
                slots.Add(lastRange);
            }

            if (slots.Count > 0)
            {
                col.Slots = slots.ToArray();
                return(col);
            }

            // Nothing to drop.
            return(null);
        }
Esempio n. 3
0
        /// <summary>
        /// Builds the drop-slots specification for each column from its per-slot scores and a threshold.
        /// A slot is kept when its score is strictly above <paramref name="threshold"/>, or equal to it
        /// while <paramref name="tiedScoresToKeep"/> is still positive (each kept tie consumes one unit of
        /// that budget, which is shared across all columns). All other slots are dropped, and consecutive
        /// dropped slots are merged into a single inclusive range.
        /// </summary>
        /// <param name="cols">Source column names, one per column.</param>
        /// <param name="size">Number of columns; asserted to equal the sizes of <paramref name="scores"/> and <paramref name="cols"/>.</param>
        /// <param name="scores">One array of per-slot scores for each column.</param>
        /// <param name="threshold">Cut-off score; asserted to be positive, or zero with no ties to keep.</param>
        /// <param name="tiedScoresToKeep">How many slots scoring exactly the threshold may still be kept.</param>
        /// <param name="selectedCount">Receives, for each column, the number of slots that are kept.</param>
        /// <returns>One entry for every column that has at least one slot to drop; other columns are omitted.</returns>
        private static List<DropSlotsTransform.Column> CreateDropSlotsColumns(string[] cols, int size, Single[][] scores,
                                                                              Single threshold, int tiedScoresToKeep, out int[] selectedCount)
        {
            Contracts.Assert(size > 0);
            Contracts.Assert(Utils.Size(scores) == size);
            Contracts.Assert(Utils.Size(cols) == size);
            Contracts.Assert(threshold > 0 || (threshold == 0 && tiedScoresToKeep == 0));

            selectedCount = new int[scores.Length];
            var result = new List<DropSlotsTransform.Column>();

            for (int colIndex = 0; colIndex < size; colIndex++)
            {
                string source = cols[colIndex];
                Single[] slotScores = scores[colIndex];
                var dropped = new List<DropSlotsTransform.Range>();
                int kept = 0;

                // Index of the first slot of the currently open run of dropped slots, or -1 if none.
                int runStart = -1;

                for (int slot = 0; slot < slotScores.Length; slot++)
                {
                    Single value = slotScores[slot];

                    bool keep = value > threshold;
                    if (!keep && value == threshold && tiedScoresToKeep > 0)
                    {
                        // Spend one unit of the tie budget to keep a slot sitting exactly on the threshold.
                        tiedScoresToKeep--;
                        keep = true;
                    }

                    if (keep)
                    {
                        kept++;
                        if (runStart >= 0)
                        {
                            // A kept slot closes the open run of dropped slots.
                            dropped.Add(new DropSlotsTransform.Range { Min = runStart, Max = slot - 1 });
                            runStart = -1;
                        }
                    }
                    else if (runStart < 0)
                    {
                        runStart = slot;
                    }
                }

                // A run still open at the end extends through the last slot.
                if (runStart >= 0)
                {
                    dropped.Add(new DropSlotsTransform.Range { Min = runStart, Max = slotScores.Length - 1 });
                }

                selectedCount[colIndex] = kept;

                // Columns that keep every slot need no drop-slots entry.
                if (dropped.Count > 0)
                {
                    result.Add(new DropSlotsTransform.Column { Source = source, Slots = dropped.ToArray() });
                }
            }

            return result;
        }