Exemplo n.º 1
0
        /// <summary>
        /// Fetches the <c>RankerEvaluator.GroupSummary</c> data view from each fold's metrics dictionary and,
        /// when there is more than one fold, appends them into a single data view.
        /// </summary>
        /// <param name="metrics">One metrics dictionary per fold; asserted to be non-empty.</param>
        /// <param name="gs">
        /// With a single fold, whatever the dictionary lookup yields. With several folds, the appended view,
        /// or null when some fold has no group summary entry.
        /// </param>
        /// <returns>True when every fold contains a group summary entry; false otherwise.</returns>
        private bool TryGetGroupSummaryMetrics(Dictionary <string, IDataView>[] metrics, out IDataView gs)
        {
            Host.AssertNonEmpty(metrics);

            int foldCount = metrics.Length;
            if (foldCount == 1)
            {
                // A lone fold is returned as-is, without going through AddFoldIndex.
                return metrics[0].TryGetValue(RankerEvaluator.GroupSummary, out gs);
            }

            gs = null;
            var perFold = new IDataView[foldCount];
            for (int fold = 0; fold < foldCount; fold++)
            {
                IDataView summary;
                bool found = metrics[fold].TryGetValue(RankerEvaluator.GroupSummary, out summary);
                if (!found)
                {
                    return false;
                }

                // AddFoldIndex receives the zero-based fold number and the total number of folds.
                perFold[fold] = EvaluateUtils.AddFoldIndex(Host, summary, fold, foldCount);
            }

            // The schema of the first fold's view is the one handed to the appended view.
            gs = AppendRowsDataView.Create(Host, perFold[0].Schema, perFold);
            return true;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Fetches the <c>RankerEvaluator.GroupSummary</c> data view from each fold's metrics dictionary and,
        /// when there is more than one fold, passes each through <c>EvaluateUtils.AddKeyColumn</c> and appends
        /// the results into a single data view.
        /// </summary>
        /// <param name="metrics">One metrics dictionary per fold; asserted to be non-empty.</param>
        /// <param name="gs">
        /// With a single fold, whatever the dictionary lookup yields. With several folds, the appended view,
        /// or null when some fold has no group summary entry.
        /// </param>
        /// <returns>True when every fold contains a group summary entry; false otherwise.</returns>
        private bool TryGetGroupSummaryMetrics(Dictionary <string, IDataView>[] metrics, out IDataView gs)
        {
            Host.AssertNonEmpty(metrics);

            int foldCount = metrics.Length;
            if (foldCount == 1)
            {
                // A lone fold is returned as-is; no fold index column is added.
                return metrics[0].TryGetValue(RankerEvaluator.GroupSummary, out gs);
            }

            gs = null;
            var perFold = new IDataView[foldCount];
            for (int fold = 0; fold < foldCount; fold++)
            {
                IDataView summary;
                bool found = metrics[fold].TryGetValue(RankerEvaluator.GroupSummary, out summary);
                if (!found)
                {
                    return false;
                }

                // The LambdaColumnMapper must have an input column, so the first column of the view is used.
                var sourceName = summary.Schema.GetColumnName(0);
                var sourceType = summary.Schema.GetColumnType(0);

                // Arguments passed on: key count = number of folds, value = one-based fold number.
                perFold[fold] = Utils.MarshalInvoke(
                    EvaluateUtils.AddKeyColumn <int>,
                    sourceType.RawType,
                    Host,
                    summary,
                    sourceName,
                    MetricKinds.ColumnNames.FoldIndex,
                    sourceType,
                    foldCount,
                    fold + 1,
                    "FoldIndex",
                    default(ValueGetter <VBuffer <DvText> >));
            }

            // The schema of the first fold's view is the one handed to the appended view.
            gs = AppendRowsDataView.Create(Host, perFold[0].Schema, perFold);
            return true;
        }
            /// <summary>
            /// Builds a cursor that opens one row cursor per source of <paramref name="parent"/>, creates a
            /// <c>MultinomialWithoutReplacementSampler</c> over <paramref name="counts"/>, and creates getters
            /// for the requested columns.
            /// </summary>
            /// <param name="parent">The data view whose sources are opened.</param>
            /// <param name="needCol">Predicate selecting the columns for which getters are created.</param>
            /// <param name="rand">Random source; passed to <c>RandomUtils.Create</c> for each source cursor and to the sampler.</param>
            /// <param name="counts">One non-negative entry per source (asserted).</param>
            public RandCursor(AppendRowsDataView parent, Func <int, bool> needCol, IRandom rand, int[] counts)
                : base(parent)
            {
                Ch.AssertValue(needCol);
                Ch.AssertValue(rand);

                _rand = rand;
                Ch.AssertValue(counts);
                Ch.Assert(Sources.Length == counts.Length);

                int sourceCount = counts.Length;
                _cursorSet = new IRowCursor[sourceCount];
                for (int src = 0; src < sourceCount; src++)
                {
                    Ch.Assert(counts[src] >= 0);
                    // One RandomUtils.Create(_rand) call per source, made before the sampler is constructed.
                    _cursorSet[src] = parent._sources[src].GetRowCursor(needCol, RandomUtils.Create(_rand));
                }

                _sampler = new MultinomialWithoutReplacementSampler(Ch, counts, rand);
                // No source is selected yet.
                _currentSourceIndex = -1;

                for (int col = 0; col < Getters.Length; col++)
                {
                    if (!needCol(col))
                    {
                        continue;
                    }

                    Getters[col] = CreateGetter(col);
                }
            }
Exemplo n.º 4
0
        /// <summary>
        /// Fetches the <c>MetricKinds.OverallMetrics</c> data view from each fold's metrics dictionary and,
        /// when there is more than one fold, passes each through <c>EvaluateUtils.AddTextColumn</c> with the
        /// text "Fold N" (N being the zero-based fold number) and appends the results into a single data view.
        /// </summary>
        /// <param name="metrics">One metrics dictionary per fold; asserted to be non-empty.</param>
        /// <param name="overall">
        /// With a single fold, whatever the dictionary lookup yields. With several folds, the appended view,
        /// or null when some fold has no overall metrics entry.
        /// </param>
        /// <returns>True when every fold contains an overall metrics entry; false otherwise.</returns>
        protected bool TryGetOverallMetrics(Dictionary <string, IDataView>[] metrics, out IDataView overall)
        {
            Host.AssertNonEmpty(metrics);

            int foldCount = metrics.Length;
            if (foldCount == 1)
            {
                // A lone fold is returned as-is; no fold name column is added.
                return metrics[0].TryGetValue(MetricKinds.OverallMetrics, out overall);
            }

            overall = null;
            var perFold = new IDataView[foldCount];
            for (int fold = 0; fold < foldCount; fold++)
            {
                IDataView foldMetrics;
                bool found = metrics[fold].TryGetValue(MetricKinds.OverallMetrics, out foldMetrics);
                if (!found)
                {
                    return false;
                }

                // Add a fold-name column. It is added as text, since it is only used for saving the
                // result summary file. The LambdaColumnMapper must have an input column, so the first
                // column of the view is used for that purpose.
                var sourceName = foldMetrics.Schema.GetColumnName(0);
                var sourceType = foldMetrics.Schema.GetColumnType(0);
                var foldLabel  = string.Format("Fold {0}", fold);

                perFold[fold] = Utils.MarshalInvoke(
                    EvaluateUtils.AddTextColumn <int>,
                    sourceType.RawType,
                    Host,
                    foldMetrics,
                    sourceName,
                    MetricKinds.ColumnNames.FoldIndex,
                    sourceType,
                    foldLabel,
                    "FoldName");
            }

            // The schema of the first fold's view is the one handed to the appended view.
            overall = AppendRowsDataView.Create(Host, perFold[0].Schema, perFold);
            return true;
        }
 /// <summary>
 /// Initializes the state shared by the cursors: the parent's sources, the parent's schema,
 /// and an array with one (initially null) getter delegate slot per schema column.
 /// </summary>
 /// <param name="parent">The data view being cursored over; its host is passed to the base constructor.</param>
 public CursorBase(AppendRowsDataView parent)
     : base(parent._host)
 {
     Sources = parent._sources;
     Ch.AssertNonEmpty(Sources);

     var parentSchema = parent._schema;
     Schema = parentSchema;

     // Slots are filled in later, only for the columns that are actually requested.
     int columnCount = parentSchema.ColumnCount;
     Getters = new Delegate[columnCount];
 }
            /// <summary>
            /// Builds a cursor positioned on the first source: opens that source's row cursor, takes its
            /// id getter, and creates getters for the requested columns.
            /// </summary>
            /// <param name="parent">The data view whose sources are read.</param>
            /// <param name="needCol">Predicate selecting the columns for which getters are created.</param>
            public Cursor(AppendRowsDataView parent, Func <int, bool> needCol)
                : base(parent)
            {
                Ch.AssertValue(needCol);

                // Begin with source 0.
                _currentSourceIndex = 0;
                _currentCursor = Sources[0].GetRowCursor(needCol);
                _currentIdGetter = _currentCursor.GetIdGetter();

                int columnCount = Getters.Length;
                for (int col = 0; col < columnCount; col++)
                {
                    if (!needCol(col))
                    {
                        continue;
                    }

                    Getters[col] = CreateGetter(col);
                }
            }
        /// <summary>
        /// Appends the per-instance data views of all folds into a single data view.
        /// </summary>
        /// <remarks>
        /// Hidden columns are dropped from every fold. Vector columns of later folds are compared against
        /// the first fold through <c>VerifyVectorColumnsMatch</c>; columns that fail the check are replaced
        /// via <c>AddVarLengthColumn</c> before appending. Columns of the first fold that have key names are
        /// reconciled across folds and passed through <c>KeyToValueTransform</c>.
        /// </remarks>
        /// <param name="foldDataViews">The per-instance data view of each fold, in fold order.</param>
        /// <param name="ch">Channel that receives a warning when mismatching vector columns are detected.</param>
        /// <returns>The appended data view.</returns>
        private IDataView AppendPerInstanceDataViews(IEnumerable <IDataView> foldDataViews, IChannel ch)
        {
            // Make sure there are no variable size vector columns.
            // This is a dictionary from the column name to its vector size.
            var vectorSizes                   = new Dictionary <string, int>();
            var firstDvSlotNames              = new Dictionary <string, VBuffer <DvText> >();
            var firstDvKeyColumns             = new List <string>();
            var firstDvVectorKeyColumns       = new List <string>();
            var variableSizeVectorColumnNames = new List <string>();
            var list     = new List <IDataView>();
            int dvNumber = 0;

            foreach (var dv in foldDataViews)
            {
                var hidden = new List <int>();
                for (int i = 0; i < dv.Schema.ColumnCount; i++)
                {
                    if (dv.Schema.IsHidden(i))
                    {
                        hidden.Add(i);
                        continue;
                    }

                    var type = dv.Schema.GetColumnType(i);
                    var name = dv.Schema.GetColumnName(i);
                    if (type.IsVector)
                    {
                        if (dvNumber == 0)
                        {
                            if (dv.Schema.HasKeyNames(i, type.ItemType.KeyCount))
                            {
                                firstDvVectorKeyColumns.Add(name);
                            }
                            // Store the slot names of the 1st idv and use them as baseline.
                            if (dv.Schema.HasSlotNames(i, type.VectorSize))
                            {
                                VBuffer <DvText> slotNames = default(VBuffer <DvText>);
                                dv.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, i, ref slotNames);
                                firstDvSlotNames.Add(name, slotNames);
                            }
                        }

                        int cachedSize;
                        if (vectorSizes.TryGetValue(name, out cachedSize))
                        {
                            VBuffer <DvText> slotNames;
                            // In the event that no slot names were recorded here, then slotNames will be
                            // the default, length 0 vector.
                            firstDvSlotNames.TryGetValue(name, out slotNames);
                            // The check always runs; the name is recorded only once even if the column
                            // mismatches in several folds, so it is converted once below and listed once
                            // in the warning.
                            if (!VerifyVectorColumnsMatch(cachedSize, i, dv, type, ref slotNames) &&
                                !variableSizeVectorColumnNames.Contains(name))
                            {
                                variableSizeVectorColumnNames.Add(name);
                            }
                        }
                        else
                        {
                            vectorSizes.Add(name, type.VectorSize);
                        }
                    }
                    else if (dvNumber == 0 && dv.Schema.HasKeyNames(i, type.KeyCount))
                    {
                        // The label column can be a key. Reconcile the key values, and wrap with a KeyToValue transform.
                        firstDvKeyColumns.Add(name);
                    }
                }
                var idv = dv;
                if (hidden.Count > 0)
                {
                    // Drop the hidden columns of this fold by index.
                    var args = new ChooseColumnsByIndexTransform.Arguments();
                    args.Drop  = true;
                    args.Index = hidden.ToArray();
                    idv        = new ChooseColumnsByIndexTransform(Host, args, idv);
                }
                list.Add(idv);
                dvNumber++;
            }

            // Nothing to reconcile or convert: append the (hidden-column-free) views directly.
            // NOTE(review): firstDvVectorKeyColumns is not consulted here, so views whose only key columns
            // are vector-valued take this shortcut as well -- confirm that this is intended.
            if (variableSizeVectorColumnNames.Count == 0 && firstDvKeyColumns.Count == 0)
            {
                return(AppendRowsDataView.Create(Host, null, list.ToArray()));
            }

            var views = list.ToArray();

            foreach (var keyCol in firstDvKeyColumns)
            {
                EvaluateUtils.ReconcileKeyValues(Host, views, keyCol);
            }
            foreach (var vectorKeyCol in firstDvVectorKeyColumns)
            {
                EvaluateUtils.ReconcileVectorKeyValues(Host, views, vectorKeyCol);
            }

            // Applies KeyToValueTransform to every key column found in the first fold, then drops the
            // columns reported by FindHiddenColumns for that name. The index argument is unused; it is
            // only there to match the indexed Select overload used below.
            Func <IDataView, int, IDataView> keyToValue =
                (idv, i) =>
            {
                foreach (var keyCol in firstDvKeyColumns.Concat(firstDvVectorKeyColumns))
                {
                    idv = new KeyToValueTransform(Host, new KeyToValueTransform.Arguments()
                    {
                        Column = new[] { new KeyToValueTransform.Column()
                                         {
                                             Name = keyCol
                                         }, }
                    }, idv);
                    var hidden = FindHiddenColumns(idv.Schema, keyCol);
                    idv = new ChooseColumnsByIndexTransform(Host, new ChooseColumnsByIndexTransform.Arguments()
                    {
                        Drop = true, Index = hidden.ToArray()
                    }, idv);
                }
                return(idv);
            };

            // For every mismatching vector column: run AddVarLengthColumn on it, then drop the column
            // carrying the original name.
            Func <IDataView, IDataView> selectDropNonVarLenthCol =
                (idv) =>
            {
                foreach (var variableSizeVectorColumnName in variableSizeVectorColumnNames)
                {
                    int index;
                    idv.Schema.TryGetColumnIndex(variableSizeVectorColumnName, out index);
                    var type = idv.Schema.GetColumnType(index);

                    idv = Utils.MarshalInvoke(AddVarLengthColumn <int>, type.ItemType.RawType, Host, idv,
                                              variableSizeVectorColumnName, type);

                    // Drop the old column that does not have variable length.
                    idv = new DropColumnsTransform(Host, new DropColumnsTransform.Arguments()
                    {
                        Column = new[] { variableSizeVectorColumnName }
                    }, idv);
                }
                return(idv);
            };

            if (variableSizeVectorColumnNames.Count > 0)
            {
                ch.Warning("Detected columns of variable length: {0}. Consider setting collateMetrics- for meaningful per-Folds results.", string.Join(", ", variableSizeVectorColumnNames));
            }
            return(AppendRowsDataView.Create(Host, null, views.Select(keyToValue).Select(selectDropNonVarLenthCol).ToArray()));
        }