/// <summary>
/// Tries to produce a single group-summary data view out of the per-fold metric dictionaries.
/// </summary>
/// <param name="metrics">One metric dictionary per fold; asserted to be non-empty.</param>
/// <param name="gs">
/// With a single fold, whatever that fold's dictionary yields for <c>RankerEvaluator.GroupSummary</c>.
/// With several folds, the fold views (each passed through <c>EvaluateUtils.AddFoldIndex</c>) appended
/// together, or null if any fold lacks a group summary.
/// </param>
/// <returns>True when every fold contains a group-summary entry; false otherwise.</returns>
private bool TryGetGroupSummaryMetrics(Dictionary<string, IDataView>[] metrics, out IDataView gs)
{
    Host.AssertNonEmpty(metrics);

    int foldCount = metrics.Length;
    if (foldCount == 1)
    {
        // Only one fold: its group summary is returned as-is, without a fold index.
        return metrics[0].TryGetValue(RankerEvaluator.GroupSummary, out gs);
    }

    gs = null;
    var foldViews = new List<IDataView>();
    int fold = 0;
    foreach (var foldMetrics in metrics)
    {
        IDataView summary;
        bool found = foldMetrics.TryGetValue(RankerEvaluator.GroupSummary, out summary);
        if (!found)
        {
            return false;
        }

        foldViews.Add(EvaluateUtils.AddFoldIndex(Host, summary, fold, foldCount));
        fold++;
    }

    // The first fold's schema is passed as the schema of the appended view.
    gs = AppendRowsDataView.Create(Host, foldViews[0].Schema, foldViews.ToArray());
    return true;
}
/// <summary>
/// Tries to produce a single group-summary data view out of the per-fold metric dictionaries,
/// adding a fold-index key column to each fold's view before appending them.
/// </summary>
/// <param name="metrics">One metric dictionary per fold; asserted to be non-empty.</param>
/// <param name="gs">
/// With a single fold, whatever that fold's dictionary yields for <c>RankerEvaluator.GroupSummary</c>.
/// With several folds, the augmented fold views appended together, or null if any fold lacks a
/// group summary.
/// </param>
/// <returns>True when every fold contains a group-summary entry; false otherwise.</returns>
private bool TryGetGroupSummaryMetrics(Dictionary<string, IDataView>[] metrics, out IDataView gs)
{
    Host.AssertNonEmpty(metrics);

    if (metrics.Length == 1)
    {
        // Only one fold: its group summary is returned as-is, without a fold column.
        return metrics[0].TryGetValue(RankerEvaluator.GroupSummary, out gs);
    }

    gs = null;
    var foldViews = new List<IDataView>();
    int fold = 0;
    foreach (var foldMetrics in metrics)
    {
        IDataView view;
        bool found = foldMetrics.TryGetValue(RankerEvaluator.GroupSummary, out view);
        if (!found)
        {
            return false;
        }

        // The LambdaColumnMapper must have an input column, so the first column of the
        // data view is used as that input.
        var firstColName = view.Schema.GetColumnName(0);
        var firstColType = view.Schema.GetColumnType(0);

        // The fold is passed as (fold + 1) together with the total number of folds.
        view = Utils.MarshalInvoke(
            EvaluateUtils.AddKeyColumn<int>,
            firstColType.RawType,
            Host,
            view,
            firstColName,
            MetricKinds.ColumnNames.FoldIndex,
            firstColType,
            metrics.Length,
            fold + 1,
            "FoldIndex",
            default(ValueGetter<VBuffer<DvText>>));

        foldViews.Add(view);
        fold++;
    }

    // The first fold's schema is passed as the schema of the appended view.
    gs = AppendRowsDataView.Create(Host, foldViews[0].Schema, foldViews.ToArray());
    return true;
}
/// <summary>
/// Builds a cursor over all sources of <paramref name="parent"/> that picks rows through a
/// <c>MultinomialWithoutReplacementSampler</c> constructed from <paramref name="counts"/>.
/// </summary>
/// <param name="parent">The append-rows view whose sources are iterated.</param>
/// <param name="needCol">Predicate telling which columns need a getter.</param>
/// <param name="rand">Random source; also used to derive one generator per source cursor.</param>
/// <param name="counts">One non-negative count per source (same length as the sources).</param>
public RandCursor(AppendRowsDataView parent, Func<int, bool> needCol, IRandom rand, int[] counts)
    : base(parent)
{
    Ch.AssertValue(needCol);
    Ch.AssertValue(rand);
    _rand = rand;

    Ch.AssertValue(counts);
    Ch.Assert(Sources.Length == counts.Length);

    // Open one cursor per source, each with its own generator derived from _rand.
    // This happens before the sampler is created, so _rand is consumed in that order.
    _cursorSet = new IRowCursor[counts.Length];
    for (int src = 0; src < counts.Length; src++)
    {
        Ch.Assert(counts[src] >= 0);
        _cursorSet[src] = parent._sources[src].GetRowCursor(needCol, RandomUtils.Create(_rand));
    }

    _sampler = new MultinomialWithoutReplacementSampler(Ch, counts, rand);
    _currentSourceIndex = -1;

    // Only requested columns get a getter; the remaining slots stay null.
    for (int col = 0; col < Getters.Length; col++)
    {
        if (!needCol(col))
        {
            continue;
        }

        Getters[col] = CreateGetter(col);
    }
}
/// <summary>
/// Tries to produce a single overall-metrics data view out of the per-fold metric dictionaries.
/// </summary>
/// <param name="metrics">One metric dictionary per fold; asserted to be non-empty.</param>
/// <param name="overall">
/// With a single fold, whatever that fold's dictionary yields for <c>MetricKinds.OverallMetrics</c>.
/// With several folds, the fold views (each extended with a fold-name text column) appended
/// together, or null if any fold lacks overall metrics.
/// </param>
/// <returns>True when every fold contains an overall-metrics entry; false otherwise.</returns>
protected bool TryGetOverallMetrics(Dictionary<string, IDataView>[] metrics, out IDataView overall)
{
    Host.AssertNonEmpty(metrics);

    if (metrics.Length == 1)
    {
        // Only one fold: its overall metrics are returned as-is, without a fold-name column.
        return metrics[0].TryGetValue(MetricKinds.OverallMetrics, out overall);
    }

    overall = null;
    var foldViews = new List<IDataView>();
    int fold = 0;
    foreach (var foldMetrics in metrics)
    {
        IDataView view;
        bool found = foldMetrics.TryGetValue(MetricKinds.OverallMetrics, out view);
        if (!found)
        {
            return false;
        }

        // Add a fold-name column. It is added as a text column, since it is only used for
        // saving the result summary file. The LambdaColumnMapper must have an input column,
        // so the first column of the data view is used as that input.
        var firstColName = view.Schema.GetColumnName(0);
        var firstColType = view.Schema.GetColumnType(0);

        // The text value written for this fold is "Fold <zero-based fold number>".
        view = Utils.MarshalInvoke(
            EvaluateUtils.AddTextColumn<int>,
            firstColType.RawType,
            Host,
            view,
            firstColName,
            MetricKinds.ColumnNames.FoldIndex,
            firstColType,
            string.Format("Fold {0}", fold),
            "FoldName");

        foldViews.Add(view);
        fold++;
    }

    // The first fold's schema is passed as the schema of the appended view.
    overall = AppendRowsDataView.Create(Host, foldViews[0].Schema, foldViews.ToArray());
    return true;
}
/// <summary>
/// Initializes the state shared by the cursors: the parent's sources and schema, plus an
/// (initially empty) getter table with one slot per schema column.
/// </summary>
/// <param name="parent">The append-rows view this cursor iterates; its host is handed to the base class.</param>
public CursorBase(AppendRowsDataView parent)
    : base(parent._host)
{
    Sources = parent._sources;
    Ch.AssertNonEmpty(Sources);

    Schema = parent._schema;

    // One getter slot per column; derived constructors fill in the requested ones.
    int columnCount = Schema.ColumnCount;
    Getters = new Delegate[columnCount];
}
/// <summary>
/// Builds a cursor positioned on the first source of <paramref name="parent"/>, and creates
/// getters for the requested columns.
/// </summary>
/// <param name="parent">The append-rows view whose sources are iterated.</param>
/// <param name="needCol">Predicate telling which columns need a getter.</param>
public Cursor(AppendRowsDataView parent, Func<int, bool> needCol)
    : base(parent)
{
    Ch.AssertValue(needCol);

    // Begin with source 0: open its cursor and fetch that cursor's id getter.
    _currentSourceIndex = 0;
    var firstSource = Sources[_currentSourceIndex];
    _currentCursor = firstSource.GetRowCursor(needCol);
    _currentIdGetter = _currentCursor.GetIdGetter();

    // Only requested columns get a getter; the remaining slots stay null.
    for (int col = 0; col < Getters.Length; col++)
    {
        if (!needCol(col))
        {
            continue;
        }

        Getters[col] = CreateGetter(col);
    }
}
/// <summary>
/// Appends the per-instance data views of all folds into a single data view.
/// </summary>
/// <remarks>
/// Hidden columns are dropped from every fold. The first fold is the baseline: its key-valued
/// columns (scalar and vector) are the ones reconciled across folds and converted with
/// <c>KeyToValueTransform</c>, and its slot names are what later folds' vector columns are checked
/// against. A vector column for which <c>VerifyVectorColumnsMatch</c> fails in some fold is
/// replaced, in every fold, by the column produced by <c>AddVarLengthColumn</c>, and a warning
/// is issued on <paramref name="ch"/>.
/// Each such column name is recorded once, even if several folds disagree with the baseline,
/// so the warning lists it once and the replacement runs once per column.
/// </remarks>
/// <param name="foldDataViews">The per-instance data view of each fold.</param>
/// <param name="ch">Channel used for the variable-length warning.</param>
/// <returns>The appended data view.</returns>
private IDataView AppendPerInstanceDataViews(IEnumerable<IDataView> foldDataViews, IChannel ch)
{
    // Make sure there are no variable size vector columns.
    // This is a dictionary from the column name to its vector size.
    var vectorSizes = new Dictionary<string, int>();
    var firstDvSlotNames = new Dictionary<string, VBuffer<DvText>>();
    var firstDvKeyColumns = new List<string>();
    var firstDvVectorKeyColumns = new List<string>();
    var variableSizeVectorColumnNames = new List<string>();
    var list = new List<IDataView>();
    int dvNumber = 0;
    foreach (var dv in foldDataViews)
    {
        var hidden = new List<int>();
        for (int i = 0; i < dv.Schema.ColumnCount; i++)
        {
            if (dv.Schema.IsHidden(i))
            {
                hidden.Add(i);
                continue;
            }

            var type = dv.Schema.GetColumnType(i);
            var name = dv.Schema.GetColumnName(i);
            if (type.IsVector)
            {
                if (dvNumber == 0)
                {
                    if (dv.Schema.HasKeyNames(i, type.ItemType.KeyCount))
                    {
                        firstDvVectorKeyColumns.Add(name);
                    }

                    // Store the slot names of the 1st idv and use them as baseline.
                    if (dv.Schema.HasSlotNames(i, type.VectorSize))
                    {
                        VBuffer<DvText> slotNames = default(VBuffer<DvText>);
                        dv.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, i, ref slotNames);
                        firstDvSlotNames.Add(name, slotNames);
                    }
                }

                int cachedSize;
                if (vectorSizes.TryGetValue(name, out cachedSize))
                {
                    VBuffer<DvText> slotNames;
                    // In the event that no slot names were recorded here, then slotNames will be
                    // the default, length 0 vector.
                    firstDvSlotNames.TryGetValue(name, out slotNames);

                    // The verification is always evaluated first; the Contains check only
                    // prevents recording the same column once per mismatching fold.
                    if (!VerifyVectorColumnsMatch(cachedSize, i, dv, type, ref slotNames)
                        && !variableSizeVectorColumnNames.Contains(name))
                    {
                        variableSizeVectorColumnNames.Add(name);
                    }
                }
                else
                {
                    vectorSizes.Add(name, type.VectorSize);
                }
            }
            else if (dvNumber == 0 && dv.Schema.HasKeyNames(i, type.KeyCount))
            {
                // The label column can be a key. Reconcile the key values, and wrap with a KeyToValue transform.
                firstDvKeyColumns.Add(name);
            }
        }

        // Drop the hidden columns of this fold, if it has any.
        var idv = dv;
        if (hidden.Count > 0)
        {
            var args = new ChooseColumnsByIndexTransform.Arguments();
            args.Drop = true;
            args.Index = hidden.ToArray();
            idv = new ChooseColumnsByIndexTransform(Host, args, idv);
        }

        list.Add(idv);
        dvNumber++;
    }

    // NOTE(review): this shortcut does not look at firstDvVectorKeyColumns, so vector key columns
    // alone do not trigger reconciliation - confirm that this is intended.
    if (variableSizeVectorColumnNames.Count == 0 && firstDvKeyColumns.Count == 0)
    {
        return AppendRowsDataView.Create(Host, null, list.ToArray());
    }

    var views = list.ToArray();
    foreach (var keyCol in firstDvKeyColumns)
    {
        EvaluateUtils.ReconcileKeyValues(Host, views, keyCol);
    }

    foreach (var vectorKeyCol in firstDvVectorKeyColumns)
    {
        EvaluateUtils.ReconcileVectorKeyValues(Host, views, vectorKeyCol);
    }

    // Applies KeyToValue to every key column of the first fold, then drops the hidden columns
    // reported by FindHiddenColumns for that column name.
    Func<IDataView, int, IDataView> keyToValue =
        (idv, i) =>
        {
            foreach (var keyCol in firstDvKeyColumns.Concat(firstDvVectorKeyColumns))
            {
                idv = new KeyToValueTransform(
                    Host,
                    new KeyToValueTransform.Arguments()
                    {
                        Column = new[] { new KeyToValueTransform.Column() { Name = keyCol }, }
                    },
                    idv);
                var hidden = FindHiddenColumns(idv.Schema, keyCol);
                idv = new ChooseColumnsByIndexTransform(
                    Host,
                    new ChooseColumnsByIndexTransform.Arguments() { Drop = true, Index = hidden.ToArray() },
                    idv);
            }

            return idv;
        };

    // Runs AddVarLengthColumn for every column recorded as variable size and drops the original.
    Func<IDataView, IDataView> selectDropNonVarLenthCol =
        (idv) =>
        {
            foreach (var variableSizeVectorColumnName in variableSizeVectorColumnNames)
            {
                int index;
                idv.Schema.TryGetColumnIndex(variableSizeVectorColumnName, out index);
                var type = idv.Schema.GetColumnType(index);

                idv = Utils.MarshalInvoke(AddVarLengthColumn<int>, type.ItemType.RawType, Host, idv,
                    variableSizeVectorColumnName, type);

                // Drop the old column that does not have variable length.
                idv = new DropColumnsTransform(
                    Host,
                    new DropColumnsTransform.Arguments() { Column = new[] { variableSizeVectorColumnName } },
                    idv);
            }

            return idv;
        };

    if (variableSizeVectorColumnNames.Count > 0)
    {
        ch.Warning("Detected columns of variable length: {0}. Consider setting collateMetrics- for meaningful per-Folds results.", string.Join(", ", variableSizeVectorColumnNames));
    }

    return AppendRowsDataView.Create(Host, null, views.Select(keyToValue).Select(selectDropNonVarLenthCol).ToArray());
}