/// <summary>
/// Compares two data views by running the cursor-based <c>CheckSameValues</c> overload over
/// several column selections of <paramref name="view2"/> (all columns, even-indexed columns,
/// odd-indexed columns) and finally the cursor-vs-view overload.
/// </summary>
/// <param name="view1">First view; always cursored with every column.</param>
/// <param name="view2">Second view; must have the same number of schema columns as <paramref name="view1"/>.</param>
/// <param name="exactTypes">Forwarded unchanged to the comparison overloads.</param>
/// <param name="exactDoubles">Forwarded unchanged to the comparison overloads.</param>
/// <param name="checkId">Forwarded unchanged to the comparison overloads.</param>
/// <returns><c>true</c> only when every comparison pass reported success.</returns>
protected bool CheckSameValues(IDataView view1, IDataView view2, bool exactTypes = true, bool exactDoubles = true, bool checkId = true)
{
    Contracts.Assert(view1.Schema.Count == view2.Schema.Count);

    bool all = true;
    bool tmp;

    // Pass 1: all columns requested on both sides.
    using (var curs1 = view1.GetRowCursorForAllColumns())
    using (var curs2 = view2.GetRowCursorForAllColumns())
    {
        Check(curs1.Schema == view1.Schema, "Schema of view 1 and its cursor differed");
        Check(curs2.Schema == view2.Schema, "Schema of view 2 and its cursor differed");
        tmp = CheckSameValues(curs1, curs2, exactTypes, exactDoubles, checkId, true);
    }
    Check(tmp, "All same failed");
    all &= tmp;

    // Pass 2: only the even-indexed columns of view2 are requested.
    var view2EvenCols = view2.Schema.Where(col => (col.Index & 1) == 0);
    using (var curs1 = view1.GetRowCursorForAllColumns())
    using (var curs2 = view2.GetRowCursor(view2EvenCols))
    {
        Check(curs1.Schema == view1.Schema, "Schema of view 1 and its cursor differed");
        Check(curs2.Schema == view2.Schema, "Schema of view 2 and its cursor differed");
        tmp = CheckSameValues(curs1, curs2, exactTypes, exactDoubles, checkId, false);
    }
    Check(tmp, "Even same failed");
    all &= tmp;

    // Pass 3: only the odd-indexed columns of view2 are requested.
    // (The low bit must be non-zero here; testing for zero would repeat the even pass.)
    var view2OddCols = view2.Schema.Where(col => (col.Index & 1) != 0);
    using (var curs1 = view1.GetRowCursorForAllColumns())
    using (var curs2 = view2.GetRowCursor(view2OddCols))
    {
        Check(curs1.Schema == view1.Schema, "Schema of view 1 and its cursor differed");
        Check(curs2.Schema == view2.Schema, "Schema of view 2 and its cursor differed");
        tmp = CheckSameValues(curs1, curs2, exactTypes, exactDoubles, checkId, false);
    }
    Check(tmp, "Odd same failed");
    // Fold the odd pass into the overall result like every other pass.
    all &= tmp;

    // Pass 4: compare a cursor over view1 against view2 itself.
    using (var curs1 = view1.GetRowCursor(view1.Schema))
    {
        Check(curs1.Schema == view1.Schema, "Schema of view 1 and its cursor differed");
        tmp = CheckSameValues(curs1, view2, exactTypes, exactDoubles, checkId);
    }
    Check(tmp, "Single value same failed");
    all &= tmp;

    return all;
}
/// <summary>
/// For every row of <paramref name="result"/>, reads the float vectors in columns "a", "b" and "c"
/// and asserts that the first four values of "c" equal the product of "a" and "b" when each is
/// indexed as a row-major 2x2 matrix.
/// </summary>
/// <param name="result">Data view exposing VBuffer&lt;float&gt; columns named "a", "b" and "c".</param>
private void ValidateTensorFlowTransformer(IDataView result)
{
    using (var rows = result.GetRowCursorForAllColumns())
    {
        // Reusable buffers the getters fill on each row.
        VBuffer<float> bufferA = default;
        VBuffer<float> bufferB = default;
        VBuffer<float> bufferC = default;

        var readA = rows.GetGetter<VBuffer<float>>(result.Schema["a"]);
        var readB = rows.GetGetter<VBuffer<float>>(result.Schema["b"]);
        var readC = rows.GetGetter<VBuffer<float>>(result.Schema["c"]);

        while (rows.MoveNext())
        {
            readA(ref bufferA);
            readB(ref bufferB);
            readC(ref bufferC);

            var a = bufferA.GetValues();
            var b = bufferB.GetValues();
            var c = bufferC.GetValues();

            // c[2*row + col] must equal a[row, 0] * b[0, col] + a[row, 1] * b[1, col].
            for (int row = 0; row < 2; row++)
            {
                for (int col = 0; col < 2; col++)
                {
                    Assert.Equal(a[2 * row] * b[col] + a[2 * row + 1] * b[col + 2], c[2 * row + col]);
                }
            }
        }
    }
}
/// <summary>
/// Asserts that columns "TermA", "TermB" and "TermC" of <paramref name="result"/> each hold the
/// value 1 on the first row, 2 on the second, and so on.
/// </summary>
/// <param name="result">Data view expected to contain the three uint columns.</param>
private void ValidateTermTransformer(IDataView result)
{
    // Fail immediately when a column is missing rather than validating whatever index
    // the out parameter happens to hold after an unsuccessful lookup.
    Assert.True(result.Schema.TryGetColumnIndex("TermA", out int colA), "Column 'TermA' not found");
    Assert.True(result.Schema.TryGetColumnIndex("TermB", out int colB), "Column 'TermB' not found");
    Assert.True(result.Schema.TryGetColumnIndex("TermC", out int colC), "Column 'TermC' not found");

    using (var cursor = result.GetRowCursorForAllColumns())
    {
        uint avalue = 0;
        uint bvalue = 0;
        uint cvalue = 0;

        var aGetter = cursor.GetGetter<uint>(colA);
        var bGetter = cursor.GetGetter<uint>(colB);
        var cGetter = cursor.GetGetter<uint>(colC);

        // Expected value for the current row; starts at 1 and grows by one per row.
        uint i = 1;
        while (cursor.MoveNext())
        {
            aGetter(ref avalue);
            bGetter(ref bvalue);
            cGetter(ref cvalue);

            Assert.Equal(i, avalue);
            Assert.Equal(i, bvalue);
            Assert.Equal(i, cvalue);
            i++;
        }
    }
}
/// <summary>
/// Benchmark body: opens a cursor over every column of <c>_dataView</c>, creates text getters for
/// the first <c>_numColumnsToGet</c> columns, and invokes each getter once per row.
/// </summary>
public void TestTextLoaderGetters()
{
    using (var cursor = _dataView.GetRowCursorForAllColumns())
    {
        var columnGetters = new List<ValueGetter<ReadOnlyMemory<char>>>();
        for (int column = 0; column < _numColumnsToGet; column++)
        {
            var getter = cursor.GetGetter<ReadOnlyMemory<char>>(_dataView.Schema[column]);
            columnGetters.Add(getter);
        }

        // Single scratch value reused for every read; the content is discarded.
        ReadOnlyMemory<char> scratch = default;
        while (cursor.MoveNext())
        {
            foreach (var getter in columnGetters)
            {
                getter(ref scratch);
            }
        }
    }

    // Recorded BenchmarkDotNet output, kept for reference.
    //
    // * Summary *
    //
    // BenchmarkDotNet=v0.12.0, OS=Windows 10.0.18363
    // Intel Core i7-8650U CPU 1.90GHz (Kaby Lake R), 1 CPU, 8 logical and 4 physical cores
    // .NET Core SDK=3.1.100-preview3-014645
    //   [Host]     : .NET Core 2.1.13 (CoreCLR 4.6.28008.01, CoreFX 4.6.28008.01), X64 RyuJIT
    //   Job-XQBLAM : .NET Core 2.1.13 (CoreCLR 4.6.28008.01, CoreFX 4.6.28008.01), X64 RyuJIT
    //
    // Arguments=/p:Configuration=Release  Toolchain=netcoreapp2.1  IterationCount=1
    // LaunchCount=3  MaxIterationCount=20  RunStrategy=ColdStart
    // UnrollFactor=1  WarmupCount=1
    //
    // |                 Method |     Mean |    Error |   StdDev | Extra Metric |
    // |----------------------- |---------:|---------:|---------:|-------------:|
    // | TestTextLoaderGetters  |  1.012 s | 0.6649 s | 0.0364 s |            - |
    //
    // * Legends *
    //   Mean         : Arithmetic mean of all measurements
    //   Error        : Half of 99.9% confidence interval
    //   StdDev       : Standard deviation of all measurements
    //   Extra Metric : Value of the provided extra metric
    //   1 s          : 1 Second (1 sec)
    //
    // ***** BenchmarkRunner: End *****
    // ** Remained 0 benchmark(s) to run **
    // Run time: 00:00:16 (16.05 sec), executed benchmarks: 1
    // Global total time: 00:00:33 (33.18 sec), executed benchmarks: 1
}
/// <summary>
/// Replaces <paramref name="cursor"/> with a fresh all-columns cursor over <paramref name="input"/>,
/// re-binds every column's getter to it, and advances it once.
/// </summary>
/// <param name="input">Data view to open the replacement cursor on.</param>
/// <param name="cursor">Cursor to dispose; receives the newly created cursor.</param>
/// <param name="allColumns">Columns whose getters are re-initialized against the new cursor.</param>
private void ResetCursor(IDataView input, ref DataViewRowCursor cursor, Dictionary<string, TypedColumn> allColumns)
{
    // Release the old cursor before opening its replacement.
    cursor.Dispose();
    cursor = input.GetRowCursorForAllColumns();

    // Getters are tied to a cursor instance, so each column is bound again.
    foreach (var entry in allColumns)
    {
        entry.Value.InitializeGetter(cursor);
    }

    // Step onto the first row; a debug build asserts that one exists.
    bool hasFirstRow = cursor.MoveNext();
    Debug.Assert(hasFirstRow);
}
/// <summary>
/// Builds a preview of <paramref name="data"/> by reading at most <paramref name="maxRows"/> rows
/// and exposing what was collected both per row (<c>RowView</c>) and per column (<c>ColumnView</c>).
/// </summary>
/// <param name="data">Data view to sample; must not be null.</param>
/// <param name="maxRows">Upper bound on the number of rows read; must be non-negative.</param>
internal DataDebuggerPreview(IDataView data, int maxRows = Defaults.MaxRows)
{
    Contracts.CheckValue(data, nameof(data));
    Contracts.CheckParam(maxRows >= 0, nameof(maxRows));
    Schema = data.Schema;

    int columnCount = data.Schema.Count;

    // One value list per schema column.
    var columnValues = new List<object>[columnCount];
    for (int c = 0; c < columnValues.Length; c++)
    {
        columnValues[c] = new List<object>();
    }

    var rowInfos = new List<RowInfo>();

    using (var cursor = data.GetRowCursorForAllColumns())
    {
        // One setter per column, created via MakeSetter for the column's raw type.
        // Each setter is handed the current RowInfo and that column's value list.
        var setters = new Action<RowInfo, List<object>>[columnCount];
        for (int c = 0; c < columnCount; c++)
        {
            setters[c] = Utils.MarshalInvoke(MakeSetter<int>, data.Schema[c].Type.RawType, cursor, c);
        }

        // The row budget is tested before MoveNext, so no extra row is pulled once it is spent.
        for (int taken = 0; taken < maxRows && cursor.MoveNext(); taken++)
        {
            var rowInfo = new RowInfo(columnCount);
            for (int c = 0; c < setters.Length; c++)
            {
                setters[c](rowInfo, columnValues[c]);
            }
            rowInfos.Add(rowInfo);
        }
    }

    RowView = rowInfos.ToImmutableArray();

    var columnInfos = new ColumnInfo[columnCount];
    for (int c = 0; c < columnCount; c++)
    {
        columnInfos[c] = new ColumnInfo(data.Schema[c], columnValues[c].ToArray());
    }
    ColumnView = columnInfos.ToImmutableArray();
}
/// <summary>
/// Trains <paramref name="caliTrainer"/> (when it needs training) on score/label/weight triples
/// read from <paramref name="data"/>, then wraps this predictor with the resulting calibrator.
/// </summary>
/// <param name="ch">Channel used for argument checks and passed to <c>FinishTraining</c>.</param>
/// <param name="data">Data to read training examples from.</param>
/// <param name="caliTrainer">Calibrator trainer to feed and finish.</param>
/// <param name="maxRows">When positive, stop after this many examples have been processed; otherwise read all rows.</param>
/// <returns>The calibrated predictor created by <c>CalibratorUtils.CreateCalibratedPredictor</c>.</returns>
public IPredictor Calibrate(IChannel ch, IDataView data, ICalibratorTrainer caliTrainer, int maxRows)
{
    Host.CheckValue(ch, nameof(ch));
    ch.CheckValue(data, nameof(data));
    ch.CheckValue(caliTrainer, nameof(caliTrainer));

    if (caliTrainer.NeedsTraining)
    {
        var bound = new Bound(this, new RoleMappedSchema(data.Schema));
        using (var curs = data.GetRowCursorForAllColumns())
        {
            // Accumulates the cleanup callbacks handed back by each getter factory.
            // The getters are created inside the try so that a failure in a later factory
            // still runs the callbacks already collected from the earlier ones.
            Action disposer = null;
            try
            {
                var scoreGetter = (ValueGetter<Single>)bound.CreateScoreGetter(curs, col => true, out Action disp);
                disposer += disp;

                // We assume that we can use the label column of the first predictor, since if the labels are not identical
                // then the whole model is garbage anyway.
                var labelGetter = bound.GetLabelGetter(curs, 0, out disp);
                disposer += disp;

                var weightGetter = bound.GetWeightGetter(curs, 0, out disp);
                disposer += disp;

                int num = 0;
                while (curs.MoveNext())
                {
                    // Rows with a non-finite label, score or weight are skipped and do not
                    // count toward maxRows.
                    Single label = 0;
                    labelGetter(ref label);
                    if (!FloatUtils.IsFinite(label))
                    {
                        continue;
                    }

                    Single score = 0;
                    scoreGetter(ref score);
                    if (!FloatUtils.IsFinite(score))
                    {
                        continue;
                    }

                    Single weight = 0;
                    weightGetter(ref weight);
                    if (!FloatUtils.IsFinite(weight))
                    {
                        continue;
                    }

                    // A strictly positive label is passed as the positive class.
                    caliTrainer.ProcessTrainingExample(score, label > 0, weight);

                    if (maxRows > 0 && ++num >= maxRows)
                    {
                        break;
                    }
                }
            }
            finally
            {
                disposer?.Invoke();
            }
        }
    }

    var calibrator = caliTrainer.FinishTraining(ch);
    return CalibratorUtils.CreateCalibratedPredictor(Host, this, calibrator);
}
/// <summary>
/// Creates a native estimator for the configured grain and data columns, feeds it rows from
/// <paramref name="input"/> until it reports that it is no longer training, and returns a handle
/// to the native transformer created from it.
/// </summary>
/// <param name="input">Data view supplying the training rows.</param>
/// <returns>A safe handle wrapping the native transformer.</returns>
/// <exception cref="Exception">Thrown with the native error details when any native call reports failure.</exception>
private unsafe TransformerEstimatorSafeHandle CreateTransformerFromEstimator(IDataView input)
{
    IntPtr estimator;
    IntPtr errorHandle;
    bool success;

    // Typed wrappers for every schema column this transformer uses, keyed by column name.
    var allColumns = input.Schema
        .Where(x => _allColumnNames.Contains(x.Name))
        .Select(x => TypedColumn.CreateTypedColumn(x, _dataColumns))
        .ToDictionary(x => x.Column.Name);

    // Create TypeId[] for types of grain and data columns.
    var dataColumnTypes = new TypeId[_dataColumns.Length];
    var grainColumnTypes = new TypeId[_grainColumns.Length];

    foreach (var column in _grainColumns.Select((value, index) => new { index, value }))
    {
        grainColumnTypes[column.index] = allColumns[column.value].GetTypeId();
    }

    // This loop only fills the type array; the estimator is created once, after it.
    foreach (var column in _dataColumns.Select((value, index) => new { index, value }))
    {
        dataColumnTypes[column.index] = allColumns[column.value].GetTypeId();
    }

    fixed (bool* suppressErrors = &_suppressTypeErrors)
    fixed (TypeId* rawDataColumnTypes = dataColumnTypes)
    fixed (TypeId* rawGrainColumnTypes = grainColumnTypes)
    {
        success = CreateEstimatorNative(rawGrainColumnTypes, new IntPtr(grainColumnTypes.Length), rawDataColumnTypes, new IntPtr(dataColumnTypes.Length), _imputeMode, suppressErrors, out estimator, out errorHandle);
    }
    if (!success)
    {
        throw new Exception(GetErrorDetailsAndFreeNativeMemory(errorHandle));
    }

    using (var estimatorHandle = new TransformerEstimatorSafeHandle(estimator, DestroyEstimatorNative))
    {
        TrainingState trainingState;
        FitResult fitResult;

        // Create buffer to hold binary data.
        var memoryStream = new MemoryStream(4096);
        var binaryWriter = new BinaryWriter(memoryStream, Encoding.UTF8);

        // A using statement cannot be used because ResetCursor replaces the cursor through a ref;
        // the finally block below disposes whichever cursor is current, on success and on failure.
        var cursor = input.GetRowCursorForAllColumns();
        try
        {
            // Initialize getters.
            foreach (var column in allColumns.Values)
            {
                column.InitializeGetter(cursor);
            }

            // Start the loop with the cursor in a valid state already.
            var valid = cursor.MoveNext();

            // Make sure it is not an empty data frame.
            Debug.Assert(valid);

            while (true)
            {
                // Get the state of the native estimator.
                success = GetStateNative(estimatorHandle, out trainingState, out errorHandle);
                if (!success)
                {
                    throw new Exception(GetErrorDetailsAndFreeNativeMemory(errorHandle));
                }

                // If we are no longer training then exit loop.
                if (trainingState != TrainingState.Training)
                {
                    break;
                }

                // Build byte array to send column data to native featurizer.
                BuildColumnByteArray(allColumns, ref binaryWriter);

                // Fit the estimator. Only the bytes up to the stream position are handed over.
                fixed (byte* bufferPointer = memoryStream.GetBuffer())
                {
                    var binaryArchiveData = new NativeBinaryArchiveData()
                    {
                        Data = bufferPointer,
                        DataSize = new IntPtr(memoryStream.Position)
                    };
                    success = FitNative(estimatorHandle, binaryArchiveData, out fitResult, out errorHandle);
                }

                // Reset memory stream to 0 so the next row overwrites the buffer.
                memoryStream.Position = 0;

                if (!success)
                {
                    throw new Exception(GetErrorDetailsAndFreeNativeMemory(errorHandle));
                }

                // If we need to reset the data to the beginning.
                // NOTE(review): ResetCursor appears to leave the new cursor on its first row, so the
                // MoveNext below would step past that row after a reset — confirm this is intended.
                if (fitResult == FitResult.ResetAndContinue)
                {
                    ResetCursor(input, ref cursor, allColumns);
                }

                // If we are at the end of the data.
                if (!cursor.MoveNext())
                {
                    // If we get here fitResult should never be ResetAndContinue.
                    Debug.Assert(fitResult != FitResult.ResetAndContinue);

                    // Capture this call's own result; previously the stale value from FitNative was tested.
                    success = OnDataCompletedNative(estimatorHandle, out errorHandle);
                    if (!success)
                    {
                        throw new Exception(GetErrorDetailsAndFreeNativeMemory(errorHandle));
                    }

                    ResetCursor(input, ref cursor, allColumns);
                }
            }

            // When done training complete the estimator.
            success = CompleteTrainingNative(estimatorHandle, out errorHandle);
            if (!success)
            {
                throw new Exception(GetErrorDetailsAndFreeNativeMemory(errorHandle));
            }

            // Create the native transformer from the estimator.
            success = CreateTransformerFromEstimatorNative(estimatorHandle, out IntPtr transformer, out errorHandle);
            if (!success)
            {
                throw new Exception(GetErrorDetailsAndFreeNativeMemory(errorHandle));
            }

            return new TransformerEstimatorSafeHandle(transformer, DestroyTransformerNative);
        }
        finally
        {
            // Manually dispose of the cursor since we don't have a using statement.
            cursor.Dispose();
        }
    }
}
/// <summary>
/// Creates a native estimator for the configured grain and data columns, repeatedly passes over
/// <paramref name="input"/> feeding it serialized rows until it reports <c>FitResult.Complete</c>,
/// and returns a handle to the native transformer created from it.
/// </summary>
/// <param name="input">Data view supplying the training rows.</param>
/// <returns>A safe handle wrapping the native transformer.</returns>
/// <exception cref="Exception">Thrown with the native error details when any native call reports failure.</exception>
private unsafe TransformerEstimatorSafeHandle CreateTransformerFromEstimator(IDataView input)
{
    IntPtr estimator;
    IntPtr errorHandle;
    bool success;

    // Typed wrappers for every schema column this transformer uses, keyed by column name.
    var allColumns = input.Schema
        .Where(x => _allColumnNames.Contains(x.Name))
        .Select(x => TypedColumn.CreateTypedColumn(x, _dataColumns))
        .ToDictionary(x => x.Column.Name);

    // Create buffer to hold binary data; passed by ref to BuildColumnByteArray for each row.
    var columnBuffer = new byte[4096];

    // Create TypeId[] for types of grain and data columns.
    var dataColumnTypes = new TypeId[_dataColumns.Length];
    var grainColumnTypes = new TypeId[_grainColumns.Length];

    foreach (var column in _grainColumns.Select((value, index) => new { index, value }))
    {
        grainColumnTypes[column.index] = allColumns[column.value].GetTypeId();
    }

    // This loop only fills the type array; the estimator is created once, after it.
    foreach (var column in _dataColumns.Select((value, index) => new { index, value }))
    {
        dataColumnTypes[column.index] = allColumns[column.value].GetTypeId();
    }

    fixed (bool* suppressErrors = &_suppressTypeErrors)
    fixed (TypeId* rawDataColumnTypes = dataColumnTypes)
    fixed (TypeId* rawGrainColumnTypes = grainColumnTypes)
    {
        success = CreateEstimatorNative(rawGrainColumnTypes, new IntPtr(grainColumnTypes.Length), rawDataColumnTypes, new IntPtr(dataColumnTypes.Length), _imputeMode, suppressErrors, out estimator, out errorHandle);
    }
    if (!success)
    {
        throw new Exception(GetErrorDetailsAndFreeNativeMemory(errorHandle));
    }

    using (var estimatorHandler = new TransformerEstimatorSafeHandle(estimator, DestroyEstimatorNative))
    {
        var fitResult = FitResult.Continue;

        // Each iteration is one pass over the data with a fresh cursor.
        while (fitResult != FitResult.Complete)
        {
            using (var cursor = input.GetRowCursorForAllColumns())
            {
                // Initialize getters for start of loop.
                foreach (var column in allColumns.Values)
                {
                    column.InitializeGetter(cursor);
                }

                // Feed rows while the estimator asks for more and rows remain in this pass.
                while ((fitResult == FitResult.Continue || fitResult == FitResult.ResetAndContinue) && cursor.MoveNext())
                {
                    BuildColumnByteArray(allColumns, ref columnBuffer, out int serializedDataLength);

                    fixed (byte* bufferPointer = columnBuffer)
                    {
                        // Only the first serializedDataLength bytes of the buffer are handed over.
                        var binaryArchiveData = new NativeBinaryArchiveData()
                        {
                            Data = bufferPointer,
                            DataSize = new IntPtr(serializedDataLength)
                        };
                        success = FitNative(estimatorHandler, binaryArchiveData, out fitResult, out errorHandle);
                    }
                    if (!success)
                    {
                        throw new Exception(GetErrorDetailsAndFreeNativeMemory(errorHandle));
                    }
                }

                // Called at the end of every pass; the fitResult it returns decides whether
                // the outer loop runs another pass.
                success = CompleteTrainingNative(estimatorHandler, out fitResult, out errorHandle);
                if (!success)
                {
                    throw new Exception(GetErrorDetailsAndFreeNativeMemory(errorHandle));
                }
            }
        }

        success = CreateTransformerFromEstimatorNative(estimatorHandler, out IntPtr transformer, out errorHandle);
        if (!success)
        {
            throw new Exception(GetErrorDetailsAndFreeNativeMemory(errorHandle));
        }

        return new TransformerEstimatorSafeHandle(transformer, DestroyTransformerNative);
    }
}