/// <summary>
/// Trains a matrix factorization model on <paramref name="data"/>, optionally passing
/// <paramref name="validData"/> to the trainer as a validation set.
/// </summary>
/// <param name="ch">Channel used for assertions, argument checks and exception creation.</param>
/// <param name="data">Training data. Must have a label of type R4 or R8 and the matrix index columns
/// resolved by <c>RecommenderUtils.CheckAndGetMatrixIndexColumns</c>.</param>
/// <param name="validData">Optional validation data. When given, its label must be R4 or R8 and its
/// matrix column/row index types must equal those of <paramref name="data"/>.</param>
/// <returns>The predictor constructed from the trained buffer.</returns>
private MatrixFactorizationPredictor TrainCore(IChannel ch, RoleMappedData data, RoleMappedData validData = null)
{
    Host.AssertValue(ch);
    ch.AssertValue(data);
    ch.AssertValueOrNull(validData);

    ColumnInfo matrixColumnIndexColInfo;
    ColumnInfo matrixRowIndexColInfo;
    ColumnInfo validMatrixColumnIndexColInfo = null;
    ColumnInfo validMatrixRowIndexColInfo = null;

    ch.CheckValue(data.Schema.Label, nameof(data), "Input data did not have a unique label");
    RecommenderUtils.CheckAndGetMatrixIndexColumns(data, out matrixColumnIndexColInfo, out matrixRowIndexColInfo, isDecode: false);
    if (data.Schema.Label.Type != NumberType.R4 && data.Schema.Label.Type != NumberType.R8)
    {
        throw ch.Except("Column '{0}' for label should be floating point, but is instead {1}",
            data.Schema.Label.Name, data.Schema.Label.Type);
    }

    MatrixFactorizationPredictor predictor;
    if (validData != null)
    {
        ch.CheckValue(validData.Schema.Label, nameof(validData), "Input validation data did not have a unique label");
        RecommenderUtils.CheckAndGetMatrixIndexColumns(validData, out validMatrixColumnIndexColInfo, out validMatrixRowIndexColInfo, isDecode: false);
        if (validData.Schema.Label.Type != NumberType.R4 && validData.Schema.Label.Type != NumberType.R8)
        {
            // Report the validation label itself, not the training label, so the message
            // points at the column that actually failed the check.
            throw ch.Except("Column '{0}' for validation label should be floating point, but is instead {1}",
                validData.Schema.Label.Name, validData.Schema.Label.Type);
        }

        if (!matrixColumnIndexColInfo.Type.Equals(validMatrixColumnIndexColInfo.Type))
        {
            throw ch.ExceptParam(nameof(validData), "Train and validation sets' matrix-column types differed, {0} vs. {1}",
                matrixColumnIndexColInfo.Type, validMatrixColumnIndexColInfo.Type);
        }

        if (!matrixRowIndexColInfo.Type.Equals(validMatrixRowIndexColInfo.Type))
        {
            throw ch.ExceptParam(nameof(validData), "Train and validation sets' matrix-row types differed, {0} vs. {1}",
                matrixRowIndexColInfo.Type, validMatrixRowIndexColInfo.Type);
        }
    }

    int colCount = matrixColumnIndexColInfo.Type.KeyCount;
    int rowCount = matrixRowIndexColInfo.Type.KeyCount;
    ch.Assert(rowCount > 0);
    ch.Assert(colCount > 0);

    // Checks for equality on the validation set ensure it is correct here.
    using (var cursor = data.Data.GetRowCursor(c => c == matrixColumnIndexColInfo.Index || c == matrixRowIndexColInfo.Index || c == data.Schema.Label.Index))
    {
        // LibMF works only over single precision floats, but we want to be able to consume either.
        var labGetter = RowCursorUtils.GetGetterAs<float>(NumberType.R4, cursor, data.Schema.Label.Index);
        var matrixColumnIndexGetter = RowCursorUtils.GetGetterAs<uint>(NumberType.U4, cursor, matrixColumnIndexColInfo.Index);
        var matrixRowIndexGetter = RowCursorUtils.GetGetterAs<uint>(NumberType.U4, cursor, matrixRowIndexColInfo.Index);

        if (validData == null)
        {
            // Have the trainer do its work.
            using (var buffer = PrepareBuffer())
            {
                buffer.Train(ch, rowCount, colCount, cursor, labGetter, matrixRowIndexGetter, matrixColumnIndexGetter);
                predictor = new MatrixFactorizationPredictor(Host, buffer, matrixColumnIndexColInfo.Type.AsKey, matrixRowIndexColInfo.Type.AsKey);
            }
        }
        else
        {
            using (var validCursor = validData.Data.GetRowCursor(
                c => c == validMatrixColumnIndexColInfo.Index || c == validMatrixRowIndexColInfo.Index || c == validData.Schema.Label.Index))
            {
                ValueGetter<float> validLabelGetter = RowCursorUtils.GetGetterAs<float>(NumberType.R4, validCursor, validData.Schema.Label.Index);
                var validMatrixColumnIndexGetter = RowCursorUtils.GetGetterAs<uint>(NumberType.U4, validCursor, validMatrixColumnIndexColInfo.Index);
                var validMatrixRowIndexGetter = RowCursorUtils.GetGetterAs<uint>(NumberType.U4, validCursor, validMatrixRowIndexColInfo.Index);

                // Have the trainer do its work.
                using (var buffer = PrepareBuffer())
                {
                    buffer.TrainWithValidation(ch, rowCount, colCount,
                        cursor, labGetter, matrixRowIndexGetter, matrixColumnIndexGetter,
                        validCursor, validLabelGetter, validMatrixRowIndexGetter, validMatrixColumnIndexGetter);
                    predictor = new MatrixFactorizationPredictor(Host, buffer, matrixColumnIndexColInfo.Type.AsKey, matrixRowIndexColInfo.Type.AsKey);
                }
            }
        }
    }

    return predictor;
}
/// <summary>
/// Trains a matrix factorization model on <paramref name="data"/>, optionally passing
/// <paramref name="validData"/> to the trainer as a validation set.
/// </summary>
/// <param name="ch">Channel used for assertions, argument checks and exception creation.</param>
/// <param name="data">Training data. Must have a label of type Single or Double and the matrix index
/// columns resolved by <c>RecommenderUtils.CheckAndGetMatrixIndexColumns</c>.</param>
/// <param name="validData">Optional validation data. When given, its label must be Single or Double and
/// its matrix column/row index types must equal those of <paramref name="data"/>.</param>
/// <returns>The model parameters constructed from the trained buffer.</returns>
private MatrixFactorizationModelParameters TrainCore(IChannel ch, RoleMappedData data, RoleMappedData validData = null)
{
    _host.AssertValue(ch);
    ch.AssertValue(data);
    ch.AssertValueOrNull(validData);

    ch.CheckParam(data.Schema.Label.HasValue, nameof(data), "Input data did not have a unique label");
    RecommenderUtils.CheckAndGetMatrixIndexColumns(data, out var matrixColumnIndexColInfo, out var matrixRowIndexColInfo, isDecode: false);
    var labelCol = data.Schema.Label.Value;
    if (labelCol.Type != NumberDataViewType.Single && labelCol.Type != NumberDataViewType.Double)
    {
        throw ch.Except("Column '{0}' for label should be floating point, but is instead {1}", labelCol.Name, labelCol.Type);
    }

    MatrixFactorizationModelParameters predictor;
    if (validData != null)
    {
        ch.CheckParam(validData.Schema.Label.HasValue, nameof(validData), "Input validation data did not have a unique label");
        RecommenderUtils.CheckAndGetMatrixIndexColumns(validData, out var validMatrixColumnIndexColInfo, out var validMatrixRowIndexColInfo, isDecode: false);
        var validLabelCol = validData.Schema.Label.Value;
        if (validLabelCol.Type != NumberDataViewType.Single && validLabelCol.Type != NumberDataViewType.Double)
        {
            throw ch.Except("Column '{0}' for validation label should be floating point, but is instead {1}",
                validLabelCol.Name, validLabelCol.Type);
        }

        if (!matrixColumnIndexColInfo.Type.Equals(validMatrixColumnIndexColInfo.Type))
        {
            throw ch.ExceptParam(nameof(validData), "Train and validation sets' matrix-column types differed, {0} vs. {1}",
                matrixColumnIndexColInfo.Type, validMatrixColumnIndexColInfo.Type);
        }

        if (!matrixRowIndexColInfo.Type.Equals(validMatrixRowIndexColInfo.Type))
        {
            throw ch.ExceptParam(nameof(validData), "Train and validation sets' matrix-row types differed, {0} vs. {1}",
                matrixRowIndexColInfo.Type, validMatrixRowIndexColInfo.Type);
        }
    }

    int colCount = matrixColumnIndexColInfo.Type.GetKeyCountAsInt32(_host);
    int rowCount = matrixRowIndexColInfo.Type.GetKeyCountAsInt32(_host);
    ch.Assert(rowCount > 0);
    ch.Assert(colCount > 0);

    // Checks for equality on the validation set ensure it is correct here.
    using (var cursor = data.Data.GetRowCursor(matrixColumnIndexColInfo, matrixRowIndexColInfo, labelCol))
    {
        // LibMF works only over single precision floats, but we want to be able to consume either.
        var labGetter = RowCursorUtils.GetGetterAs<float>(NumberDataViewType.Single, cursor, labelCol.Index);
        var matrixColumnIndexGetter = RowCursorUtils.GetGetterAs<uint>(NumberDataViewType.UInt32, cursor, matrixColumnIndexColInfo.Index);
        var matrixRowIndexGetter = RowCursorUtils.GetGetterAs<uint>(NumberDataViewType.UInt32, cursor, matrixRowIndexColInfo.Index);

        if (validData == null)
        {
            // Have the trainer do its work.
            using (var buffer = PrepareBuffer())
            {
                buffer.Train(ch, rowCount, colCount, cursor, labGetter, matrixRowIndexGetter, matrixColumnIndexGetter);
                predictor = new MatrixFactorizationModelParameters(_host, buffer,
                    (KeyDataViewType)matrixColumnIndexColInfo.Type, (KeyDataViewType)matrixRowIndexColInfo.Type);
            }
        }
        else
        {
            // The validation columns resolved during the checks above were scoped to that block,
            // so they are resolved again here.
            RecommenderUtils.CheckAndGetMatrixIndexColumns(validData, out var validMatrixColumnIndexColInfo, out var validMatrixRowIndexColInfo, isDecode: false);
            var validLabelCol = validData.Schema.Label.Value;

            // Request the validation set's own columns: the getters below are created from the
            // validation columns' indices, so the cursor must be opened over those same columns
            // rather than over the training set's columns.
            using (var validCursor = validData.Data.GetRowCursor(validMatrixColumnIndexColInfo, validMatrixRowIndexColInfo, validLabelCol))
            {
                ValueGetter<float> validLabelGetter = RowCursorUtils.GetGetterAs<float>(NumberDataViewType.Single, validCursor, validLabelCol.Index);
                var validMatrixColumnIndexGetter = RowCursorUtils.GetGetterAs<uint>(NumberDataViewType.UInt32, validCursor, validMatrixColumnIndexColInfo.Index);
                var validMatrixRowIndexGetter = RowCursorUtils.GetGetterAs<uint>(NumberDataViewType.UInt32, validCursor, validMatrixRowIndexColInfo.Index);

                // Have the trainer do its work.
                using (var buffer = PrepareBuffer())
                {
                    buffer.TrainWithValidation(ch, rowCount, colCount,
                        cursor, labGetter, matrixRowIndexGetter, matrixColumnIndexGetter,
                        validCursor, validLabelGetter, validMatrixRowIndexGetter, validMatrixColumnIndexGetter);
                    predictor = new MatrixFactorizationModelParameters(_host, buffer,
                        (KeyDataViewType)matrixColumnIndexColInfo.Type, (KeyDataViewType)matrixRowIndexColInfo.Type);
                }
            }
        }
    }

    return predictor;
}