/// <summary>
/// Deletes a single file of a dataset and answers with the refreshed dataset list.
/// </summary>
/// <param name="parameters">Carries the <c>id</c> and <c>file</c> values forwarded to the parser's delete call.</param>
/// <returns>An OK result wrapping the datasets the parser reports after the deletion.</returns>
public ActionResult<IList<EdgarDataset>> DeleteDataset(DatasetParameters parameters)
{
    edgarDatasetParser.DeleteDatasetFile(parameters.id, parameters.file);

    // Re-query after the delete so the response reflects the current state.
    IList<EdgarDataset> remaining = edgarDatasetParser.GetDatasets();
    return Ok(remaining);
}
/// <summary>
/// A default-constructed <c>DatasetParameters</c> must not add any entry to the parameter dictionary.
/// </summary>
public void TestDatasetParametersDefault()
{
    var emitted = new Dictionary<string, string>();
    var defaults = new DatasetParameters();

    defaults.AddParameters(emitted);

    Assert.Empty(emitted);
}
/// <summary>
/// Stores the parameter sets and loads the training data and, when supplied, the validation data.
/// </summary>
/// <param name="cp">Common parameters, stored in <c>Common</c>.</param>
/// <param name="dp">Dataset parameters, stored in <c>Dataset</c>.</param>
/// <param name="trainData">Sparse training data handed to <c>LoadTrainingData</c>.</param>
/// <param name="validData">Optional sparse validation data; when null, <c>Validation</c> is not assigned.</param>
public Datasets(CommonParameters cp, DatasetParameters dp, DataSparse trainData, DataSparse validData)
{
    Common = cp;
    Dataset = dp;

    // Training must be loaded first: the validation loader receives it as an argument.
    Training = LoadTrainingData(trainData);

    if (validData == null)
    {
        return;
    }

    Validation = LoadValidationData(Training, validData);
}
/// <summary>
/// Creates a dataset from a sparse matrix through <c>PInvoke.DatasetCreateFromCsr</c>.
/// </summary>
/// <param name="data">Sparse matrix supplying <c>Data</c>, <c>RowExtents</c>, <c>ColumnIndices</c> and <c>RowCount</c>.</param>
/// <param name="numCol">Number of columns passed to the native call and verified against <c>NumFeatures</c>.</param>
/// <param name="cp">Common parameters; stored and serialised into the native parameter string.</param>
/// <param name="dp">Dataset parameters; stored and serialised into the native parameter string.</param>
/// <param name="labels">Optional labels, applied with <c>SetLabels</c> when not null.</param>
/// <param name="weights">Optional weights, applied with <c>SetWeights</c> when not null.</param>
/// <param name="groups">Optional groups, applied with <c>SetGroups</c> when not null.</param>
/// <param name="reference">Optional dataset whose handle is passed as the reference; a zero handle is passed when null.</param>
/// <exception cref="Exception">The created dataset does not report <paramref name="numCol"/> features or <c>data.RowCount</c> rows.</exception>
public unsafe Dataset(SparseMatrix data, int numCol, CommonParameters cp, DatasetParameters dp,
                      float[] labels = null, float[] weights = null, int[] groups = null,
                      Dataset reference = null)
{
    CommonParameters = cp;
    DatasetParameters = dp;
    var serializedParams = ParamsToString(cp, dp);
    _handle = IntPtr.Zero;

    // The three backing arrays stay pinned for the duration of the native call.
    fixed (float* valuesPtr = data.Data)
    fixed (int* rowExtentsPtr = data.RowExtents)
    fixed (int* columnIndicesPtr = data.ColumnIndices)
    {
        IntPtr referenceHandle = reference == null ? IntPtr.Zero : reference._handle;

        var status = PInvoke.DatasetCreateFromCsr(
            rowExtentsPtr,
            columnIndicesPtr,
            valuesPtr,
            data.RowExtents.Length,
            data.Data.Length,
            numCol,
            serializedParams,
            referenceHandle,
            ref _handle);

        PInvokeException.Check(status, nameof(PInvoke.DatasetCreateFromCsr));
    }

    if (labels != null)
    {
        SetLabels(labels);
    }

    if (weights != null)
    {
        SetWeights(weights);
    }

    if (groups != null)
    {
        SetGroups(groups);
    }

    // Sanity-check the shape reported by the freshly created dataset.
    if (NumFeatures != numCol)
    {
        throw new Exception("Expected GetNumCols to be equal to numCol");
    }

    if (NumRows != data.RowCount)
    {
        throw new Exception("Expected GetNumRows to be equal to numTotalRow");
    }
}
/// <summary>
/// Collects the entries contributed by both parameter objects and joins them into one string.
/// </summary>
/// <param name="cp">Common parameters; skipped when null.</param>
/// <param name="dp">Dataset parameters; skipped when null. Added after <paramref name="cp"/>.</param>
/// <returns>The result of <c>ParamsHelper.JoinParameters</c> over the collected entries.</returns>
private static string ParamsToString(CommonParameters cp, DatasetParameters dp)
{
    var collected = new Dictionary<string, string>();

    cp?.AddParameters(collected);
    dp?.AddParameters(collected);

    return ParamsHelper.JoinParameters(collected);
}
/// <summary>
/// Builds a <c>Dataset</c> from the features, column count, labels, weights and groups of the sampled data.
/// </summary>
/// <param name="data">Sampled sparse data supplying the constructor arguments.</param>
/// <param name="cp">Common parameters forwarded to the dataset constructor.</param>
/// <param name="dp">Dataset parameters forwarded to the dataset constructor.</param>
/// <returns>The newly constructed dataset.</returns>
private Dataset CreateDatasetFromSamplingData(DataSparse data, CommonParameters cp, DatasetParameters dp)
{
    return new Dataset(data.Features, data.NumColumns, cp, dp, data.Labels, data.Weights, data.Groups);
}
/// <summary>
/// Loads a dataset from a <c>.bin</c> file, adding additional parameters and using the optional
/// reference dataset to align bins.
/// </summary>
/// <param name="fileName">Path of an existing file whose name ends with <c>.bin</c>.</param>
/// <param name="cp">Common parameters; stored and serialised into the native parameter string.</param>
/// <param name="dp">Dataset parameters; stored and serialised into the native parameter string.</param>
/// <param name="reference">Optional reference dataset; a zero handle is passed when null.</param>
/// <exception cref="ArgumentException">The file does not exist or does not end with <c>.bin</c>.</exception>
public Dataset(string fileName, CommonParameters cp, DatasetParameters dp, Dataset reference = null)
{
    const string BinExtension = ".bin";

    Check.NonNull(fileName, nameof(fileName));

    if (!System.IO.File.Exists(fileName))
    {
        throw new ArgumentException($"File {fileName} does not exist");
    }

    // Ordinal comparison: a file extension is not linguistic text, so it must not depend on the current culture.
    if (!fileName.EndsWith(BinExtension, StringComparison.Ordinal))
    {
        throw new ArgumentException($"File {fileName} is not a .bin file");
    }

    // Keep the parameters on the instance like the other constructors do, so GetSubset can fall back to them.
    CommonParameters = cp;
    DatasetParameters = dp;

    var pmString = ParamsToString(cp, dp);
    IntPtr refHandle = reference == null ? IntPtr.Zero : reference.Handle;

    // The native call receives the path with the ".bin" suffix stripped.
    var pathWithoutExtension = fileName.Substring(0, fileName.Length - BinExtension.Length);

    PInvokeException.Check(
        PInvoke.DatasetCreateFromFile(pathWithoutExtension, pmString, refHandle, ref _handle),
        nameof(PInvoke.DatasetCreateFromFile));
}
/// <summary>
/// Creates a dataset by reference to an existing one through <c>PInvoke.DatasetCreateByReference</c>
/// and copies the reference's parameter objects.
/// </summary>
/// <param name="reference">Dataset whose handle and parameters are reused. Must not be null.</param>
/// <param name="numTotalRow">Row count passed to the native call.</param>
/// <param name="labels">Optional labels, applied with <c>SetLabels</c> when not null.</param>
/// <param name="weights">Optional weights, applied with <c>SetWeights</c> when not null.</param>
/// <param name="groups">Optional groups, applied with <c>SetGroups</c> when not null.</param>
/// <exception cref="ArgumentNullException"><paramref name="reference"/> is null.</exception>
public Dataset(Dataset reference, int numTotalRow, float[] labels = null, float[] weights = null, int[] groups = null)
{
    // The reference's parameters are read below, so a null reference can never succeed.
    // Reject it before the native call instead of failing afterwards with a NullReferenceException
    // once a native dataset has already been created.
    if (reference == null)
    {
        throw new ArgumentNullException(nameof(reference));
    }

    PInvokeException.Check(
        PInvoke.DatasetCreateByReference(reference.Handle, numTotalRow, ref _handle),
        nameof(PInvoke.DatasetCreateByReference));

    CommonParameters = reference.CommonParameters;
    DatasetParameters = reference.DatasetParameters;

    if (labels != null)
    {
        SetLabels(labels);
    }

    if (weights != null)
    {
        SetWeights(weights);
    }

    if (groups != null)
    {
        SetGroups(groups);
    }
}
/*
 * public static int DatasetCreateFromCsr(
 *  int[] indPtr,
 *  int[] indices,
 *  float[] data,
 *  long nIndPtr,
 *  long numElem,
 *  long numCol,
 *  string parameters,
 *  IntPtr reference,
 *  ref IntPtr ret)
 * {
 *  return DatasetCreateFromCsr(
 *      indPtr, CApiDType.Int32,
 *      indices, data, CApiDType.Float32,
 *      nIndPtr, numElem, numCol, parameters, reference, ref ret);
 * }
 */

/*
 * public static int DatasetCreateFromCsc(
 *  int[] colPtr,
 *  int[] indices,
 *  float[] data,
 *  long nColPtr,
 *  long nElem,
 *  long numRow,
 *  string parameters,
 *  IntPtr reference,
 *  ref IntPtr ret)
 * {
 *  return DatasetCreateFromCsc(
 *      colPtr, CApiDType.Int32,
 *      indices,
 *      data, CApiDType.Float32,
 *      nColPtr, nElem, numRow, parameters, reference, ref ret);
 * }
 */

// Disabled constructor: create from single matrix.
//Dataset(float[,] data,bool isRowMajor, Parameters pms = null, Dataset reference = null)
//{
//    var pmStr = (pms != null) ? pms.ToString() : "";
//    var r = (reference != null) ? reference.Handle : IntPtr.Zero;
//    var rows = data.GetLength(0);
//    var cols = data.GetLength(1);
//    PInvokeException.Check(PInvoke.DatasetCreateFromMat(data, rows, cols, isRowMajoe, pmStr, r, ref _handle),
//                           nameof(PInvoke.DatasetCreateFromMat));
//}

/*
 * public static int DatasetCreateFromMat(
 *  float[] data,
 *  int nRow,
 *  int nCol,
 *  bool isRowMajor,
 *  string parameters,
 *  IntPtr reference,
 *  ref IntPtr ret)
 * {
 *  return DatasetCreateFromMat(
 *      data, CApiDType.Float32,
 *      nRow, nCol,
 *      (isRowMajor ? 1 : 0),
 *      parameters, reference, ref ret);
 * }
 */

/*
 * public static int DatasetCreateFromMats(
 *  float[][] data,
 *  int[] nRow,
 *  int nCol,
 *  bool isRowMajor,
 *  string parameters,
 *  IntPtr reference,
 *  ref IntPtr ret)
 * {
 *  return DatasetCreateFromMats(
 *      data.Length,
 *      data, CApiDType.Float32,
 *      nRow, nCol,
 *      (isRowMajor ? 1 : 0),
 *      parameters, reference, ref ret);
 * }
 */

/// <summary>
/// Creates a new dataset containing the given rows of this dataset through <c>PInvoke.DatasetGetSubset</c>.
/// </summary>
/// <param name="usedRowIndices">Row indices handed to the native call together with their count.</param>
/// <param name="cp">Common parameters for the subset; this dataset's <c>CommonParameters</c> when null.</param>
/// <param name="dp">Dataset parameters for the subset; this dataset's <c>DatasetParameters</c> when null.</param>
/// <returns>A dataset wrapping the handle produced by the native call.</returns>
public unsafe Dataset GetSubset(int[] usedRowIndices, CommonParameters cp = null, DatasetParameters dp = null)
{
    // Fall back to the parameters this dataset was created with.
    cp = cp ?? CommonParameters;
    dp = dp ?? DatasetParameters;

    var serializedParams = ParamsToString(cp, dp);
    IntPtr subsetHandle = IntPtr.Zero;

    fixed (int* rowIndicesPtr = usedRowIndices)
    {
        var status = PInvoke.DatasetGetSubset(
            _handle,
            rowIndicesPtr,
            usedRowIndices.Length,
            serializedParams,
            ref subsetHandle);

        PInvokeException.Check(status, nameof(PInvoke.DatasetGetSubset));
    }

    return new Dataset(subsetHandle, cp, dp);
}
/// <summary>
/// Creates a dataset from per-column sampled values and indices through
/// <c>PInvoke.DatasetCreateFromSampledColumn</c>, then applies the optional labels, weights and groups.
/// </summary>
/// <param name="sampleValuePerColumn">One value array per column; the first <paramref name="numCol"/> entries are pinned and passed to the native call.</param>
/// <param name="sampleIndicesPerColumn">One index array per column; the first <paramref name="numCol"/> entries are pinned and passed to the native call.</param>
/// <param name="numCol">Number of columns; also verified against <c>NumFeatures</c> after creation.</param>
/// <param name="sampleNonZeroCntPerColumn">Per-column counts passed to the native call (presumably the number of sampled entries in each column; verify against the native API).</param>
/// <param name="numSampleRow">Sample row count passed to the native call.</param>
/// <param name="numTotalRow">Total row count passed to the native call and verified against <c>NumRows</c>.</param>
/// <param name="cp">Common parameters; stored and serialised into the native parameter string.</param>
/// <param name="dp">Dataset parameters; stored and serialised into the native parameter string.</param>
/// <param name="labels">Optional labels, applied with <c>SetLabels</c> when not null.</param>
/// <param name="weights">Optional weights, applied with <c>SetWeights</c> when not null.</param>
/// <param name="groups">Optional groups, applied with <c>SetGroups</c> when not null.</param>
/// <exception cref="Exception">The created dataset does not report <paramref name="numCol"/> features or <paramref name="numTotalRow"/> rows.</exception>
public unsafe Dataset(double[][] sampleValuePerColumn, int[][] sampleIndicesPerColumn, int numCol, int[] sampleNonZeroCntPerColumn, int numSampleRow, int numTotalRow, CommonParameters cp, DatasetParameters dp, float[] labels = null, float[] weights = null, int[] groups = null)
{
    CommonParameters = cp;
    DatasetParameters = dp;
    var pmString = ParamsToString(cp, dp);
    _handle = IntPtr.Zero;
    // Pin every column array with a GCHandle so the addresses stored in the pointer arrays
    // below stay valid until the native call has returned.
    GCHandle[] gcValues = new GCHandle[numCol];
    GCHandle[] gcIndices = new GCHandle[numCol];
    try
    {
        // Managed arrays holding the address of each pinned column array.
        double *[] ptrArrayValues = new double *[numCol];
        int *[] ptrArrayIndices = new int *[numCol];
        for (int i = 0; i < numCol; i++)
        {
            gcValues[i] = GCHandle.Alloc(sampleValuePerColumn[i], GCHandleType.Pinned);
            ptrArrayValues[i] = (double *)gcValues[i].AddrOfPinnedObject().ToPointer();
            gcIndices[i] = GCHandle.Alloc(sampleIndicesPerColumn[i], GCHandleType.Pinned);
            ptrArrayIndices[i] = (int *)gcIndices[i].AddrOfPinnedObject().ToPointer();
        }
        ;
        // The pointer arrays themselves are fixed too, and handed over as IntPtr values.
        fixed(double **ptrValues = ptrArrayValues)
        fixed(int **ptrIndices = ptrArrayIndices)
        fixed(int *ptrSampleNonZeroCntPerColumn = sampleNonZeroCntPerColumn)
        {
            PInvokeException.Check(PInvoke.DatasetCreateFromSampledColumn( (IntPtr)ptrValues, (IntPtr)ptrIndices, numCol, ptrSampleNonZeroCntPerColumn, numSampleRow, numTotalRow, pmString, ref _handle), nameof(PInvoke.DatasetCreateFromSampledColumn));
        }
    }
    finally
    {
        // Release every handle that was actually allocated, even when pinning or the native
        // call failed part-way through.
        for (int i = 0; i < numCol; i++)
        {
            if (gcValues[i].IsAllocated)
            {
                gcValues[i].Free();
            }
            if (gcIndices[i].IsAllocated)
            {
                gcIndices[i].Free();
            }
        }
        ;
    }
    if (labels != null)
    {
        SetLabels(labels);
    }
    if (weights != null)
    {
        SetWeights(weights);
    }
    if (groups != null)
    {
        SetGroups(groups);
    }
    // Sanity-check the shape reported by the freshly created dataset.
    if (NumFeatures != numCol)
    {
        throw new Exception("Expected GetNumCols to be equal to numCol");
    }
    if (NumRows != numTotalRow)
    {
        throw new Exception("Expected GetNumRows to be equal to numTotalRow");
    }
}
/// <summary>
/// Wraps an already created native dataset handle together with its parameter objects.
/// </summary>
/// <param name="h">Handle stored in <c>_handle</c>.</param>
/// <param name="cp">Common parameters stored on the instance.</param>
/// <param name="dp">Dataset parameters stored on the instance.</param>
private Dataset(IntPtr h, CommonParameters cp, DatasetParameters dp)
{
    CommonParameters = cp;
    DatasetParameters = dp;
    _handle = h;
}
/// <summary>
/// Creates a dataset from dense rows through <c>PInvoke.DatasetCreateFromMats</c>; every row is
/// passed to the native call as its own one-row, row-major matrix.
/// </summary>
/// <param name="data">The rows. Each row must be non-null and hold at least <paramref name="numCol"/> values.</param>
/// <param name="numCol">Number of columns passed to the native call and verified against <c>NumFeatures</c>.</param>
/// <param name="cp">Common parameters; stored and serialised into the native parameter string.</param>
/// <param name="dp">Dataset parameters; stored and serialised into the native parameter string.</param>
/// <param name="labels">Optional labels, applied with <c>SetLabels</c> when not null.</param>
/// <param name="weights">Optional weights, applied with <c>SetWeights</c> when not null.</param>
/// <param name="groups">Optional groups, applied with <c>SetGroups</c> when not null.</param>
/// <param name="reference">Optional dataset whose handle is passed as the reference; a zero handle is passed when null.</param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentException">A row is null or shorter than <paramref name="numCol"/>.</exception>
/// <exception cref="Exception">The created dataset does not report <paramref name="numCol"/> features or <c>data.Length</c> rows.</exception>
public unsafe Dataset(float[][] data, int numCol, CommonParameters cp, DatasetParameters dp, float[] labels = null, float[] weights = null, int[] groups = null, Dataset reference = null)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    // The native call is given raw row pointers and told that every row has numCol columns.
    // A null or short row would make it read outside the pinned buffer, so reject it here.
    for (int i = 0; i < data.Length; i++)
    {
        if (data[i] == null || data[i].Length < numCol)
        {
            throw new ArgumentException($"Row {i} must contain at least {numCol} values", nameof(data));
        }
    }

    CommonParameters = cp;
    DatasetParameters = dp;
    var pmString = ParamsToString(cp, dp);
    _handle = IntPtr.Zero;

    // Each row is pinned so its address stays valid until the native call has returned.
    var gcHandles = new List<GCHandle>(data.Length);
    try
    {
        float*[] dataPtrs = new float*[data.Length];
        int[] nRows = new int[data.Length];
        for (int i = 0; i < data.Length; i++)
        {
            var hdl = GCHandle.Alloc(data[i], GCHandleType.Pinned);
            gcHandles.Add(hdl);
            dataPtrs[i] = (float*)hdl.AddrOfPinnedObject().ToPointer();
            nRows[i] = 1; // one row per matrix
        }

        fixed (float** dataPtr = dataPtrs)
        fixed (int* nRowsPtr = nRows)
        {
            PInvokeException.Check(PInvoke.DatasetCreateFromMats(
                                       data.Length,
                                       dataPtr,
                                       nRowsPtr,
                                       numCol,
                                       /*isRowMajor*/ true,
                                       pmString,
                                       reference?._handle ?? IntPtr.Zero,
                                       ref _handle),
                                   nameof(PInvoke.DatasetCreateFromMats));
        }
    }
    finally
    {
        // Unpin whatever was pinned, even when pinning or the native call failed part-way through.
        foreach (var hdl in gcHandles)
        {
            if (hdl.IsAllocated)
            {
                hdl.Free();
            }
        }
    }

    if (labels != null)
    {
        SetLabels(labels);
    }

    if (weights != null)
    {
        SetWeights(weights);
    }

    if (groups != null)
    {
        SetGroups(groups);
    }

    // Sanity-check the shape reported by the freshly created dataset.
    if (NumFeatures != numCol)
    {
        throw new Exception("Expected GetNumCols to be equal to numCol");
    }

    if (NumRows != data.Length)
    {
        throw new Exception("Expected GetNumRows to be equal to numTotalRow");
    }
}
//////////////////////////////////////////////// /// TODO: JsChartGenerator:methods //////////////////////////////////////////////// string GetDataSet(IEnumerable <string> data, DatasetParameters parameters) { return(@$ " var dataFirst = {{ data: [{string.Concat(data.Select(l => l + ", "))}],
/// <summary>
/// Builds a randomly sized dataset using one of three constructors (dense rows, sparse matrix,
/// or sampled columns), chosen from <paramref name="rand"/>, and asserts the resulting shape.
/// </summary>
/// <param name="rand">Source of all random sizes, choices and values.</param>
/// <returns>The created dataset.</returns>
public static Dataset CreateRandom(Random rand)
{
    // NOTE: the order of calls on rand is significant for reproducibility with a seeded Random.
    var numTotalRow = rand.Next(100, 500);
    var numColumns = rand.Next(1, 10);
    var cp = new CommonParameters();
    var dp = new DatasetParameters { MinDataInLeaf = 1, MinDataInBin = 1 };

    Dataset created;

    if (rand.Next(3) == 0)
    {
        // Dense path: one fully populated float row per dataset row.
        var denseRows = new float[numTotalRow][];
        for (int r = 0; r < numTotalRow; ++r)
        {
            var values = new float[numColumns];
            for (int c = 0; c < values.Length; ++c)
            {
                values[c] = (float)rand.NextDouble();
            }

            denseRows[r] = values;
        }

        created = new Dataset(denseRows, numColumns, cp, dp);
    }
    else if (rand.Next(2) == 0)
    {
        // Sparse path: roughly one cell in three gets a value, the rest stay zero.
        var grid = new float[numTotalRow, numColumns];
        for (int r = 0; r < numTotalRow; ++r)
        {
            for (int c = 0; c < numColumns; ++c)
            {
                if (rand.Next(3) != 0)
                {
                    continue;
                }

                grid[r, c] = (float)rand.NextDouble();
            }
        }

        var sparse = Dense2Sparse(grid);
        created = new Dataset(sparse, numColumns, cp, dp);
    }
    else
    {
        // Sampled-column path.
        var numSampleRow = rand.Next(5, numTotalRow);

        var columnValues = new double[numColumns][];
        for (int c = 0; c < numColumns; ++c)
        {
            var values = new double[numTotalRow];
            for (int r = 0; r < values.Length; ++r)
            {
                values[r] = rand.NextDouble();
            }

            columnValues[c] = values;
        }

        // Select the sample indices and record the per-column sizes.
        var columnIndices = new int[numColumns][];
        var columnSizes = new int[numColumns];
        for (int c = 0; c < numColumns; ++c)
        {
            var indices = new int[numTotalRow];
            for (int r = 0; r < numSampleRow; ++r)
            {
                indices[r] = r;
            }

            columnIndices[c] = indices;
            columnSizes[c] = numTotalRow;
        }

        created = new Dataset(columnValues, columnIndices, numColumns, columnSizes, numSampleRow, numTotalRow, cp, dp);
    }

    Assert.Equal(numTotalRow, created.NumRows);
    Assert.Equal(numColumns, created.NumFeatures);
    return created;
}