コード例 #1
0
        /// <summary>
        /// Asks the dataset parser to delete the file identified by the request and
        /// responds with the dataset list as it stands afterwards.
        /// </summary>
        /// <param name="parameters">Request values; <c>id</c> and <c>file</c> are forwarded to the parser.</param>
        /// <returns>An OK result wrapping the list returned by <c>GetDatasets</c>.</returns>
        public ActionResult <IList <EdgarDataset> > DeleteDataset(DatasetParameters parameters)
        {
            edgarDatasetParser.DeleteDatasetFile(parameters.id, parameters.file);

            // Query again only after the delete so the response reflects the new state.
            IList <EdgarDataset> remaining = edgarDatasetParser.GetDatasets();
            return Ok(remaining);
        }
コード例 #2
0
        /// <summary>
        /// A default-constructed <c>DatasetParameters</c> must not add any entry to the parameter dictionary.
        /// </summary>
        public void TestDatasetParametersDefault()
        {
            var defaults  = new DatasetParameters();
            var collected = new Dictionary <string, string>();

            defaults.AddParameters(collected);

            Assert.Empty(collected);
        }
コード例 #3
0
ファイル: TrainerBase.cs プロジェクト: rca22/LightGBM.Net
        /// <summary>
        /// Stores both parameter objects, loads the training data and, when validation data
        /// is supplied, loads it as well (passing the training set to the loader).
        /// </summary>
        /// <param name="cp">Common parameters, kept in <c>Common</c>.</param>
        /// <param name="dp">Dataset parameters, kept in <c>Dataset</c>.</param>
        /// <param name="trainData">Data passed to <c>LoadTrainingData</c>.</param>
        /// <param name="validData">Optional validation data; <c>Validation</c> is left unassigned when null.</param>
        public Datasets(CommonParameters cp, DatasetParameters dp, DataSparse trainData, DataSparse validData)
        {
            Common  = cp;
            Dataset = dp;

            Training = LoadTrainingData(trainData);

            // Validation data is optional; nothing more to do without it.
            if (validData == null)
            {
                return;
            }

            Validation = LoadValidationData(Training, validData);
        }
コード例 #4
0
        /// <summary>
        /// Creates a dataset from a sparse matrix by passing its row extents, column indices and
        /// values to the native <c>DatasetCreateFromCsr</c> call, then attaches any supplied
        /// labels, weights and groups and verifies the resulting dimensions.
        /// </summary>
        /// <param name="data">Sparse matrix supplying <c>RowExtents</c>, <c>ColumnIndices</c>, <c>Data</c> and <c>RowCount</c>.</param>
        /// <param name="numCol">Column count passed to the native call; the created dataset must report the same number of features.</param>
        /// <param name="cp">Common parameters; stored on the instance and included in the native parameter string.</param>
        /// <param name="dp">Dataset parameters; stored on the instance and included in the native parameter string.</param>
        /// <param name="labels">Optional labels, applied with <c>SetLabels</c> when non-null.</param>
        /// <param name="weights">Optional weights, applied with <c>SetWeights</c> when non-null.</param>
        /// <param name="groups">Optional groups, applied with <c>SetGroups</c> when non-null.</param>
        /// <param name="reference">Optional dataset whose handle is passed as the native reference argument; a zero handle is passed when null.</param>
        /// <exception cref="Exception">
        /// The created dataset reports a feature count different from <paramref name="numCol"/>
        /// or a row count different from <c>data.RowCount</c>.
        /// </exception>
        public unsafe Dataset(SparseMatrix data,
                              int numCol,
                              CommonParameters cp,
                              DatasetParameters dp,
                              float[] labels    = null,
                              float[] weights   = null,
                              int[] groups      = null,
                              Dataset reference = null)
        {
            CommonParameters  = cp;
            DatasetParameters = dp;
            var pmString = ParamsToString(cp, dp);

            _handle = IntPtr.Zero;

            // Pin all three backing arrays for the duration of the native call.
            fixed(float *dataPtr = data.Data)
            fixed(int *indPtr = data.RowExtents, indices = data.ColumnIndices)
            {
                PInvokeException.Check(PInvoke.DatasetCreateFromCsr(
                                           indPtr,
                                           indices,
                                           dataPtr,
                                           data.RowExtents.Length,
                                           data.Data.Length,
                                           numCol,
                                           pmString,
                                           reference?._handle ?? IntPtr.Zero,
                                           ref _handle
                                           ), nameof(PInvoke.DatasetCreateFromCsr));
            }

            if (labels != null)
            {
                SetLabels(labels);
            }
            if (weights != null)
            {
                SetWeights(weights);
            }
            if (groups != null)
            {
                SetGroups(groups);
            }

            // Sanity-check what the native side actually built against what was requested.
            if (NumFeatures != numCol)
            {
                throw new Exception("Expected GetNumCols to be equal to numCol");
            }

            if (NumRows != data.RowCount)
            {
                // This overload has no numTotalRow parameter; the expected value is data.RowCount.
                throw new Exception("Expected GetNumRows to be equal to data.RowCount");
            }
        }
コード例 #5
0
        /// <summary>
        /// Collects the entries contributed by both parameter objects into one dictionary and
        /// returns the string produced by <c>ParamsHelper.JoinParameters</c>.
        /// </summary>
        /// <param name="cp">Common parameters; skipped when null.</param>
        /// <param name="dp">Dataset parameters; skipped when null. Added after <paramref name="cp"/>.</param>
        /// <returns>The joined parameter string.</returns>
        private static string ParamsToString(CommonParameters cp, DatasetParameters dp)
        {
            var merged = new Dictionary <string, string>();

            // Either source may be absent; order matters because both write into the same dictionary.
            cp?.AddParameters(merged);
            dp?.AddParameters(merged);

            return ParamsHelper.JoinParameters(merged);
        }
コード例 #6
0
ファイル: TrainerBase.cs プロジェクト: rca22/LightGBM.Net
        /// <summary>
        /// Builds a <see cref="Dataset"/> from the features, column count, labels, weights
        /// and groups carried by <paramref name="data"/>.
        /// </summary>
        /// <param name="data">Source of the features and the optional per-row metadata.</param>
        /// <param name="cp">Common parameters forwarded to the dataset constructor.</param>
        /// <param name="dp">Dataset parameters forwarded to the dataset constructor.</param>
        /// <returns>The newly constructed dataset.</returns>
        private Dataset CreateDatasetFromSamplingData(DataSparse data,
                                                      CommonParameters cp,
                                                      DatasetParameters dp)
        {
            return new Dataset(data.Features, data.NumColumns, cp, dp,
                               data.Labels, data.Weights, data.Groups);
        }
コード例 #7
0
        /// <summary>
        /// Loads a dataset from a <c>.bin</c> file, adding additional parameters and using the
        /// optional reference dataset to align bins.
        /// </summary>
        /// <param name="fileName">Path of an existing file whose name ends in <c>.bin</c>.</param>
        /// <param name="cp">Common parameters; stored on the instance and included in the native parameter string.</param>
        /// <param name="dp">Dataset parameters; stored on the instance and included in the native parameter string.</param>
        /// <param name="reference">Optional reference dataset; a zero handle is passed to the native call when null.</param>
        /// <exception cref="ArgumentException">The file does not exist or its name does not end in <c>.bin</c>.</exception>
        public Dataset(string fileName, CommonParameters cp, DatasetParameters dp, Dataset reference = null)
        {
            Check.NonNull(fileName, nameof(fileName));
            if (!System.IO.File.Exists(fileName))
            {
                throw new ArgumentException(string.Format("File {0} does not exist", fileName));
            }
            // Ordinal comparison: exactly four characters are stripped below, so the suffix
            // must match character-for-character rather than by culture-specific rules.
            if (!fileName.EndsWith(".bin", StringComparison.Ordinal))
            {
                throw new ArgumentException(string.Format("File {0} is not a .bin file", fileName));
            }

            // Record the parameters like the other constructors do, so that GetSubset's
            // fallback to the instance parameters works for file-loaded datasets too.
            CommonParameters  = cp;
            DatasetParameters = dp;
            var pmString = ParamsToString(cp, dp);

            IntPtr refHandle = (reference == null ? IntPtr.Zero : reference.Handle);

            // The native call receives the path without its ".bin" suffix.
            // NOTE(review): presumably the native loader appends the extension itself - confirm.
            PInvokeException.Check(PInvoke.DatasetCreateFromFile(fileName.Substring(0, fileName.Length - 4), pmString, refHandle, ref _handle),
                                   nameof(PInvoke.DatasetCreateFromFile));
        }
コード例 #8
0
ファイル: Dataset.cs プロジェクト: kdjsrt/LightGBM.Net
        /// <summary>
        /// Creates a dataset through the native <c>DatasetCreateByReference</c> call and copies
        /// the reference dataset's parameter objects, then attaches any supplied labels,
        /// weights and groups.
        /// </summary>
        /// <param name="reference">Dataset whose handle and parameters are used; must not be null.</param>
        /// <param name="numTotalRow">Row count passed to the native call.</param>
        /// <param name="labels">Optional labels, applied with <c>SetLabels</c> when non-null.</param>
        /// <param name="weights">Optional weights, applied with <c>SetWeights</c> when non-null.</param>
        /// <param name="groups">Optional groups, applied with <c>SetGroups</c> when non-null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="reference"/> is null.</exception>
        public Dataset(Dataset reference, int numTotalRow, float[] labels = null, float[] weights = null, int[] groups = null)
        {
            // The parameters are read from the reference below, so a null reference can never
            // succeed; fail before the native call instead of with a NullReferenceException after it.
            if (reference == null)
            {
                throw new ArgumentNullException(nameof(reference));
            }

            PInvokeException.Check(PInvoke.DatasetCreateByReference(reference.Handle, numTotalRow, ref _handle),
                                   nameof(PInvoke.DatasetCreateByReference));

            CommonParameters  = reference.CommonParameters;
            DatasetParameters = reference.DatasetParameters;
            if (labels != null)
            {
                SetLabels(labels);
            }
            if (weights != null)
            {
                SetWeights(weights);
            }
            if (groups != null)
            {
                SetGroups(groups);
            }
        }
コード例 #9
0
/*
 *      public static int DatasetCreateFromCsr(
 *          int[] indPtr,
 *          int[] indices,
 *          float[] data,
 *          long nIndPtr,
 *          long numElem,
 *          long numCol,
 *          string parameters,
 *          IntPtr reference,
 *          ref IntPtr ret)
 *      {
 *          return DatasetCreateFromCsr(
 *              indPtr, CApiDType.Int32,
 *              indices, data, CApiDType.Float32,
 *              nIndPtr, numElem, numCol, parameters, reference, ref ret);
 *      }
 */

/*
 *      public static int DatasetCreateFromCsc(
 *          int[] colPtr,
 *          int[] indices,
 *          float[] data,
 *          long nColPtr,
 *          long nElem,
 *          long numRow,
 *          string parameters,
 *          IntPtr reference,
 *          ref IntPtr ret)
 *      {
 *          return DatasetCreateFromCsc(
 *              colPtr, CApiDType.Int32,
 *              indices,
 *              data, CApiDType.Float32,
 *              nColPtr, nElem, numRow, parameters, reference, ref ret);
 *      }
 */

        // Create from single matrix.
        // (Disabled: the constructor below is commented out, so this is a plain comment
        //  rather than an XML doc comment that would attach to the next declaration.)
        //Dataset(float[,] data,bool isRowMajor, Parameters pms = null, Dataset reference = null)
        //{
        //    var pmStr = (pms != null) ? pms.ToString() : "";
        //    var r = (reference != null) ? reference.Handle : IntPtr.Zero;
        //    var rows = data.GetLength(0);
        //    var cols = data.GetLength(1);

        //    PInvokeException.Check(PInvoke.DatasetCreateFromMat(data, rows, cols, isRowMajor, pmStr, r, ref _handle),
        //                           nameof(PInvoke.DatasetCreateFromMat));
        //}

/*
 *      public static int DatasetCreateFromMat(
 *          float[] data,
 *          int nRow,
 *          int nCol,
 *          bool isRowMajor,
 *          string parameters,
 *          IntPtr reference,
 *          ref IntPtr ret)
 *      {
 *          return DatasetCreateFromMat(
 *              data, CApiDType.Float32,
 *              nRow, nCol,
 *              (isRowMajor ? 1 : 0),
 *              parameters, reference, ref ret);
 *      }
 */
/*
 *      public static int DatasetCreateFromMats(
 *          float[][] data,
 *          int[] nRow,
 *          int nCol,
 *          bool isRowMajor,
 *          string parameters,
 *          IntPtr reference,
 *          ref IntPtr ret)
 *      {
 *          return DatasetCreateFromMats(
 *              data.Length,
 *              data, CApiDType.Float32,
 *              nRow, nCol,
 *              (isRowMajor ? 1 : 0),
 *              parameters, reference, ref ret);
 *      }
 */

        /// <summary>
        /// Creates a new dataset from this one through the native <c>DatasetGetSubset</c> call,
        /// using the supplied row indices.
        /// </summary>
        /// <param name="usedRowIndices">Row indices passed to the native call; must not be null.</param>
        /// <param name="cp">Common parameters for the subset; this dataset's own are used when null.</param>
        /// <param name="dp">Dataset parameters for the subset; this dataset's own are used when null.</param>
        /// <returns>A dataset wrapping the handle produced by the native call, carrying the effective parameters.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="usedRowIndices"/> is null.</exception>
        public unsafe Dataset GetSubset(int[] usedRowIndices, CommonParameters cp = null, DatasetParameters dp = null)
        {
            // Without this guard a null array surfaces as a NullReferenceException at
            // usedRowIndices.Length inside the pinned region.
            if (usedRowIndices == null)
            {
                throw new ArgumentNullException(nameof(usedRowIndices));
            }

            // Fall back to the parameters this dataset was created with.
            cp = cp ?? CommonParameters;
            dp = dp ?? DatasetParameters;

            var    pmString     = ParamsToString(cp, dp);
            IntPtr subsetHandle = IntPtr.Zero;

            fixed(int *rowIndexPtr = usedRowIndices)
            {
                PInvokeException.Check(PInvoke.DatasetGetSubset(_handle, rowIndexPtr, usedRowIndices.Length, pmString, ref subsetHandle),
                                       nameof(PInvoke.DatasetGetSubset));
            }

            return new Dataset(subsetHandle, cp, dp);
        }
コード例 #10
0
        /// <summary>
        /// Creates a dataset through the native <c>DatasetCreateFromSampledColumn</c> call from
        /// per-column value and index arrays, then attaches any supplied labels, weights and
        /// groups and verifies the resulting dimensions.
        /// </summary>
        /// <param name="sampleValuePerColumn">One value array per column; the first <paramref name="numCol"/> entries are pinned and passed natively.</param>
        /// <param name="sampleIndicesPerColumn">One index array per column; the first <paramref name="numCol"/> entries are pinned and passed natively.</param>
        /// <param name="numCol">Number of columns; the created dataset must report the same feature count.</param>
        /// <param name="sampleNonZeroCntPerColumn">Per-column counts passed to the native call.</param>
        /// <param name="numSampleRow">Sample row count passed to the native call.</param>
        /// <param name="numTotalRow">Total row count; the created dataset must report the same row count.</param>
        /// <param name="cp">Common parameters; stored on the instance and included in the native parameter string.</param>
        /// <param name="dp">Dataset parameters; stored on the instance and included in the native parameter string.</param>
        /// <param name="labels">Optional labels, applied with <c>SetLabels</c> when non-null.</param>
        /// <param name="weights">Optional weights, applied with <c>SetWeights</c> when non-null.</param>
        /// <param name="groups">Optional groups, applied with <c>SetGroups</c> when non-null.</param>
        /// <exception cref="Exception">The created dataset's feature or row count differs from the requested one.</exception>
        public unsafe Dataset(double[][] sampleValuePerColumn,
                              int[][] sampleIndicesPerColumn,
                              int numCol,
                              int[] sampleNonZeroCntPerColumn,
                              int numSampleRow,
                              int numTotalRow,
                              CommonParameters cp,
                              DatasetParameters dp,
                              float[] labels  = null,
                              float[] weights = null,
                              int[] groups    = null)
        {
            CommonParameters  = cp;
            DatasetParameters = dp;
            var nativeParams = ParamsToString(cp, dp);

            _handle = IntPtr.Zero;

            // Every column array is pinned individually so that its address stays valid
            // while the native call reads it.
            var valuePins = new GCHandle[numCol];
            var indexPins = new GCHandle[numCol];
            try
            {
                var valueAddresses = new double *[numCol];
                var indexAddresses = new int *[numCol];
                for (int col = 0; col < numCol; col++)
                {
                    valuePins[col]      = GCHandle.Alloc(sampleValuePerColumn[col], GCHandleType.Pinned);
                    valueAddresses[col] = (double *)valuePins[col].AddrOfPinnedObject().ToPointer();
                    indexPins[col]      = GCHandle.Alloc(sampleIndicesPerColumn[col], GCHandleType.Pinned);
                    indexAddresses[col] = (int *)indexPins[col].AddrOfPinnedObject().ToPointer();
                }

                fixed(double **valueTable = valueAddresses)
                fixed(int **indexTable = indexAddresses)
                fixed(int *nonZeroCounts = sampleNonZeroCntPerColumn)
                {
                    PInvokeException.Check(PInvoke.DatasetCreateFromSampledColumn(
                                               (IntPtr)valueTable, (IntPtr)indexTable, numCol, nonZeroCounts, numSampleRow, numTotalRow,
                                               nativeParams, ref _handle), nameof(PInvoke.DatasetCreateFromSampledColumn));
                }
            }
            finally
            {
                // Release whatever was pinned, including after a failure part-way through the loop.
                for (int col = 0; col < numCol; col++)
                {
                    if (valuePins[col].IsAllocated)
                    {
                        valuePins[col].Free();
                    }
                    if (indexPins[col].IsAllocated)
                    {
                        indexPins[col].Free();
                    }
                }
            }

            if (labels != null)
            {
                SetLabels(labels);
            }
            if (weights != null)
            {
                SetWeights(weights);
            }
            if (groups != null)
            {
                SetGroups(groups);
            }

            // Confirm the native side built what was asked for.
            if (NumFeatures != numCol)
            {
                throw new Exception("Expected GetNumCols to be equal to numCol");
            }

            if (NumRows != numTotalRow)
            {
                throw new Exception("Expected GetNumRows to be equal to numTotalRow");
            }
        }
コード例 #11
0
 /// <summary>
 /// Wraps an already-created native dataset handle together with the parameter
 /// objects associated with it.
 /// </summary>
 /// <param name="h">Native dataset handle to store.</param>
 /// <param name="cp">Common parameters to record on the instance.</param>
 /// <param name="dp">Dataset parameters to record on the instance.</param>
 private Dataset(IntPtr h, CommonParameters cp, DatasetParameters dp)
 {
     CommonParameters  = cp;
     DatasetParameters = dp;
     _handle           = h;
 }
コード例 #12
0
        /// <summary>
        /// Creates a dataset from dense rows. Each row is pinned and passed to the native
        /// <c>DatasetCreateFromMats</c> call as its own single-row, row-major matrix of
        /// <paramref name="numCol"/> columns; any supplied labels, weights and groups are then
        /// attached and the resulting dimensions verified.
        /// </summary>
        /// <param name="data">Rows of feature values; every row must be non-null and hold at least <paramref name="numCol"/> values.</param>
        /// <param name="numCol">Column count passed to the native call; the created dataset must report the same feature count.</param>
        /// <param name="cp">Common parameters; stored on the instance and included in the native parameter string.</param>
        /// <param name="dp">Dataset parameters; stored on the instance and included in the native parameter string.</param>
        /// <param name="labels">Optional labels, applied with <c>SetLabels</c> when non-null.</param>
        /// <param name="weights">Optional weights, applied with <c>SetWeights</c> when non-null.</param>
        /// <param name="groups">Optional groups, applied with <c>SetGroups</c> when non-null.</param>
        /// <param name="reference">Optional dataset whose handle is passed as the native reference argument; a zero handle is passed when null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentException">A row is null or shorter than <paramref name="numCol"/>.</exception>
        /// <exception cref="Exception">The created dataset's feature or row count differs from the requested one.</exception>
        public unsafe Dataset(float[][] data,
                              int numCol,
                              CommonParameters cp,
                              DatasetParameters dp,
                              float[] labels    = null,
                              float[] weights   = null,
                              int[] groups      = null,
                              Dataset reference = null)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            // Each row is handed to native code as a raw pointer to numCol values. A null row
            // would become a null pointer and a short row would be read past its end, so
            // reject both before anything is pinned.
            for (int i = 0; i < data.Length; i++)
            {
                if (data[i] == null || data[i].Length < numCol)
                {
                    throw new ArgumentException(
                              string.Format("Row {0} must be non-null and contain at least {1} values", i, numCol),
                              nameof(data));
                }
            }

            CommonParameters  = cp;
            DatasetParameters = dp;
            var pmString = ParamsToString(cp, dp);

            _handle = IntPtr.Zero;

            var gcHandles = new List <GCHandle>(data.Length);

            try
            {
                float *[] dataPtrs = new float *[data.Length];
                int[]     nRows    = new int[data.Length];
                for (int i = 0; i < data.Length; i++)
                {
                    var hdl = GCHandle.Alloc(data[i], GCHandleType.Pinned);
                    gcHandles.Add(hdl);
                    dataPtrs[i] = (float *)hdl.AddrOfPinnedObject().ToPointer();
                    nRows[i]    = 1;   // every pinned array is one row
                }

                fixed(float **dataPtr = dataPtrs)
                fixed(int *nRowsPtr = nRows)
                {
                    PInvokeException.Check(PInvoke.DatasetCreateFromMats(
                                               data.Length,
                                               dataPtr,
                                               nRowsPtr,
                                               numCol,
                                               /*isRowMajor*/ true,
                                               pmString,
                                               reference?._handle ?? IntPtr.Zero,
                                               ref _handle
                                               ), nameof(PInvoke.DatasetCreateFromMats));
                }
            }
            finally
            {
                // Unpin every row that was pinned, even if pinning or the native call failed.
                foreach (var hdl in gcHandles)
                {
                    if (hdl.IsAllocated)
                    {
                        hdl.Free();
                    }
                }
            }

            if (labels != null)
            {
                SetLabels(labels);
            }
            if (weights != null)
            {
                SetWeights(weights);
            }
            if (groups != null)
            {
                SetGroups(groups);
            }

            if (NumFeatures != numCol)
            {
                throw new Exception("Expected GetNumCols to be equal to numCol");
            }

            if (NumRows != data.Length)
            {
                // This overload has no numTotalRow parameter; the expected value is data.Length.
                throw new Exception("Expected GetNumRows to be equal to data.Length");
            }
        }
コード例 #13
0
 ////////////////////////////////////////////////
 /// TODO: JsChartGenerator:methods
 ////////////////////////////////////////////////
 string GetDataSet(IEnumerable <string> data, DatasetParameters parameters)
 {
     return(@$ " var dataFirst = {{
                     data: [{string.Concat(data.Select(l => l + ", "))}],
コード例 #14
0
ファイル: DatasetTest.cs プロジェクト: dave-c/LightGBM.Net
        /// <summary>
        /// Builds a randomly sized dataset through one of three constructor overloads
        /// (dense rows, sparse matrix, or sampled columns), chosen at random, and asserts that
        /// the created dataset reports the requested row and feature counts.
        /// </summary>
        /// <param name="rand">Source of every random draw; it decides the sizes, the overload and the values.</param>
        /// <returns>The dataset that was created.</returns>
        public static Dataset CreateRandom(Random rand)
        {
            var rowCount     = rand.Next(100, 500);
            var featureCount = rand.Next(1, 10);

            var cp = new CommonParameters();
            var dp = new DatasetParameters
            {
                MinDataInLeaf = 1,
                MinDataInBin  = 1
            };

            // Dense rows overload.
            if (rand.Next(3) == 0)
            {
                var denseRows = new float[rowCount][];
                for (int r = 0; r < rowCount; r++)
                {
                    var values = new float[featureCount];
                    for (int c = 0; c < values.Length; c++)
                    {
                        values[c] = (float)rand.NextDouble();
                    }
                    denseRows[r] = values;
                }

                var dense = new Dataset(denseRows, featureCount, cp, dp);
                Assert.Equal(rowCount, dense.NumRows);
                Assert.Equal(featureCount, dense.NumFeatures);
                return dense;
            }

            // Sparse overload: each cell gets a value on a separate one-in-three draw, otherwise stays zero.
            if (rand.Next(2) == 0)
            {
                var grid = new float[rowCount, featureCount];
                for (int r = 0; r < rowCount; r++)
                {
                    for (int c = 0; c < featureCount; c++)
                    {
                        if (rand.Next(3) == 0)
                        {
                            grid[r, c] = (float)rand.NextDouble();
                        }
                    }
                }

                var sparseInput = Dense2Sparse(grid);

                var sparse = new Dataset(sparseInput, featureCount, cp, dp);
                Assert.Equal(rowCount, sparse.NumRows);
                Assert.Equal(featureCount, sparse.NumFeatures);
                return sparse;
            }

            // Sampled-columns overload.
            var sampleRowCount = rand.Next(5, rowCount);

            var columnValues  = new double[featureCount][];
            var columnIndices = new int[featureCount][];
            var columnSizes   = new int[featureCount];
            for (int c = 0; c < featureCount; c++)
            {
                // Values: the only part of this loop that consumes random draws.
                var values = new double[rowCount];
                for (int r = 0; r < values.Length; r++)
                {
                    values[r] = rand.NextDouble();
                }
                columnValues[c] = values;

                // Indices: the first sampleRowCount slots hold 0..sampleRowCount-1, the rest stay zero.
                var indices = new int[rowCount];
                for (int r = 0; r < sampleRowCount; r++)
                {
                    indices[r] = r;
                }
                columnIndices[c] = indices;

                columnSizes[c] = rowCount;
            }

            var sampled = new Dataset(columnValues,
                                      columnIndices,
                                      featureCount,
                                      columnSizes,
                                      sampleRowCount,
                                      rowCount,
                                      cp,
                                      dp);
            Assert.Equal(rowCount, sampled.NumRows);
            Assert.Equal(featureCount, sampled.NumFeatures);
            return sampled;
        }