Example #1
0
        /// <summary>
        /// Reads data from train and test files, pre-modification.
        /// Creates both loaders, verifies that both files exist, loads the train file,
        /// logs the target probabilities and the number of outliers to drop, configures
        /// the test loader with the same id and skipped columns as the train loader,
        /// loads the test file and finally calls <c>ModifyData()</c>.
        /// </summary>
        /// <exception cref="FileNotFoundException">
        /// The train file or the test file does not exist.
        /// </exception>
        public override void LoadData()
        {
            _trainLoader = TargetName != null ? new DataLoader <FType>(TargetName) : new DataLoader <FType>();
            _testLoader  = TargetName != null ? new DataLoader <FType>(TargetName) : new DataLoader <FType>();

            // Fail early, before any loading, if either input file is missing.
            if (!File.Exists(TrainPath))
            {
                var message = "train file " + TrainPath + " not found";
                Logger.Log(message);
                // Pass the same text to the exception so it is not thrown with an empty message.
                throw new FileNotFoundException(message, TrainPath);
            }

            if (!File.Exists(TestPath))
            {
                var message = "test file " + TestPath + " not found";
                Logger.Log(message);
                throw new FileNotFoundException(message, TestPath);
            }

            // loading train file
            _trainLoader.IsLoadForLearning = true;
            if (IdName != null)
            {
                _trainLoader.AddIdsString(IdName);
            }
            _trainLoader.Load(TrainPath);

            // Log every target value together with its probability.
            foreach (var key in _trainLoader.TargetProb.Keys)
            {
                Logger.Log("prob[" + key.ToString("F0") + "] = " + _trainLoader.TargetProb[key].ToString("F06"));
            }

            // The product is truncated to an integer by the cast.
            Logger.Log("Outliers to drop: " + (int)(_trainLoader.TotalDataLines * OutliersPrct));

            // configure the test loader like the train loader
            foreach (var id in _trainLoader.Ids.Keys) // the same id's
            {
                _testLoader.AddIdColumn(id);
            }

            foreach (var col in _trainLoader.SkippedColumns.Keys) // the same columns
            {
                _testLoader.AddSkipColumn(col);
            }

            // loading test file
            _testLoader.Load(TestPath);

            ModifyData();
        }
Example #2
0
        /// <summary>
        /// Loads the train and test files and groups the test rows by id.
        /// After the train file is loaded, <c>_trainProblem</c> is created through
        /// <c>_problemCreator</c>. The test loader is configured with the same id and
        /// skipped columns as the train loader; its rows are then collected into
        /// <c>_testDataDict</c> (id -> list of value rows) and <c>_resultDict</c>
        /// (id -> target of the first row seen for that id).
        /// </summary>
        /// <exception cref="FileNotFoundException">
        /// The train file or the test file does not exist.
        /// </exception>
        public override void LoadData()
        {
            // NOTE(review): all four settings are hard-coded to empty strings, so the
            // File.Exists checks below cannot succeed as written — confirm where the
            // real paths, id columns and target name are supposed to come from.
            string trainPath = "";
            string testPath  = "";
            string ids       = "";
            string target    = "";

            _trainPath = trainPath;
            _testPath  = testPath;
            _target    = target;

            _trainLoader = _target != null ? new DataLoader <FType>(_target) : new DataLoader <FType>();
            _testLoader  = _target != null ? new DataLoader <FType>(_target) : new DataLoader <FType>();

            if (!File.Exists(_trainPath))
            {
                var message = "train file " + _trainPath + " not found";
                Logger.Log(message);
                // Pass the same text to the exception so it is not thrown with an empty message.
                throw new FileNotFoundException(message, _trainPath);
            }

            // Check the TEST path here; the previous code re-checked the train path,
            // so a missing test file was never reported.
            if (!File.Exists(_testPath))
            {
                var message = "test file " + _testPath + " not found";
                Logger.Log(message);
                throw new FileNotFoundException(message, _testPath);
            }

            // loading train file
            _trainLoader.AddIdsString(ids);
            _trainLoader.ProceedRowFunc = ProceedRow;
            _trainLoader.Load(_trainPath);

            _trainProblem = _problemCreator.CreateProblem();

            // configure the test loader like the train loader
            foreach (var id in _trainLoader.Ids.Keys) // the same id's
            {
                _testLoader.AddIdColumn(id);
            }

            foreach (var col in _trainLoader.SkippedColumns.Keys) // the same columns
            {
                _testLoader.AddSkipColumn(col);
            }

            // loading test file
            _testLoader.Load(_testPath);

            _testDataDict = new Dictionary <string, List <double[]> >(); // test data: id -> list of rows for that id
            _resultDict   = new Dictionary <string, int>();              // test data result: id -> target

            // transform the test data
            foreach (var row in _testLoader.Rows)
            {
                // store the result; only the first target seen for an id is kept
                if (!_resultDict.ContainsKey(row.Id))
                {
                    _resultDict.Add(row.Id, Convert.ToInt32(row.Target));
                }

                // store the bureau response: copy the first NVars values of the row
                var txy = new double[_testLoader.NVars];
                for (int k = 0; k < _testLoader.NVars; k++)
                {
                    txy[k] = row.Values[k];
                }

                // single lookup instead of ContainsKey + Add + indexer
                List <double[]> rowsForId;
                if (!_testDataDict.TryGetValue(row.Id, out rowsForId))
                {
                    rowsForId = new List <double[]>();
                    _testDataDict.Add(row.Id, rowsForId);
                }
                rowsForId.Add(txy);
            }
        }
Example #3
0
        /// <summary>
        /// Reads data from train and test files, pre-modification.
        /// Creates both loaders, verifies that both files exist, loads the train file,
        /// logs the target probabilities and the number of outliers to drop, configures
        /// the test loader with the same id and skipped columns as the train loader,
        /// loads the test file and finally calls <c>ModifyData()</c>.
        /// </summary>
        /// <exception cref="FileNotFoundException">
        /// The train file or the test file does not exist.
        /// </exception>
        public override void LoadData()
        {
            _trainLoader = TargetName != null ? new DataLoader<FType>(TargetName) : new DataLoader<FType>();
            _testLoader = TargetName != null ? new DataLoader<FType>(TargetName) : new DataLoader<FType>();

            // Fail early, before any loading, if either input file is missing.
            if (!File.Exists(TrainPath))
            {
                var message = "train file " + TrainPath + " not found";
                Logger.Log(message);
                // Pass the same text to the exception so it is not thrown with an empty message.
                throw new FileNotFoundException(message, TrainPath);
            }

            if (!File.Exists(TestPath))
            {
                var message = "test file " + TestPath + " not found";
                Logger.Log(message);
                throw new FileNotFoundException(message, TestPath);
            }

            // loading train file
            _trainLoader.IsLoadForLearning = true;
            // Only register id columns when an id name is configured.
            // NOTE(review): assumes AddIdsString has no required effect for a null
            // argument — confirm against DataLoader.
            if (IdName != null)
            {
                _trainLoader.AddIdsString(IdName);
            }
            _trainLoader.Load(TrainPath);

            // Log every target value together with its probability.
            foreach (var key in _trainLoader.TargetProb.Keys)
            {
                Logger.Log("prob[" + key.ToString("F0") + "] = " + _trainLoader.TargetProb[key].ToString("F06"));
            }

            // The product is truncated to an integer by the cast.
            Logger.Log("Outliers to drop: " + (int)(_trainLoader.TotalDataLines * OutliersPrct));

            // configure the test loader like the train loader
            foreach (var id in _trainLoader.Ids.Keys) // the same id's
            {
                _testLoader.AddIdColumn(id);
            }

            foreach (var col in _trainLoader.SkippedColumns.Keys) // the same columns
            {
                _testLoader.AddSkipColumn(col);
            }

            // loading test file
            _testLoader.Load(TestPath);

            ModifyData();
        }
Example #4
0
        /// <summary>
        /// Loads the train and test files and groups the test rows by id.
        /// After the train file is loaded, <c>_trainProblem</c> is created through
        /// <c>_problemCreator</c>. The test loader is configured with the same id and
        /// skipped columns as the train loader; its rows are then collected into
        /// <c>_testDataDict</c> (id -> list of coefficient rows) and <c>_resultDict</c>
        /// (id -> target of the first row seen for that id).
        /// </summary>
        /// <exception cref="FileNotFoundException">
        /// The train file or the test file does not exist.
        /// </exception>
        public override void LoadData()
        {
            // NOTE(review): all four settings are hard-coded to empty strings, so the
            // File.Exists checks below cannot succeed as written — confirm where the
            // real paths, id columns and target name are supposed to come from.
            string trainPath = "";
            string testPath = "";
            string ids = "";
            string target = "";

            _trainPath = trainPath;
            _testPath = testPath;
            _target = target;

            _trainLoader = _target != null ? new DataLoader<FType>(_target) : new DataLoader<FType>();
            _testLoader = _target != null ? new DataLoader<FType>(_target) : new DataLoader<FType>();

            if (!File.Exists(_trainPath))
            {
                var message = "train file " + _trainPath + " not found";
                Logger.Log(message);
                // Pass the same text to the exception so it is not thrown with an empty message.
                throw new FileNotFoundException(message, _trainPath);
            }

            // Check the TEST path here; the previous code re-checked the train path,
            // so a missing test file was never reported.
            if (!File.Exists(_testPath))
            {
                var message = "test file " + _testPath + " not found";
                Logger.Log(message);
                throw new FileNotFoundException(message, _testPath);
            }

            // loading train file
            _trainLoader.AddIdsString(ids);
            _trainLoader.ProceedRowFunc = ProceedRow;
            _trainLoader.Load(_trainPath);

            _trainProblem = _problemCreator.CreateProblem();

            // configure the test loader like the train loader
            foreach (var id in _trainLoader.Ids.Keys) // the same id's
            {
                _testLoader.AddIdColumn(id);
            }

            foreach (var col in _trainLoader.SkippedColumns.Keys) // the same columns
            {
                _testLoader.AddSkipColumn(col);
            }

            // loading test file
            _testLoader.Load(_testPath);

            _testDataDict = new Dictionary<string, List<double[]>>(); // test data: id -> list of rows for that id
            _resultDict = new Dictionary<string, int>(); // test data result: id -> target

            // transform the test data
            foreach (var row in _testLoader.Rows)
            {
                // store the result; only the first target seen for an id is kept
                if (!_resultDict.ContainsKey(row.Id))
                {
                    _resultDict.Add(row.Id, Convert.ToInt32(row.Target));
                }

                // store the bureau response: copy the first NVars coefficients of the row
                var txy = new double[_testLoader.NVars];
                for (int k = 0; k < _testLoader.NVars; k++)
                {
                    txy[k] = row.Coeffs[k];
                }

                // single lookup instead of ContainsKey + Add + indexer
                List<double[]> rowsForId;
                if (!_testDataDict.TryGetValue(row.Id, out rowsForId))
                {
                    rowsForId = new List<double[]>();
                    _testDataDict.Add(row.Id, rowsForId);
                }
                rowsForId.Add(txy);
            }
        }
Example #5
0
 /// <summary>
 /// Forwards <paramref name="col"/> to the train loader's <c>AddSkipColumn</c>.
 /// </summary>
 /// <param name="col">Column name handed to the train loader as a column to skip.</param>
 public void AddDropColumn(string col) => _trainLoader.AddSkipColumn(col);