Esempio n. 1
0
 /// <summary>
 /// Loads the four training CSV files from the <c>Data</c> folder.
 /// </summary>
 /// <param name="fileFactTrain">Receives the result of loading <c>Data/facturation_train.csv</c>.</param>
 /// <param name="filePaieTrain">Receives the result of loading <c>Data/paiements_train.csv</c>.</param>
 /// <param name="filePerfTrain">Receives the result of loading <c>Data/performance_train.csv</c>.</param>
 /// <param name="fileTranTrain">Receives the result of loading <c>Data/transactions_train.csv</c>.</param>
 public static void LoadTrainSets(out FileFacturation fileFactTrain, out FilePaiements filePaieTrain,
                                  out FilePerformance filePerfTrain, out FileTransactions fileTranTrain)
 {
     // Paths are relative; they resolve against the process working directory.
     const string facturationPath = @"Data/facturation_train.csv";
     const string paiementsPath = @"Data/paiements_train.csv";
     const string performancePath = @"Data/performance_train.csv";
     const string transactionsPath = @"Data/transactions_train.csv";

     // Files are read in the same order as the out parameters are declared.
     fileFactTrain = FileFacturation.LoadCsvFile(facturationPath);
     filePaieTrain = FilePaiements.LoadCsvFile(paiementsPath);
     filePerfTrain = FilePerformance.LoadCsvFile(performancePath);
     fileTranTrain = FileTransactions.LoadCsvFile(transactionsPath);
 }
Esempio n. 2
0
 /// <summary>
 /// Loads the four test CSV files from the <c>Data</c> folder.
 /// </summary>
 /// <param name="fileFactTest">Receives the result of loading <c>Data/facturation_test.csv</c>.</param>
 /// <param name="filePaieTest">Receives the result of loading <c>Data/paiements_test.csv</c>.</param>
 /// <param name="filePerfTest">Receives the result of loading <c>Data/performance_test.csv</c>.</param>
 /// <param name="fileTranTest">Receives the result of loading <c>Data/transactions_test.csv</c>.</param>
 public static void LoadTestSets(out FileFacturation fileFactTest, out FilePaiements filePaieTest,
                                 out FilePerformance filePerfTest, out FileTransactions fileTranTest)
 {
     // Paths are relative; they resolve against the process working directory.
     const string facturationPath = @"Data/facturation_test.csv";
     const string paiementsPath = @"Data/paiements_test.csv";
     const string performancePath = @"Data/performance_test.csv";
     const string transactionsPath = @"Data/transactions_test.csv";

     // Files are read in the same order as the out parameters are declared.
     fileFactTest = FileFacturation.LoadCsvFile(facturationPath);
     filePaieTest = FilePaiements.LoadCsvFile(paiementsPath);
     filePerfTest = FilePerformance.LoadCsvFile(performancePath);
     fileTranTest = FileTransactions.LoadCsvFile(transactionsPath);

     // Note: the sample solution file (Data/sample_solution.csv) is not loaded by this method.
 }
Esempio n. 3
0
        /// <summary>
        /// Groups the rows of each CSV file by client account id (<c>ID_CPTE</c>).
        /// The billing, payment and transaction rows of each client are passed through
        /// <c>Utils.SortMostRecentFirst</c> (most recent first, per the helper's name).
        /// Note that the dictionary keys themselves are unsorted.
        /// </summary>
        /// <param name="fileFact">Billing file whose rows are grouped into <paramref name="dicFact"/>.</param>
        /// <param name="filePaie">Payment file whose rows are grouped into <paramref name="dicPaie"/>.</param>
        /// <param name="filePerf">Performance file; one row per client is kept in <paramref name="dicPerf"/>.</param>
        /// <param name="fileTran">Transaction file whose rows are grouped into <paramref name="dicTran"/>.</param>
        /// <param name="dicFact">Receives the billing rows per client.</param>
        /// <param name="dicPaie">Receives the payment rows per client.</param>
        /// <param name="dicPerf">Receives a single performance row per client (the first one encountered).</param>
        /// <param name="dicTran">Receives the transaction rows per client.</param>
        private static void GroupDataByClient(FileFacturation fileFact, FilePaiements filePaie,
                                              FilePerformance filePerf, FileTransactions fileTran,
                                              out DicFact dicFact, out DicPaie dicPaie, out DicPerf dicPerf, out DicTran dicTran)
        {
            // Billing rows: a single TryGetValue per row replaces the ContainsKey + Add + indexer sequence.
            dicFact = new DicFact();
            foreach (var row in fileFact.rows)
            {
                List<DataFacturation> clientRows;
                if (!dicFact.TryGetValue(row.ID_CPTE, out clientRows))
                {
                    clientRows = new List<DataFacturation>();
                    dicFact.Add(row.ID_CPTE, clientRows);
                }
                clientRows.Add(row);
            }
            dicFact = dicFact.ToDictionary(d => d.Key, d => Utils.SortMostRecentFirst(d.Value));

            // Payment rows.
            dicPaie = new DicPaie();
            foreach (var row in filePaie.rows)
            {
                List<DataPaiements> clientRows;
                if (!dicPaie.TryGetValue(row.ID_CPTE, out clientRows))
                {
                    clientRows = new List<DataPaiements>();
                    dicPaie.Add(row.ID_CPTE, clientRows);
                }
                clientRows.Add(row);
            }
            dicPaie = dicPaie.ToDictionary(d => d.Key, d => Utils.SortMostRecentFirst(d.Value));

            // Performance rows: only the first row seen for a client is kept; later rows
            // with the same ID_CPTE are ignored.
            dicPerf = new DicPerf();
            foreach (var row in filePerf.rows)
            {
                if (!dicPerf.ContainsKey(row.ID_CPTE))
                {
                    dicPerf.Add(row.ID_CPTE, row);
                }
            }
            // no need to sort, because there's only one element, not a list

            // Transaction rows.
            dicTran = new DicTran();
            foreach (var row in fileTran.rows)
            {
                List<DataTransactions> clientRows;
                if (!dicTran.TryGetValue(row.ID_CPTE, out clientRows))
                {
                    clientRows = new List<DataTransactions>();
                    dicTran.Add(row.ID_CPTE, clientRows);
                }
                clientRows.Add(row);
            }
            dicTran = dicTran.ToDictionary(d => d.Key, d => Utils.SortMostRecentFirst(d.Value));
        }
Esempio n. 4
0
        /// <summary>
        /// Loads the train or test CSV files, restricts every file to the clients listed in the
        /// performance file, groups the rows by client and finally removes every billing, payment
        /// and transaction row dated on or after the client's prediction period (<c>PERIODID_MY</c>).
        /// </summary>
        /// <param name="dicFact">Receives the billing rows per client.</param>
        /// <param name="dicPaie">Receives the payment rows per client.</param>
        /// <param name="dicPerf">Receives the performance row per client.</param>
        /// <param name="dicTran">Receives the transaction rows per client.</param>
        /// <param name="trainNotTest"><c>true</c> to load the training files, <c>false</c> to load the test files.</param>
        /// <param name="useFull">
        /// <c>true</c> to keep every client; <c>false</c> to keep at most the first 100 distinct
        /// clients of the performance file (debug shortcut to speed testing up).
        /// </param>
        public static void GetDataFromCsvFiles(out DicFact dicFact, out DicPaie dicPaie,
                                               out DicPerf dicPerf, out DicTran dicTran, bool trainNotTest, bool useFull)
        {
            FileFacturation fileFact;
            FilePaiements filePaie;
            FilePerformance filePerf;
            FileTransactions fileTran;

            if (trainNotTest)
            {
                LoadTrainSets(out fileFact, out filePaie, out filePerf, out fileTran);
            }
            else
            {
                LoadTestSets(out fileFact, out filePaie, out filePerf, out fileTran);
            }

            // Keep only the clients that appear in the performance file.
            IEnumerable<string> clientIds = filePerf.rows.Select(e => e.ID_CPTE).Distinct();
            if (!useFull)
            {
                // DebugOnly: get only a few clients to speed testing up.
                // Take(100) simply yields fewer ids when fewer than 100 clients exist,
                // whereas GetRange(0, 100) would throw in that case.
                clientIds = clientIds.Take(100);
            }

            // A hash set makes each membership test O(1) instead of scanning a list for every row.
            var keptClients = new HashSet<string>(clientIds);

            fileFact.rows = fileFact.rows.FindAll(a => keptClients.Contains(a.ID_CPTE));
            filePaie.rows = filePaie.rows.FindAll(a => keptClients.Contains(a.ID_CPTE));
            filePerf.rows = filePerf.rows.FindAll(a => keptClients.Contains(a.ID_CPTE));
            fileTran.rows = fileTran.rows.FindAll(a => keptClients.Contains(a.ID_CPTE));

            // Group dataset by client
            GroupDataByClient(fileFact, filePaie, filePerf, fileTran, out dicFact, out dicPaie, out dicPerf, out dicTran);

            // Remove all data dated on or after the requested prediction date.
            // A client may be missing from any of the three row dictionaries, hence the checks.
            var perfKeys = dicPerf.Keys.ToList();

            foreach (var k in perfKeys)
            {
                var date = dicPerf[k].PERIODID_MY;

                if (dicFact.ContainsKey(k))
                {
                    dicFact[k].RemoveAll(e => date <= e.StatementDate);
                }
                if (dicPaie.ContainsKey(k))
                {
                    dicPaie[k].RemoveAll(e => date <= e.TRANSACTION_DTTM);
                }
                if (dicTran.ContainsKey(k))
                {
                    dicTran[k].RemoveAll(e => date <= e.TRANSACTION_DTTM);
                }
            }

            // Save to Binary files.
            // Manual switch: flip to true to dump the grouped dictionaries; disabled by default.
            bool save = false;

            if (save)
            {
                uBin.Serialize("dicFact.bin", dicFact);
                uBin.Serialize("dicPaie.bin", dicPaie);
                uBin.Serialize("dicPerf.bin", dicPerf);
                uBin.Serialize("dicTran.bin", dicTran);
            }
        }