/// <summary>
/// Builds one value per look-back window: the largest PRIOR_CREDIT_LIMIT_AMT among the transactions
/// whose TRANSACTION_DTTM falls inside that window. The whole array is then passed through NormalizeToOne.
/// Window 0 is (predictionStartDate - days, predictionStartDate]; every following window covers the
/// <paramref name="days"/> days immediately preceding the previous one.
/// </summary>
/// <remarks>
/// The original author notes that the input is expected to be sorted most recent first; the filtering
/// done here does not itself depend on that order.
/// Value ranges recorded by the original author: -48~+18044 for A, 0~+5427 for B, -5709~+39709 for C
/// (what A/B/C designate is not visible from this method - presumably data sets; confirm before relying on it).
/// </remarks>
/// <param name="len">Number of windows; handed to NnRow.CreateArray to size the result.</param>
/// <param name="predictionStartDate">Inclusive upper bound of the most recent window.</param>
/// <param name="list">Transactions to scan. When uNet.IsNullOrEmpty reports true, the freshly created array is returned without normalization.</param>
/// <param name="days">Length of each window, in days.</param>
/// <param name="min">Lower bound handed to NormalizeToOne. Defaults to 0, the value that used to be hard-coded.</param>
/// <param name="max">Upper bound handed to NormalizeToOne. Defaults to 39709, the value that used to be hard-coded.</param>
/// <returns>The array returned by NormalizeToOne; windows without any transaction keep the value NnRow.CreateArray put there before normalization.</returns>
public static double[] GetTransactions_Max_PRIOR_CREDIT_LIMIT_AMT(int len, DateTime predictionStartDate, List<DataTransactions> list, int days, double min = 0, double max = 39709)
{
    double[] nbs = NnRow.CreateArray(len);
    if (uNet.IsNullOrEmpty(list))
    {
        return nbs;
    }

    // The list is only read, so no defensive copy is needed.
    DateTime nextWindowEnd = predictionStartDate;
    for (int i = 0; i < nbs.Length; i++)
    {
        // Per-iteration locals so the lambda below never captures a variable that changes later.
        DateTime windowEnd = nextWindowEnd;
        DateTime windowStart = windowEnd - TimeSpan.FromDays(days);
        nextWindowEnd = windowStart;

        List<DataTransactions> inWindow = list.FindAll(e => windowStart < e.TRANSACTION_DTTM && e.TRANSACTION_DTTM <= windowEnd);
        if (inWindow.Count > 0)
        {
            nbs[i] = inWindow.Max(e => e.PRIOR_CREDIT_LIMIT_AMT);
        }
    }

    return NormalizeToOne(nbs, min, max);
}
/// <summary>
/// For each look-back window, stores the largest CashBalance among the billing rows whose PERIODID_MY
/// lies in that window, then normalizes the array with NormalizeToOne using the supplied bounds.
/// Window 0 is (predictionStartDate - days, predictionStartDate]; each later window is the
/// <paramref name="days"/> days just before the previous one.
/// </summary>
/// <remarks>
/// The original author notes that the input is expected to be sorted most recent first.
/// </remarks>
/// <param name="len">Number of windows; handed to NnRow.CreateArray to size the result.</param>
/// <param name="predictionStartDate">Inclusive upper bound of the most recent window.</param>
/// <param name="list">Billing rows to scan. When uNet.IsNullOrEmpty reports true, the freshly created array is returned as is.</param>
/// <param name="days">Length of each window, in days.</param>
/// <param name="min">Lower bound handed to NormalizeToOne.</param>
/// <param name="max">Upper bound handed to NormalizeToOne.</param>
/// <returns>The array returned by NormalizeToOne; windows without rows keep their initial value before normalization.</returns>
public static double[] GetFacturationCashBalance(int len, DateTime predictionStartDate, List<DataFacturation> list, int days, double min, double max)
{
    double[] result = NnRow.CreateArray(len);
    if (uNet.IsNullOrEmpty(list))
    {
        return result;
    }

    DateTime cursor = predictionStartDate;
    for (int slot = 0; slot < result.Length; slot++)
    {
        // Slide the window one step further into the past.
        DateTime windowEnd = cursor;
        DateTime windowStart = windowEnd - TimeSpan.FromDays(days);
        cursor = windowStart;

        List<DataFacturation> inWindow = list
            .Where(row => windowStart < row.PERIODID_MY && row.PERIODID_MY <= windowEnd)
            .ToList();

        if (inWindow.Count > 0)
        {
            result[slot] = inWindow.Max(row => row.CashBalance);
        }
    }

    return NormalizeToOne(result, min, max);
}
/// <summary>
/// Prepares the two pieces of state used for per-category computations: an array with one slot per
/// category, and the transactions dated within the most recent <paramref name="nbDaysMostRecent"/> days.
/// </summary>
/// <param name="list">Transactions to filter; not null-checked here.</param>
/// <param name="predictionStartDate">Inclusive upper bound of the selection window.</param>
/// <param name="nbDaysMostRecent">Window length in days; the lower bound is exclusive.</param>
/// <param name="categories">Category names; only their count is used, to size <paramref name="nbs"/>.</param>
/// <param name="nbs">Receives the array created by NnRow.CreateArray(categories.Count).</param>
/// <param name="shortList">Receives a new list holding the transactions inside the window.</param>
private static void Init_Categories(List<DataTransactions> list, DateTime predictionStartDate, int nbDaysMostRecent, List<string> categories, out double[] nbs, out List<DataTransactions> shortList)
{
    nbs = NnRow.CreateArray(categories.Count);

    DateTime windowEnd = predictionStartDate;
    DateTime windowStart = windowEnd - TimeSpan.FromDays(nbDaysMostRecent);

    bool IsInWindow(DataTransactions transaction)
    {
        return windowStart < transaction.TRANSACTION_DTTM && transaction.TRANSACTION_DTTM <= windowEnd;
    }

    // FindAll already returns a fresh list.
    shortList = list.FindAll(IsInWindow);
}
/// <summary>
/// Collects the CurrentTotalBalance of every billing row, lets CalculateStatsAndCopyToArray fill a
/// 5-slot array from those values, and normalizes that array with NormalizeToOne.
/// </summary>
/// <param name="list">Billing rows. When uNet.IsNullOrEmpty reports true, the freshly created 5-slot array is returned untouched.</param>
/// <returns>The normalized 5-slot array, or the initial array when there is no input.</returns>
public static double[] GetFacturationCurrentTotalBalance(List<DataFacturation> list)
{
    double[] stats = NnRow.CreateArray(5);
    if (uNet.IsNullOrEmpty(list))
    {
        return stats;
    }

    List<double> balances = list.ConvertAll<double>(row => row.CurrentTotalBalance);
    CalculateStatsAndCopyToArray(stats, balances);
    return NormalizeToOne(stats);
}
/// <summary>
/// Collects the TRANSACTION_AMT of every payment row, lets CalculateStatsAndCopyToArray fill a
/// 5-slot array from those values, and normalizes that array with NormalizeToOne.
/// </summary>
/// <param name="list">Payment rows. When uNet.IsNullOrEmpty reports true, the freshly created 5-slot array is returned untouched.</param>
/// <returns>The normalized 5-slot array, or the initial array when there is no input.</returns>
public static double[] GetPaiementsStats(List<DataPaiements> list)
{
    double[] stats = NnRow.CreateArray(5);
    if (uNet.IsNullOrEmpty(list))
    {
        return stats;
    }

    List<double> amounts = list.ConvertAll<double>(payment => payment.TRANSACTION_AMT);
    CalculateStatsAndCopyToArray(stats, amounts);
    return NormalizeToOne(stats);
}
public static List<NnRow> outputs = new List<NnRow>(); // contains all rows of numbers to write for N.Network

/// <summary>
/// Loads the four data dictionaries (billing, payments, performance, transactions) and computes one
/// NnRow per key of the performance dictionary by calling CalculateRow. Rows for which CalculateRow
/// returned null are dropped from the result.
/// </summary>
/// <param name="trainNotTest">Forwarded to Utils.GetDataFromCsvFiles; ignored when <paramref name="loadBin"/> is true.</param>
/// <param name="useFull">Forwarded to Utils.GetDataFromCsvFiles; ignored when <paramref name="loadBin"/> is true.</param>
/// <param name="loadBin">True to load through Utils.GetDataFromBinFiles, false to load through Utils.GetDataFromCsvFiles.</param>
/// <returns>A new list holding the non-null rows, in the order of the performance dictionary's keys.</returns>
public static List<NnRow> AnalyzeAndCreateColumnsForNNetwork(bool trainNotTest, bool useFull, bool loadBin)
{
    DicFact dicFact;
    DicPaie dicPaie;
    DicPerf dicPerf;
    DicTran dicTran;

    if (!loadBin)
    {
        Utils.GetDataFromCsvFiles(out dicFact, out dicPaie, out dicPerf, out dicTran, trainNotTest, useFull);
    }
    else
    {
        Utils.GetDataFromBinFiles(out dicFact, out dicPaie, out dicPerf, out dicTran);
    }

    // ExportDefaultedTransactions(dicPerf, dicTran); // debug only

    var clients = dicPerf.Keys.ToList();
    int nbRows = dicPerf.Keys.Count;

    // An array (not a list) so that each index can be written independently, which keeps the loop
    // below replaceable by a parallel one.
    var rowArray = new NnRow[nbRows];

    // Single CPU core: convenient for debugging. When there is a lot to process, the original author's
    // multi-core alternative is:
    //   Parallel.For(0, nbRows, new ParallelOptions { MaxDegreeOfParallelism = 4 },
    //       i => { rowArray[i] = CalculateRow(clients[i], dicFact, dicPaie, dicPerf, dicTran); });
    // NOTE(review): that assumes CalculateRow is safe to call concurrently - confirm before enabling.
    int index = 0;
    while (index < nbRows)
    {
        rowArray[index] = CalculateRow(clients[index], dicFact, dicPaie, dicPerf, dicTran);
        index++;
    }

    // Keep only the rows that were actually produced.
    var dataset = new List<NnRow>(nbRows);
    foreach (NnRow row in rowArray)
    {
        if (null == row)
        {
            continue;
        }

        dataset.Add(row);
    }

    return dataset;
}
/// <summary>
/// For each look-back window, stores the sum of TRANSACTION_AMT over the transactions whose
/// TRANSACTION_DTTM lies in that window (an empty window yields a sum of 0), then normalizes the
/// array with NormalizeToOne.
/// Window 0 is (predictionStartDate - days, predictionStartDate]; each later window is the
/// <paramref name="days"/> days just before the previous one.
/// </summary>
/// <remarks>
/// The original author notes that the input is expected to be sorted most recent first.
/// </remarks>
/// <param name="len">Number of windows; handed to NnRow.CreateArray to size the result.</param>
/// <param name="predictionStartDate">Inclusive upper bound of the most recent window.</param>
/// <param name="list">Transactions to scan. When uNet.IsNullOrEmpty reports true, the freshly created array is returned as is.</param>
/// <param name="days">Length of each window, in days.</param>
/// <returns>The array returned by NormalizeToOne.</returns>
public static double[] GetTransactions(int len, DateTime predictionStartDate, List<DataTransactions> list, int days)
{
    double[] sums = NnRow.CreateArray(len);
    if (uNet.IsNullOrEmpty(list))
    {
        return sums;
    }

    DateTime cursor = predictionStartDate;
    for (int slot = 0; slot < sums.Length; slot++)
    {
        // Slide the window one step further into the past.
        DateTime windowEnd = cursor;
        DateTime windowStart = windowEnd - TimeSpan.FromDays(days);
        cursor = windowStart;

        sums[slot] = list
            .Where(t => windowStart < t.TRANSACTION_DTTM && t.TRANSACTION_DTTM <= windowEnd)
            .Sum(t => t.TRANSACTION_AMT);
    }

    return NormalizeToOne(sums);
}
/// <summary>
/// For each look-back window, replays the window's transactions (added) and payments (subtracted) in
/// chronological order on top of the most recent statement balance, and turns the resulting running
/// balances into 4 statistics via CalculateStatsAndCopyToArray, normalized with
/// NormalizeToOne(stats, -60000, 60000).
/// Window 0 is (predictionStartDate - days, predictionStartDate]; each later window is the
/// <paramref name="days"/> days just before the previous one.
/// </summary>
/// <remarks>
/// Rows are filtered and ordered inside this method, so the input does not need to be pre-sorted;
/// only the relative order of rows sharing the same timestamp follows the input order.
/// A window is skipped when it has no transaction or no payment.
/// </remarks>
/// <param name="len">Number of windows; the result is created with len * 4 slots.</param>
/// <param name="predictionStartDate">Inclusive upper bound of the most recent window.</param>
/// <param name="rowsFact">Billing statements; null is treated as empty (starting balance 0).</param>
/// <param name="rowsPaie">Payments; null is treated as empty (every window is then skipped).</param>
/// <param name="rowsTran">Transactions; null is treated as empty (every window is then skipped).</param>
/// <param name="days">Length of each window, in days.</param>
/// <returns>
/// An array created with len * 4 slots initialised to 0.5, whose leading slots are overwritten with the
/// statistics of the windows that were not skipped.
/// </returns>
public static double[] GetSpendsAndPaymentsAfterStatement(int len, DateTime predictionStartDate, List<DataFacturation> rowsFact, List<DataPaiements> rowsPaie, List<DataTransactions> rowsTran, int days)
{
    // Treat every missing input the same way. Previously a null rowsTran threw a
    // NullReferenceException while null rowsPaie / rowsFact were tolerated.
    List<DataTransactions> transactions = rowsTran ?? new List<DataTransactions>();
    List<DataPaiements> payments = rowsPaie ?? new List<DataPaiements>();
    List<DataFacturation> statements = rowsFact ?? new List<DataFacturation>();

    var allStats = new List<double>();
    DateTime nextWindowEnd = predictionStartDate;

    for (int i = 0; i < len; i++)
    {
        // Per-iteration locals so the lambdas below never capture a variable that changes later.
        DateTime windowEnd = nextWindowEnd;
        DateTime windowStart = windowEnd - TimeSpan.FromDays(days);
        nextWindowEnd = windowStart;

        List<DataTransactions> windowTran = transactions.FindAll(e => windowStart < e.TRANSACTION_DTTM && e.TRANSACTION_DTTM <= windowEnd);
        if (windowTran.Count <= 0)
        {
            continue;
        }

        List<DataPaiements> windowPaie = payments.FindAll(e => windowStart < e.TRANSACTION_DTTM && e.TRANSACTION_DTTM <= windowEnd);
        if (windowPaie.Count <= 0)
        {
            continue;
        }

        // Starting balance: the most recent statement in the (days + 90) days ending at the window start.
        DateTime statementLowerBound = windowStart - TimeSpan.FromDays(days + 90);
        List<DataFacturation> recentStatements = statements
            .FindAll(e => statementLowerBound < e.PERIODID_MY && e.PERIODID_MY <= windowStart)
            .OrderByDescending(e => e.PERIODID_MY)
            .ToList();
        double runningBalance = recentStatements.Count > 0 ? recentStatements[0].CurrentTotalBalance : 0;

        // Spends increase the balance, payments decrease it.
        var movements = new List<(DateTime date, double val)>(windowTran.Count + windowPaie.Count);
        foreach (var e in windowTran)
        {
            movements.Add((e.TRANSACTION_DTTM, e.TRANSACTION_AMT));
        }

        foreach (var e in windowPaie)
        {
            movements.Add((e.TRANSACTION_DTTM, -e.TRANSACTION_AMT));
        }

        // Oldest first. OrderBy is a stable sort, so rows with identical timestamps keep their
        // insertion order; pre-sorting the two source lists would not change the outcome.
        var vals = new List<double>(movements.Count);
        foreach (var movement in movements.OrderBy(m => m.date))
        {
            runningBalance += movement.val;
            vals.Add(runningBalance);
        }

        double[] stats = NnRow.CreateArray(4);
        CalculateStatsAndCopyToArray(stats, vals);
        stats = NormalizeToOne(stats, -60000, 60000);
        allStats.AddRange(stats);
    }

    // A neutral balance must map to 0.5 so that both negative and positive values fit in the
    // 0.0 ~ 1.0 output range.
    double neutral = 0.5;
    double[] nbs = NnRow.CreateArray(len * 4, neutral);

    // NOTE(review): skipped windows add nothing to allStats, so the statistics of later windows are
    // packed toward the front of the array instead of staying at index (window * 4). Confirm whether
    // that compaction is intended before changing it, since it alters the feature layout.
    for (int i = 0; i < nbs.Length && i < allStats.Count; i++)
    {
        nbs[i] = allStats[i];
    }

    return nbs;
}