/// <summary>
/// Reads the basic user information of the training set from a comma-separated file.
/// </summary>
/// <param name="filename">Path of the CSV file to read.</param>
/// <returns>
/// One <see cref="BasicInfo_Train"/> per non-blank data row, in file order.
/// The list is empty when the file holds nothing but the header.
/// </returns>
/// <remarks>
/// The first line is discarded as a header. Rows are split on every comma
/// (no quoting support). Column 0 is the phone number, 1 the city, 2 the county,
/// 3 the id-card count, 4-11 the monthly ARPU values 201908..202003 and 12 the label.
/// A row with fewer than 13 columns raises <see cref="IndexOutOfRangeException"/>;
/// a non-integer id-card count raises <see cref="FormatException"/>.
/// </remarks>
public static List<BasicInfo_Train> ReadCSV(string filename)
{
    var df = new List<BasicInfo_Train>();

    // The using statement guarantees the file handle is released even when a
    // malformed row makes one of the parsers throw half-way through the file.
    using (var sr = new StreamReader(filename))
    {
        // Skip the header row (first line).
        sr.ReadLine();

        string line;
        while ((line = sr.ReadLine()) != null)
        {
            // Tolerate blank lines (typically a trailing newline at end of file)
            // instead of failing on the missing columns.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var rawinfo = line.Split(",");
            var r = new BasicInfo_Train();
            r.phone_no_m = rawinfo[0];
            r.city_name = rawinfo[1];
            r.county_name = rawinfo[2];
            // Machine-readable data: parse independently of the current culture.
            r.idcard_cnt = int.Parse(rawinfo[3], System.Globalization.CultureInfo.InvariantCulture);
            r.arpu_201908 = Common.ParseDouble(rawinfo[4]);
            r.arpu_201909 = Common.ParseDouble(rawinfo[5]);
            r.arpu_201910 = Common.ParseDouble(rawinfo[6]);
            r.arpu_201911 = Common.ParseDouble(rawinfo[7]);
            r.arpu_201912 = Common.ParseDouble(rawinfo[8]);
            r.arpu_202001 = Common.ParseDouble(rawinfo[9]);
            r.arpu_202002 = Common.ParseDouble(rawinfo[10]);
            r.arpu_202003 = Common.ParseDouble(rawinfo[11]);
            // Must run after all monthly ARPU fields above have been assigned.
            r.arpu_avg = r.arpu_avg_c();
            r.label = rawinfo[12];
            df.Add(r);
        }
    }

    return df;
}
/// <summary>
/// Builds the combined training file: loads the user, APP, VOC and SMS training
/// CSVs, aggregates the latter three, and passes everything to <c>output</c>
/// together with the destination path <c>train_all.csv</c>.
/// </summary>
/// <remarks>
/// Progress and row counts are written to the console. All input and output
/// locations are fixed absolute paths on drive F:.
/// </remarks>
public static void CreateTrain()
{
    // --- basic user records -------------------------------------------------
    Console.WriteLine("读取用户基本数据(训练集)");
    var userRows = BasicInfo_Train.ReadCSV(@"F:\诈骗电话识别\诈骗电话号码识别-0527\train\train_user.csv");
    int userTotal = userRows.Count;
    Console.WriteLine("件数:" + userTotal);
    int positives = userRows.Count(u => u.label == "1");
    Console.WriteLine("正样本:" + positives);
    int negatives = userRows.Count(u => u.label == "0");
    Console.WriteLine("负样本:" + negatives);

    // --- APP usage records --------------------------------------------------
    Console.WriteLine("读取用户APP数据(训练集)");
    var appRows = AppInfo.ReadCSV(@"F:\诈骗电话识别\诈骗电话号码识别-0527\train\train_app.csv");
    int appTotal = appRows.Count;
    Console.WriteLine("件数:" + appTotal);
    var appSummary = App_Agg.GetAgg(appRows);
    // The raw rows are emptied and a collection is forced before the next load
    // (presumably to keep peak memory down — confirm before removing).
    appRows.Clear();
    GC.Collect();

    // --- voice call (VOC) records -------------------------------------------
    Console.WriteLine("读取用户VOC数据(训练集)");
    var vocRows = VocInfo.ReadCSV(@"F:\诈骗电话识别\诈骗电话号码识别-0527\train\train_voc.csv");
    int vocTotal = vocRows.Count;
    Console.WriteLine("件数:" + vocTotal);
    var vocSummary = Voc_Agg.GetAgg(vocRows);
    vocRows.Clear();
    GC.Collect();

    // --- SMS records --------------------------------------------------------
    Console.WriteLine("读取用户SMS数据(训练集)");
    var smsRows = SMSInfo.ReadCSV(@"F:\诈骗电话识别\诈骗电话号码识别-0527\train\train_sms.csv");
    int smsTotal = smsRows.Count;
    Console.WriteLine("件数:" + smsTotal);
    var smsSummary = SMS_Agg.GetAgg(smsRows);
    smsRows.Clear();
    GC.Collect();

    // --- hand the user rows and the three aggregates to the writer ------------
    output(
        userRows,
        appSummary,
        vocSummary,
        smsSummary,
        @"F:\诈骗电话识别\诈骗电话号码识别-0527\train_all.csv");
}