/// <summary>
/// Builds the combined test-set feature file: reads the user, APP, VOC and SMS
/// CSV files of the test set, prints the number of rows read from each, aggregates
/// the APP/VOC/SMS records and hands everything to <c>output</c>, which is given
/// the destination path of <c>test_all.csv</c>.
/// </summary>
public static void CreateTest()
{
    // Input and output locations (fixed, machine-specific paths).
    const string userCsv = @"F:\诈骗电话识别\诈骗电话号码识别-0527\test\test_user.csv";
    const string appCsv = @"F:\诈骗电话识别\诈骗电话号码识别-0527\test\test_app.csv";
    const string vocCsv = @"F:\诈骗电话识别\诈骗电话号码识别-0527\test\test_voc.csv";
    const string smsCsv = @"F:\诈骗电话识别\诈骗电话号码识别-0527\test\test_sms.csv";
    const string mergedCsv = @"F:\诈骗电话识别\诈骗电话号码识别-0527\test_all.csv";

    // Basic user data.
    Console.WriteLine("读取用户基本数据(测试集)");
    var users = BasicInfo_Test.ReadCSV(userCsv);
    Console.WriteLine($"件数:{users.Count}");

    // APP usage records.
    Console.WriteLine("读取用户APP数据(测试集)");
    var appRows = AppInfo.ReadCSV(appCsv);
    Console.WriteLine($"件数:{appRows.Count}");

    // Voice call (VOC) records.
    Console.WriteLine("读取用户VOC数据(测试集)");
    var vocRows = VocInfo.ReadCSV(vocCsv);
    Console.WriteLine($"件数:{vocRows.Count}");

    // SMS records.
    Console.WriteLine("读取用户SMS数据(测试集)");
    var smsRows = SMSInfo.ReadCSV(smsCsv);
    Console.WriteLine($"件数:{smsRows.Count}");

    // Aggregate each record set (APP first, then VOC, then SMS) before writing.
    var appAgg = App_Agg.GetAgg(appRows);
    var vocAgg = Voc_Agg.GetAgg(vocRows);
    var smsAgg = SMS_Agg.GetAgg(smsRows);

    output(users, appAgg, vocAgg, smsAgg, mergedCsv);
}
/// <summary>
/// Reads call (VOC) records from a comma-separated file and returns them sorted
/// by <c>phone_no_m</c> and, within the same phone, by <c>start_datetime</c>.
/// </summary>
/// <remarks>
/// The first line of the file is treated as a header and discarded. Each data row
/// must provide at least eight columns, in this order: phone_no_m, opposite_no_m,
/// calltype_id, start_datetime, call_dur, city_name, county_name, imei_m.
/// Blank lines are skipped.
/// After sorting, <c>next_call_septiem</c> of every record is set to the time
/// between its start and the start of the same phone's next call, in seconds,
/// minus its own <c>call_dur</c> (this assumes call_dur is expressed in seconds —
/// TODO confirm). A record that has no later call from the same phone gets
/// <see cref="int.MaxValue"/>.
/// </remarks>
/// <param name="filename">Path of the CSV file to read.</param>
/// <returns>The parsed and sorted records.</returns>
/// <exception cref="FormatException">A date or duration column cannot be parsed.</exception>
/// <exception cref="IndexOutOfRangeException">A non-blank row has fewer than eight columns.</exception>
public static List<VocInfo> ReadCSV(string filename)
{
    var records = new List<VocInfo>();

    // `using` closes the file even when a row fails to parse.
    using (var reader = new StreamReader(filename))
    {
        reader.ReadLine(); // header row

        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                // Tolerate empty lines (for example extra newlines at the end of the file).
                continue;
            }

            var fields = line.Split(',');
            records.Add(new VocInfo
            {
                phone_no_m = fields[0],
                opposite_no_m = fields[1],
                calltype_id = fields[2],
                start_datetime = DateTime.Parse(fields[3]),
                call_dur = int.Parse(fields[4]),
                city_name = fields[5],
                county_name = fields[6],
                imei_m = fields[7],
            });
        }
    }

    // Order by phone number first, then chronologically within one phone,
    // so that consecutive entries of the same phone are consecutive calls.
    records.Sort((x, y) =>
        x.phone_no_m == y.phone_no_m
            ? x.start_datetime.CompareTo(y.start_datetime)
            : x.phone_no_m.CompareTo(y.phone_no_m));

    // Walk every record, including the very last one: it has no successor and
    // must therefore receive the same "no next call" marker as the last call
    // of any other phone.
    for (int i = 0; i < records.Count; i++)
    {
        var current = records[i];
        var following = i + 1 < records.Count ? records[i + 1] : null;

        if (following != null && following.phone_no_m == current.phone_no_m)
        {
            current.next_call_septiem =
                (int)following.start_datetime.Subtract(current.start_datetime).TotalSeconds - current.call_dur;
        }
        else
        {
            current.next_call_septiem = int.MaxValue;
        }
    }

    return records;
}
/// <summary>
/// Builds the combined training-set feature file: reads the user, APP, VOC and SMS
/// CSV files of the training set, prints row counts (plus the number of users
/// labelled "1" and "0"), aggregates the APP/VOC/SMS records and hands everything
/// to <c>output</c>, which is given the destination path of <c>train_all.csv</c>.
/// </summary>
/// <remarks>
/// Each raw record list is cleared and a garbage collection is requested right
/// after it has been aggregated and before the next file is read — presumably to
/// limit peak memory use; that ordering is kept deliberately.
/// </remarks>
public static void CreateTrain()
{
    // Input and output locations (fixed, machine-specific paths).
    const string userCsv = @"F:\诈骗电话识别\诈骗电话号码识别-0527\train\train_user.csv";
    const string appCsv = @"F:\诈骗电话识别\诈骗电话号码识别-0527\train\train_app.csv";
    const string vocCsv = @"F:\诈骗电话识别\诈骗电话号码识别-0527\train\train_voc.csv";
    const string smsCsv = @"F:\诈骗电话识别\诈骗电话号码识别-0527\train\train_sms.csv";
    const string mergedCsv = @"F:\诈骗电话识别\诈骗电话号码识别-0527\train_all.csv";

    // Basic user data, with the label distribution.
    Console.WriteLine("读取用户基本数据(训练集)");
    var users = BasicInfo_Train.ReadCSV(userCsv);
    Console.WriteLine($"件数:{users.Count}");
    var positives = users.Count(u => u.label == "1");
    Console.WriteLine($"正样本:{positives}");
    var negatives = users.Count(u => u.label == "0");
    Console.WriteLine($"负样本:{negatives}");

    // APP usage records: aggregate, then drop the raw rows.
    Console.WriteLine("读取用户APP数据(训练集)");
    var appRows = AppInfo.ReadCSV(appCsv);
    Console.WriteLine($"件数:{appRows.Count}");
    var appAgg = App_Agg.GetAgg(appRows);
    appRows.Clear();
    GC.Collect();

    // Voice call (VOC) records: aggregate, then drop the raw rows.
    Console.WriteLine("读取用户VOC数据(训练集)");
    var vocRows = VocInfo.ReadCSV(vocCsv);
    Console.WriteLine($"件数:{vocRows.Count}");
    var vocAgg = Voc_Agg.GetAgg(vocRows);
    vocRows.Clear();
    GC.Collect();

    // SMS records: aggregate, then drop the raw rows.
    Console.WriteLine("读取用户SMS数据(训练集)");
    var smsRows = SMSInfo.ReadCSV(smsCsv);
    Console.WriteLine($"件数:{smsRows.Count}");
    var smsAgg = SMS_Agg.GetAgg(smsRows);
    smsRows.Clear();
    GC.Collect();

    output(users, appAgg, vocAgg, smsAgg, mergedCsv);
}