Exemplo n.º 1
0
        /// <summary>
        /// Loads the test-set user, APP, VOC and SMS CSV files, prints the record count of
        /// each to the console, aggregates the APP/VOC/SMS records and hands everything to
        /// <c>output</c> together with the <c>test_all.csv</c> path.
        /// </summary>
        public static void CreateTest()
        {
            const string userCsv   = @"F:\诈骗电话识别\诈骗电话号码识别-0527\test\test_user.csv";
            const string appCsv    = @"F:\诈骗电话识别\诈骗电话号码识别-0527\test\test_app.csv";
            const string vocCsv    = @"F:\诈骗电话识别\诈骗电话号码识别-0527\test\test_voc.csv";
            const string smsCsv    = @"F:\诈骗电话识别\诈骗电话号码识别-0527\test\test_sms.csv";
            const string mergedCsv = @"F:\诈骗电话识别\诈骗电话号码识别-0527\test_all.csv";

            // Basic user records.
            Console.WriteLine("读取用户基本数据(测试集)");
            var users = BasicInfo_Test.ReadCSV(userCsv);
            Console.WriteLine("件数:" + users.Count);

            // APP usage records.
            Console.WriteLine("读取用户APP数据(测试集)");
            var appRows = AppInfo.ReadCSV(appCsv);
            Console.WriteLine("件数:" + appRows.Count);

            // Call (VOC) records.
            Console.WriteLine("读取用户VOC数据(测试集)");
            var vocRows = VocInfo.ReadCSV(vocCsv);
            Console.WriteLine("件数:" + vocRows.Count);

            // SMS records.
            Console.WriteLine("读取用户SMS数据(测试集)");
            var smsRows = SMSInfo.ReadCSV(smsCsv);
            Console.WriteLine("件数:" + smsRows.Count);

            // Aggregate each data source (APP, then VOC, then SMS) before merging.
            var appAgg = App_Agg.GetAgg(appRows);
            var vocAgg = Voc_Agg.GetAgg(vocRows);
            var smsAgg = SMS_Agg.GetAgg(smsRows);

            output(users, appAgg, vocAgg, smsAgg, mergedCsv);
        }
Exemplo n.º 2
0
    /// <summary>
    /// Reads call (VOC) records from a comma-separated file, sorts them by phone number and
    /// call start time, and fills in <c>next_call_septiem</c> on every record.
    /// </summary>
    /// <remarks>
    /// The first line of the file is discarded as a header and blank lines are skipped.
    /// Every other line is split on <c>","</c> (no quote handling) and mapped by position to
    /// phone_no_m, opposite_no_m, calltype_id, start_datetime, call_dur, city_name,
    /// county_name and imei_m.
    /// <c>next_call_septiem</c> is the number of whole seconds from this call's start to the
    /// start of the same phone's next call, minus this call's <c>call_dur</c>; it is
    /// <c>int.MaxValue</c> when no later record exists for the same phone.
    /// Fields are parsed with <c>DateTime.Parse</c> and <c>int.Parse</c>, so a malformed
    /// value throws, as does a line with fewer than eight fields.
    /// </remarks>
    /// <param name="filename">Path of the CSV file to read.</param>
    /// <returns>The parsed records ordered by phone_no_m (ordinal) and then start_datetime.</returns>
    public static List <VocInfo> ReadCSV(string filename)
    {
        var df = new List <VocInfo>();

        // The using block closes the file even when a row fails to parse.
        using (var sr = new StreamReader(filename))
        {
            sr.ReadLine();  // Skip the header row.
            while (!sr.EndOfStream)
            {
                var line = sr.ReadLine();
                if (string.IsNullOrWhiteSpace(line))
                {
                    // A blank line carries no record; indexing its fields would throw.
                    continue;
                }

                var rawinfo = line.Split(",");
                var r       = new VocInfo();
                r.phone_no_m     = rawinfo[0];
                r.opposite_no_m  = rawinfo[1];
                r.calltype_id    = rawinfo[2];
                r.start_datetime = DateTime.Parse(rawinfo[3]);
                r.call_dur       = int.Parse(rawinfo[4]);
                r.city_name      = rawinfo[5];
                r.county_name    = rawinfo[6];
                r.imei_m         = rawinfo[7];
                df.Add(r);
            }
        }

        // Order by phone, then by start time. The phone comparison is ordinal so that it
        // agrees with the == test used below to detect a change of phone; this keeps all
        // records of one phone contiguous.
        df.Sort((x, y) =>
        {
            int byPhone = string.CompareOrdinal(x.phone_no_m, y.phone_no_m);
            return byPhone != 0 ? byPhone : x.start_datetime.CompareTo(y.start_datetime);
        });

        for (int i = 0; i < df.Count - 1; i++)
        {
            var c = df[i];
            var n = df[i + 1];
            if (c.phone_no_m == n.phone_no_m)
            {
                // Start-to-start distance minus this call's duration.
                c.next_call_septiem = (int)n.start_datetime.Subtract(c.start_datetime).TotalSeconds - c.call_dur;
            }
            else
            {
                // Last call of this phone: there is no following call.
                c.next_call_septiem = int.MaxValue;
            }
        }

        // The loop above never visits the final record as "c"; it has no following call
        // either, so mark it the same way as every other last-call-of-a-phone record.
        if (df.Count > 0)
        {
            var last = df[df.Count - 1];
            last.next_call_septiem = int.MaxValue;
        }

        return(df);
    }
Exemplo n.º 3
0
        /// <summary>
        /// Loads the training-set user, APP, VOC and SMS CSV files, prints record counts
        /// (plus the number of users labelled "1" and "0") to the console, aggregates the
        /// APP/VOC/SMS records and hands everything to <c>output</c> together with the
        /// <c>train_all.csv</c> path.
        /// </summary>
        /// <remarks>
        /// Each raw record list is cleared and a garbage collection is requested as soon as
        /// its aggregate has been built, before the next file is read.
        /// </remarks>
        public static void CreateTrain()
        {
            const string userCsv   = @"F:\诈骗电话识别\诈骗电话号码识别-0527\train\train_user.csv";
            const string appCsv    = @"F:\诈骗电话识别\诈骗电话号码识别-0527\train\train_app.csv";
            const string vocCsv    = @"F:\诈骗电话识别\诈骗电话号码识别-0527\train\train_voc.csv";
            const string smsCsv    = @"F:\诈骗电话识别\诈骗电话号码识别-0527\train\train_sms.csv";
            const string mergedCsv = @"F:\诈骗电话识别\诈骗电话号码识别-0527\train_all.csv";

            // Basic user records, with the label distribution.
            Console.WriteLine("读取用户基本数据(训练集)");
            var users = BasicInfo_Train.ReadCSV(userCsv);
            Console.WriteLine("件数:" + users.Count);
            Console.WriteLine("正样本:" + users.Count(u => u.label == "1"));
            Console.WriteLine("负样本:" + users.Count(u => u.label == "0"));

            // APP usage records: aggregate, then drop the raw rows.
            Console.WriteLine("读取用户APP数据(训练集)");
            var appRows = AppInfo.ReadCSV(appCsv);
            Console.WriteLine("件数:" + appRows.Count);
            var appAgg = App_Agg.GetAgg(appRows);
            appRows.Clear();
            GC.Collect();

            // Call (VOC) records: aggregate, then drop the raw rows.
            Console.WriteLine("读取用户VOC数据(训练集)");
            var vocRows = VocInfo.ReadCSV(vocCsv);
            Console.WriteLine("件数:" + vocRows.Count);
            var vocAgg = Voc_Agg.GetAgg(vocRows);
            vocRows.Clear();
            GC.Collect();

            // SMS records: aggregate, then drop the raw rows.
            Console.WriteLine("读取用户SMS数据(训练集)");
            var smsRows = SMSInfo.ReadCSV(smsCsv);
            Console.WriteLine("件数:" + smsRows.Count);
            var smsAgg = SMS_Agg.GetAgg(smsRows);
            smsRows.Clear();
            GC.Collect();

            output(users, appAgg, vocAgg, smsAgg, mergedCsv);
        }