Esempio n. 1
0
        /// <summary>
        /// Creates one classifier, hands it to every worker held in <c>_workers</c> and
        /// subscribes <c>Worker_NotBusy</c> to each worker's <c>NotBusy</c> event.
        /// </summary>
        public Scheduler()
        {
            // A single classifier instance is shared by all workers.
            var sharedClassifier = new XGBClassifier();

            foreach (var registered in _workers.Values)
            {
                registered.Initialize(sharedClassifier);
                registered.NotBusy += Worker_NotBusy;
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Fits a default-configured classifier on the training fixture and checks the
        /// predictions for the test fixture with <c>TestUtils.ClassifierPredsCorrect</c>.
        /// </summary>
        public void Predict()
        {
            var dataTrain   = TestUtils.GetClassifierDataTrain();
            var labelsTrain = TestUtils.GetClassifierLabelsTrain();
            var dataTest    = TestUtils.GetClassifierDataTest();

            // The classifier is disposable; release it even when the assertion fails.
            using (var xgbc = new XGBClassifier())
            {
                xgbc.Fit(dataTrain, labelsTrain);
                var preds = xgbc.Predict(dataTest);

                Assert.IsTrue(TestUtils.ClassifierPredsCorrect(preds));
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Fits a default-configured classifier, runs a probability prediction and writes
        /// the result of <c>DumpModelEx</c> to the console. The test makes no assertion;
        /// it only verifies that the calls complete without throwing.
        /// </summary>
        public void TestClassifierDump()
        {
            var dataTrain   = TestUtils.GetClassifierDataTrain();
            var labelsTrain = TestUtils.GetClassifierLabelsTrain();
            var dataTest    = TestUtils.GetClassifierDataTest();

            // The classifier is disposable; release it even when a call below throws.
            using (var xgbc = new XGBClassifier())
            {
                xgbc.Fit(dataTrain, labelsTrain);

                // The probabilities are not inspected; the call is kept so the model is
                // dumped in the same state as before (after having served a prediction).
                xgbc.PredictProba(dataTest);
                var description = xgbc.DumpModelEx();

                Console.WriteLine("Model Dumped: " + description);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Trains a classifier built with explicit constructor arguments, saves it to
        /// <c>TEST_FILE</c>, reloads it and asserts that the reloaded model yields the
        /// same probabilities as the original for the test fixture.
        /// </summary>
        public void TestClassifierSaveAndLoadWithParameters()
        {
            var dataTrain   = TestUtils.GetClassifierDataTrain();
            var labelsTrain = TestUtils.GetClassifierLabelsTrain();
            var dataTest    = TestUtils.GetClassifierDataTest();

            // NOTE(review): the arguments are presumably max depth, learning rate and
            // number of estimators - confirm against the XGBClassifier constructor.
            using (var xgbc = new XGBClassifier(10, 0.01f, 50))
            {
                xgbc.Fit(dataTrain, labelsTrain);

                var preds1 = xgbc.PredictProba(dataTest);

                xgbc.SaveModelToFile(TEST_FILE);

                // Both models are disposable; the nested using releases the reloaded
                // one before the original, even when the assertion fails.
                using (var xgbc2 = BaseXgbModel.LoadClassifierFromFile(TEST_FILE))
                {
                    var preds2 = xgbc2.PredictProba(dataTest);

                    Assert.IsTrue(TestUtils.AreEqual(preds1, preds2));
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Classifies every user with <paramref name="xgbc"/>, stores the per-user bot
        /// verdict through <c>_antiBotRepository</c> and stores the users predicted as
        /// class 0 through <c>_repo</c>.
        /// </summary>
        /// <param name="gath">Gathering whose <c>Id</c> is written to the log entry.</param>
        /// <param name="groupmapping">Mapping passed to <c>ToVector</c> when building feature rows.</param>
        /// <param name="xgbc">Classifier used to predict one label per user.</param>
        /// <param name="users">Users to classify and, when predicted as class 0, to save.</param>
        private void DetectBotsAndSave(UserProfileGathering gath, Dictionary<long, double[]> groupmapping, XGBClassifier xgbc, List<UserGet> users)
        {
            var featureRows = users.Select(z => z.ToVector(groupmapping)).ToArray();
            var predictions = xgbc.Predict(featureRows);

            _log.Information("Thread {GathId} intended to save {NoBotsUserCount}/{UserCount} accounts", gath.Id, predictions.Count(z => z == 0), users.Count);

            var verdicts = new List<Tuple<int, bool>>();
            var nonBots  = new List<UserGet>();

            for (int index = 0; index < users.Count; index++)
            {
                var candidate = users[index];
                var label     = predictions[index];

                // Only a label of exactly 1 is flagged as a bot, and only a label of
                // exactly 0 is kept; any other value is recorded as "not a bot" but the
                // user is not saved.
                verdicts.Add(new Tuple<int, bool>(candidate.id, label == 1));
                if (label == 0)
                {
                    nonBots.Add(candidate);
                }
            }

            if (verdicts.Count > 0)
            {
                _antiBotRepository.SaveUsers(verdicts);
            }

            if (nonBots.Count > 0)
            {
                _repo.SaveUsers(nonBots, DateTime.Now);
            }
        }
Esempio n. 6
0
 /// <summary>
 /// Registers one more level: appends <paramref name="questionNum"/> to
 /// <c>percentiles</c> and <paramref name="regressor"/> to <c>xgb</c>, so both
 /// collections receive exactly one entry per call.
 /// </summary>
 /// <param name="questionNum">Number stored in <c>percentiles</c> for this level.</param>
 /// <param name="regressor">
 /// Model stored in <c>xgb</c> for this level. Despite the parameter name, the
 /// declared type is <c>XGBClassifier</c>.
 /// </param>
 public void AddLevel(int questionNum, XGBClassifier regressor)
 {
     percentiles.Add(questionNum);
     xgb.Add(regressor);
 }
Esempio n. 7
0
        /// <summary>
        /// Builds the anti-bot data set from two repositories, exports it as CSV,
        /// trains a two-class classifier on a random 80 % of the rows, saves it to
        /// <c>ext_trained_model.xgb</c>, reloads it and prints the share of matching
        /// predictions on the train and test partitions. Blocks on a key press at the end.
        /// </summary>
        private static void AntiBot()
        {
            var X = new List<float[]>();
            var Y = new List<float>();

            // NOTE(review): the second argument is presumably the label assigned to
            // every row read from that repository (0 = no bot, 1 = bot) - confirm in ReadRepo.
            var pt1 = ReadRepo("nopack_nobot_ferrets.s3db", 0.0f);
            var pt2 = ReadRepo("nopack_bots_ferrets.s3db", 1.0f);

            X.AddRange(pt1.x);
            X.AddRange(pt2.x);
            Y.AddRange(pt1.y);
            Y.AddRange(pt2.y);

            // The feature header is the (0, 0) header three times followed by the
            // (102, 0) header. It is built once and shared by both CSV files.
            var baseHeader    = string.Join(", ", FlatUsertToVectorMapping.GetHeader(0, 0));
            var featureHeader = baseHeader + ", " + baseHeader + ", " + baseHeader + ", " + string.Join(", ", FlatUsertToVectorMapping.GetHeader(102, 0));

            // Each feature row is formatted once (culture-invariant) and reused for both files.
            var featureRows = X.Select(z => string.Join(", ", z.Select(v => v.ToString(CultureInfo.InvariantCulture)))).ToArray();

            File.WriteAllText("antibot_xy.csv", "Y," + featureHeader + "\r\n");
            File.AppendAllLines("antibot_xy.csv", featureRows.Select((row, i) => (int)Y[i] + ", " + row));
            File.WriteAllText("antibot_x.csv", featureHeader + "\r\n");
            File.AppendAllLines("antibot_x.csv", featureRows);

            // Random 80/20 split over shuffled row indices.
            var rnd        = new Random(Environment.TickCount);
            var shuffled   = Enumerable.Range(0, X.Count).OrderBy(z => rnd.NextDouble()).ToArray();
            var trainCount = X.Count * 80 / 100;

            var x_train = shuffled.Take(trainCount).Select(z => X[z]).ToArray();
            var y_train = shuffled.Take(trainCount).Select(z => Y[z]).ToArray();
            var x_test  = shuffled.Skip(trainCount).Select(z => X[z]).ToArray();
            var y_test  = shuffled.Skip(trainCount).Select(z => Y[z]).ToArray();

            var parameters = new Dictionary<string, object>
            {
                ["max_depth"]     = 10,
                ["learning_rate"] = 0.1f,
                ["n_estimators"]  = 500,
                ["silent"]        = true,
                // "binary:logistic" was the alternative objective tried here.
                ["objective"]     = "multi:softprob",

                ["nthread"]           = -1,
                ["gamma"]             = 0f,
                ["min_child_weight"]  = 1,
                ["max_delta_step"]    = 1,
                ["subsample"]         = 1f,
                ["colsample_bytree"]  = 1f,
                ["colsample_bylevel"] = 1f,
                ["reg_alpha"]         = 1.5f,
                ["reg_lambda"]        = 1f,
                ["scale_pos_weight"]  = 1f,

                ["base_score"] = 0.5F,
                ["seed"]       = 0,
                ["missing"]    = float.NaN,
                ["num_class"]  = 2
            };

            using (var txgbc = new XGBClassifier(parameters))
            {
                txgbc.Fit(x_train, y_train);
                txgbc.SaveModelToFile("ext_trained_model.xgb");
            }

            using (var xgbc = BaseXgbModel.LoadClassifierFromFile("ext_trained_model.xgb"))
            {
                // The class count is set again on the reloaded model before predicting.
                xgbc.SetParameter("num_class", 2);

                // Share of rows whose prediction is within 1e-3 of the label
                // (NaN when the partition is empty).
                Func<float[][], float[], double> matchRate = (features, labels) =>
                {
                    var preds   = xgbc.Predict(features);
                    var matches = 0;
                    for (int pos = 0; pos < preds.Length; pos++)
                    {
                        if (Math.Abs(labels[pos] - preds[pos]) < 1e-3)
                        {
                            matches++;
                        }
                    }
                    return (double)matches / preds.Length;
                };

                // Same evaluation order as before: test partition first, then train.
                var testQuality  = matchRate(x_test, y_test);
                var trainQuality = matchRate(x_train, y_train);

                Console.WriteLine("Train/Test Quality {0}/{1}", trainQuality, testQuality);
            }
            Console.ReadKey();
        }
Esempio n. 8
0
        /// <summary>
        /// Trains one five-class classifier per questionnaire question and collects
        /// them in a <c>Fifther</c>, which is saved to the "fifth" folder under the
        /// application base directory.
        /// </summary>
        /// <remarks>
        /// User feature vectors are read in batches from <c>passed_tests_ferrets.s3db</c>.
        /// Answers come from the JSON data set and are kept only for users that have
        /// a vector and a complete answer vector. For every question the method
        /// prints train and test quality figures; the test figures are computed
        /// against the test labels.
        /// </remarks>
        /// <returns>The populated <c>Fifther</c>.</returns>
        private static Fifther Fifth()
        {
            const int batch           = 1000;
            const int questionCount   = 120; // length of a complete answer vector
            const int answerClasses   = 5;   // "num_class" and size of the one-hot target
            const int maxLoadFailures = 3;   // consecutive failed batch reads before giving up

            var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));
            var bagOfTerms   = File.ReadAllLines("res\\expert_topics.csv").Skip(1).Select(z => z.Split(',')).ToDictionary(z => int.Parse(z[0]), z => new HashSet<string>(z[1].Split(' ')));
            var xFilename    = "224053984_dataset.json";

            QuestionnaireScope scope;
            var serializer = new XmlSerializer(typeof(QuestionnaireScope));

            using (var reader = new StreamReader("test_fifth.xml"))
            {
                scope = (QuestionnaireScope)serializer.Deserialize(reader);
            }

            var log = new LoggerConfiguration()
                      .MinimumLevel.Verbose()
                      .WriteTo.LiterateConsole()
                      .CreateLogger();
            var repository = new UserGetRepository("passed_tests_ferrets.s3db", log, new CompressorProto());
            var vectors    = new Dictionary<int, float[]>();
            var users      = new UserGet[0];
            var count      = 0;
            var failures   = 0;

            do
            {
                try
                {
                    users = repository.RangeSelect(count, batch).ToArray();
                    foreach (var user in users)
                    {
                        // Indexer instead of Add: a batch that is retried after a
                        // partial failure must not trip over ids it already stored.
                        vectors[user.id] = user.ToVector(groupmapping, bagOfTerms);
                    }
                    count   += batch;
                    failures = 0;
                    log.Information("Done {Count} recs.", count);
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);

                    // The same batch is retried, but only a bounded number of times;
                    // previously a persistent error kept this loop spinning forever.
                    failures++;
                    if (failures >= maxLoadFailures)
                    {
                        Console.WriteLine("Giving up loading users after {0} consecutive failures at offset {1}.", failures, count);
                        break;
                    }
                }
            } while (users.Length != 0);

            // Convert each answer set once and keep only users that have a feature
            // vector and a complete answer vector.
            var dataset = JsonConvert.DeserializeObject<List<FifthAttendance>>(File.ReadAllText(xFilename))
                          .Where(z => vectors.ContainsKey(z.vkid))
                          .Select(z => new { Attendance = z, Answers = QuestionaireDatasetPreparation.CovertAnswersToVector(scope, z.AnswersId) })
                          .Where(z => z.Answers.Length == questionCount)
                          .ToArray();
            var Y = dataset.Select(z => z.Answers).ToArray();

            File.WriteAllLines("answers.csv", Y.Select(z => string.Join(", ", z)));

            // Random 80/20 split over shuffled row indices.
            var rnd        = new Random(Environment.TickCount);
            var shuffled   = Enumerable.Range(0, dataset.Length).OrderBy(z => rnd.NextDouble()).ToArray();
            var trainCount = dataset.Length * 80 / 100;

            var x_train = shuffled.Take(trainCount).Select(z => vectors[dataset[z].Attendance.vkid]).ToArray();
            var y_train = shuffled.Take(trainCount).Select(z => Y[z]).ToArray();
            var x_test  = shuffled.Skip(trainCount).Select(z => vectors[dataset[z].Attendance.vkid]).ToArray();
            var y_test  = shuffled.Skip(trainCount).Select(z => Y[z]).ToArray();

            var fifther = new Fifther();

            Console.WriteLine();

            // Prints two figures for one partition:
            //  - 1 minus the mean Euclidean distance between the one-hot label and
            //    the predicted distribution, scaled by sqrt(2), the distance between
            //    two different one-hot vectors;
            //  - the mean absolute difference between the arg-max class and the label.
            Action<XGBClassifier, string, float[][], float[]> report = (model, format, features, labels) =>
            {
                var discrepancy = 0.0;
                var dist        = 0.0;
                var preds       = model.PredictDistr(features);

                for (int pos = 0; pos < preds.Length; pos++)
                {
                    var expected = new float[answerClasses];
                    expected[(int)labels[pos]] = 1f;
                    dist        += Math.Abs(det.GetMaxIndex(preds[pos]) - labels[pos]);
                    discrepancy += det.EuclidianDistance(expected, preds[pos]);
                }
                Console.WriteLine(format, 1.0 - discrepancy / (preds.Length * Math.Sqrt(2.0)), dist / preds.Length);
            };

            for (int qnum = 0; qnum < questionCount; qnum++)
            {
                Console.WriteLine($"Question: {qnum}");
                var yds  = y_train.Select(z => (float)z[qnum]).ToArray();
                var ytds = y_test.Select(z => (float)z[qnum]).ToArray();

                var parameters = new Dictionary<string, object>
                {
                    ["max_depth"]     = 10,
                    ["learning_rate"] = 0.1f,
                    ["n_estimators"]  = 300,
                    ["silent"]        = true,
                    // "binary:logistic" was the alternative objective tried here.
                    ["objective"]     = "multi:softprob",

                    ["nthread"]           = -1,
                    ["gamma"]             = 4f,
                    ["min_child_weight"]  = 2,
                    ["max_delta_step"]    = 1,
                    ["subsample"]         = 1f,
                    ["colsample_bytree"]  = 1f,
                    ["colsample_bylevel"] = 1f,
                    ["reg_alpha"]         = 0f,
                    ["reg_lambda"]        = 1f,
                    ["scale_pos_weight"]  = 1f,

                    ["base_score"] = 0.8F,
                    ["seed"]       = 0,
                    ["missing"]    = float.NaN,
                    ["num_class"]  = answerClasses
                };

                // Not disposed here: the classifier is handed over to the Fifther.
                var xgbc = new XGBClassifier(parameters);
                xgbc.Fit(x_train, yds);

                fifther.AddLevel(qnum, xgbc);

                report(xgbc, "[Train] Discrepancy {0:0.000} Dist {1:0.000}", x_train, yds);
                // Test predictions are scored against the test labels (ytds); scoring
                // them against the train labels made the printed test figures meaningless.
                report(xgbc, "[Test ] Discrepancy {0:0.000} Dist {1:0.000}", x_test, ytds);
            }
            Console.WriteLine("Done");
            fifther.Save(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "fifth"));
            // NOTE(review): four reads are kept from the original - presumably to hold
            // the console open across stray input; confirm whether one would do.
            Console.ReadLine();
            Console.ReadLine();
            Console.ReadLine();
            Console.ReadLine();
            return fifther;
        }
Esempio n. 9
0
 /// <summary>
 /// Stores the classifier this instance will use and assigns it a freshly
 /// generated <c>ID</c>. Every call replaces both values.
 /// </summary>
 /// <param name="classifier">
 /// Classifier kept in <c>_classifier</c>. No null check is performed and the
 /// reference is stored as-is, not copied.
 /// </param>
 public void Initialize(XGBClassifier classifier)
 {
     _classifier = classifier;
     ID          = Guid.NewGuid();
 }