public Scheduler()
{
    // Every worker is initialized with the SAME classifier instance.
    // NOTE(review): assumes XGBClassifier prediction is safe to share across
    // workers — confirm thread-safety of the native booster handle.
    var sharedClassifier = new XGBClassifier();
    foreach (var worker in _workers.Values)
    {
        worker.Initialize(sharedClassifier);
        worker.NotBusy += Worker_NotBusy;
    }
}
/// <summary>
/// Trains a classifier on the canned training set and checks its predictions
/// on the canned test set against <c>TestUtils.ClassifierPredsCorrect</c>.
/// </summary>
public void Predict()
{
    var dataTrain = TestUtils.GetClassifierDataTrain();
    var labelsTrain = TestUtils.GetClassifierLabelsTrain();
    var dataTest = TestUtils.GetClassifierDataTest();

    // XGBClassifier is IDisposable (it is used with `using` elsewhere in this
    // codebase); dispose deterministically instead of leaking the model.
    using (var xgbc = new XGBClassifier())
    {
        xgbc.Fit(dataTrain, labelsTrain);
        var preds = xgbc.Predict(dataTest);
        Assert.IsTrue(TestUtils.ClassifierPredsCorrect(preds));
    }
}
/// <summary>
/// Trains a classifier and verifies the model can be dumped to a textual
/// description. Previously the test computed (and discarded) PredictProba
/// output and asserted nothing; it now asserts the dump is non-empty.
/// </summary>
public void TestClassifierDump()
{
    var dataTrain = TestUtils.GetClassifierDataTrain();
    var labelsTrain = TestUtils.GetClassifierLabelsTrain();

    // Dispose the classifier deterministically (IDisposable per usage
    // elsewhere in this codebase).
    using (var xgbc = new XGBClassifier())
    {
        xgbc.Fit(dataTrain, labelsTrain);
        var description = xgbc.DumpModelEx();

        // The dump of a fitted model must contain at least one tree.
        Assert.IsTrue(!string.IsNullOrEmpty(description));
        Console.WriteLine("Model Dumped: " + description);
    }
}
/// <summary>
/// Round-trips a parameterized classifier through a model file and checks
/// that the loaded model reproduces the original probability predictions.
/// Adds disposal of both models and cleanup of the temporary model file,
/// both of which the original leaked.
/// </summary>
public void TestClassifierSaveAndLoadWithParameters()
{
    var dataTrain = TestUtils.GetClassifierDataTrain();
    var labelsTrain = TestUtils.GetClassifierLabelsTrain();
    var dataTest = TestUtils.GetClassifierDataTest();
    try
    {
        using (var xgbc = new XGBClassifier(10, 0.01f, 50))
        {
            xgbc.Fit(dataTrain, labelsTrain);
            var preds1 = xgbc.PredictProba(dataTest);
            xgbc.SaveModelToFile(TEST_FILE);

            using (var xgbc2 = BaseXgbModel.LoadClassifierFromFile(TEST_FILE))
            {
                var preds2 = xgbc2.PredictProba(dataTest);
                Assert.IsTrue(TestUtils.AreEqual(preds1, preds2));
            }
        }
    }
    finally
    {
        // Remove the temp model file even when the assertion fails.
        // (File.Delete is a no-op if the file does not exist.)
        System.IO.File.Delete(TEST_FILE);
    }
}
// Classifies a batch of user profiles as bot / not-bot and persists the
// results: every verdict goes to the anti-bot repository, and only the
// non-bot profiles go to the main repository.
private void DetectBotsAndSave(UserProfileGathering gath, Dictionary<long, double[]> groupmapping, XGBClassifier xgbc, List<UserGet> users)
{
    // Vectorize all profiles and classify them in one batch call.
    var featureRows = users.Select(u => u.ToVector(groupmapping)).ToArray();
    var verdicts = xgbc.Predict(featureRows);

    _log.Information(
        "Thread {GathId} intended to save {NoBotsUserCount}/{UserCount} accounts",
        gath.Id, verdicts.Count(v => v == 0), users.Count);

    // Pair every user id with its verdict (prediction == 1 means "bot"),
    // and keep the non-bot profiles for the main repository.
    var isbot = users
        .Select((u, i) => new Tuple<int, bool>(u.id, verdicts[i] == 1))
        .ToList();
    var humans = users.Where((u, i) => verdicts[i] == 0).ToList();

    if (isbot.Any())
    {
        _antiBotRepository.SaveUsers(isbot);
    }
    if (humans.Any())
    {
        _repo.SaveUsers(humans, DateTime.Now);
    }
}
// Registers the model trained for one question. The two lists are kept
// parallel: percentiles[i] holds the question number whose model is xgb[i].
public void AddLevel(int questionNum, XGBClassifier regressor)
{
    xgb.Add(regressor);
    percentiles.Add(questionNum);
}
// Builds a bot / not-bot training corpus from two SQLite repositories,
// dumps it to CSV for offline inspection, trains an XGBoost classifier,
// round-trips the model through a file, and prints train/test accuracy.
private static void AntiBot()
{
    //var X = JsonConvert.DeserializeObject<List<float[]>>(File.ReadAllText("X.json"));
    //var Y = JsonConvert.DeserializeObject<List<float>>(File.ReadAllText("Y.json"));
    var X = new List<float[]>();
    var Y = new List<float>();
    // Label 0.0 = non-bot accounts, 1.0 = bots (the label is passed into ReadRepo
    // and comes back attached to every row of that repository).
    var pt1 = ReadRepo("nopack_nobot_ferrets.s3db", 0.0f);
    var pt2 = ReadRepo("nopack_bots_ferrets.s3db", 1.0f);
    X.AddRange(pt1.x);
    X.AddRange(pt2.x);
    Y.AddRange(pt1.y);
    Y.AddRange(pt2.y);
    // Dump a labelled copy (antibot_xy.csv: label first, then features) and an
    // unlabelled copy (antibot_x.csv) of the corpus.
    // NOTE(review): the header concatenates GetHeader(0, 0) three times plus
    // GetHeader(102, 0); presumably this mirrors four concatenated sections of
    // the feature vector — confirm against FlatUsertToVectorMapping.
    File.WriteAllText("antibot_xy.csv", "Y," + string.Join(", ", FlatUsertToVectorMapping.GetHeader(0, 0)) + ", " + string.Join(", ", FlatUsertToVectorMapping.GetHeader(0, 0)) + ", " + string.Join(", ", FlatUsertToVectorMapping.GetHeader(0, 0)) + ", " + string.Join(", ", FlatUsertToVectorMapping.GetHeader(102, 0)) + "\r\n");
    File.AppendAllLines("antibot_xy.csv", X.Select((z, i) => (int)Y[i] + ", " + string.Join(", ", z.Select(x => x.ToString(CultureInfo.InvariantCulture)))));
    File.WriteAllText("antibot_x.csv", string.Join(", ", FlatUsertToVectorMapping.GetHeader(0, 0)) + ", " + string.Join(", ", FlatUsertToVectorMapping.GetHeader(0, 0)) + ", " + string.Join(", ", FlatUsertToVectorMapping.GetHeader(0, 0)) + ", " + string.Join(", ", FlatUsertToVectorMapping.GetHeader(102, 0)) + "\r\n");
    File.AppendAllLines("antibot_x.csv", X.Select((z, i) => string.Join(", ", z.Select(x => x.ToString(CultureInfo.InvariantCulture)))));
    //File.WriteAllText("X.json", JsonConvert.SerializeObject(X));
    //File.WriteAllText("Y.json", JsonConvert.SerializeObject(Y));
    // Shuffle the sample indices, then split 80% train / 20% test.
    var rnd = new Random(Environment.TickCount);
    var train = Enumerable.Range(0, X.Count).OrderBy(z => rnd.NextDouble()).ToArray();
    var x_train = train.Take(X.Count * 80 / 100).Select(z => X[z]).ToArray();
    var y_train = train.Take(X.Count * 80 / 100).Select(z => Y[z]).ToArray();
    var x_test = train.Skip(X.Count * 80 / 100).Select(z => X[z]).ToArray();
    var y_test = train.Skip(X.Count * 80 / 100).Select(z => Y[z]).ToArray();
    //StringBuilder sb = new StringBuilder();
    //sb.AppendLine($"Age;Sex");
    //for (int pos = 0; pos < X.Count; pos++)
    //    if (Y[pos] == 0)
    //    {
    //        sb.AppendLine($"{X[pos][0]};{X[pos][19]}");
    //    }
    //File.WriteAllText("socdem.csv", sb.ToString());
    // XGBoost hyper-parameters: a two-class softprob classifier, 500 trees.
    var parameters = new Dictionary<string, object>();
    parameters["max_depth"] = 10;
    parameters["learning_rate"] = 0.1f;
    parameters["n_estimators"] = 500;
    parameters["silent"] = true;
    parameters["objective"] = "multi:softprob";//"binary:logistic";//
    parameters["nthread"] = -1;
    parameters["gamma"] = 0f;
    parameters["min_child_weight"] = 1;
    parameters["max_delta_step"] = 1;
    parameters["subsample"] = 1f;
    parameters["colsample_bytree"] = 1f;
    parameters["colsample_bylevel"] = 1f;
    parameters["reg_alpha"] = 1.5f;
    parameters["reg_lambda"] = 1f;
    parameters["scale_pos_weight"] = 1f;
    parameters["base_score"] = 0.5F;
    parameters["seed"] = 0;
    parameters["missing"] = float.NaN;
    parameters["num_class"] = 2;
    // Train, persist, then deliberately reload from disk so the evaluation
    // below exercises the saved artifact rather than the in-memory model.
    using (var txgbc = new XGBClassifier(parameters))
    {
        txgbc.Fit(x_train, y_train);
        txgbc.SaveModelToFile("ext_trained_model.xgb");
    }
    using (var xgbc = BaseXgbModel.LoadClassifierFromFile("ext_trained_model.xgb"))
    {
        // NOTE(review): presumably num_class is not restored by loading and
        // must be reapplied before Predict — confirm against BaseXgbModel.
        xgbc.SetParameter("num_class", 2);
        // Despite the names, these accumulate the share of *matching*
        // predictions (accuracy, printed as "Quality"), not a discrepancy.
        var testDiscrepancy = 0.0;
        var trainDiscrepancy = 0.0;
        var preds = xgbc.Predict(x_test);
        for (int pos = 0; pos < preds.Length; pos++)
        {
            // Labels are 0/1 floats; tolerance comparison counts exact matches.
            testDiscrepancy += Math.Abs(y_test[pos] - preds[pos]) < 1e-3 ? 1 : 0;
        }
        testDiscrepancy /= preds.Length;
        preds = xgbc.Predict(x_train);
        for (int pos = 0; pos < preds.Length; pos++)
        {
            trainDiscrepancy += Math.Abs(y_train[pos] - preds[pos]) < 1e-3 ? 1 : 0;
        }
        trainDiscrepancy /= preds.Length;
        Console.WriteLine("Train/Test Quality {0}/{1}", trainDiscrepancy, testDiscrepancy);
    }
    Console.ReadKey();
}
/// <summary>
/// Builds per-question XGBoost models for a 120-question questionnaire:
/// vectorizes user profiles from a SQLite repository, joins them with
/// questionnaire answers, trains one 5-class classifier per question,
/// prints train/test quality, and saves the assembled <see cref="Fifther"/>.
///
/// BUG FIX: the test-set evaluation loop indexed <c>yds</c> (train labels)
/// instead of <c>ytds</c> (test labels) — <c>ytds</c> was computed and never
/// used — so the "[Test ]" metrics compared test predictions against training
/// labels. It now uses <c>ytds</c>.
/// </summary>
private static Fifther Fifth()
{
    var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));
    // expert_topics.csv: "<topic id>,<space-separated terms>" per line, header skipped.
    var bagOfTerms = File.ReadAllLines("res\\expert_topics.csv")
        .Skip(1)
        .Select(z => z.Split(','))
        .ToDictionary(z => int.Parse(z[0]), z => new HashSet<string>(z[1].Split(' ')));
    var xFilename = "224053984_dataset.json";
    var scope = new QuestionnaireScope();
    XmlSerializer serializer = new XmlSerializer(typeof(QuestionnaireScope));
    using (var reader = new StreamReader("test_fifth.xml"))
    {
        scope = (QuestionnaireScope)serializer.Deserialize(reader);
    }
    var log = new LoggerConfiguration()
        .MinimumLevel.Verbose()
        .WriteTo.LiterateConsole()
        .CreateLogger();
    var repository = new UserGetRepository("passed_tests_ferrets.s3db", log, new CompressorProto());

    // Pull the whole repository in batches and vectorize every profile.
    // A failing batch is logged to the console and retried-by-skip (the
    // original behaviour: the loop simply continues).
    Dictionary<int, float[]> vectors = new Dictionary<int, float[]>();
    var users = new UserGet[0];
    var count = 0;
    const int batch = 1000;
    do
    {
        try
        {
            users = repository.RangeSelect(count, batch).ToArray();
            foreach (var user in users)
            {
                vectors.Add(user.id, user.ToVector(groupmapping, bagOfTerms));
            }
            count += batch;
            log.Information("Done {Count} recs.", count);
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }
    } while (users.Length != 0);

    // Keep only attendances that have a profile vector AND a complete
    // (120-answer) questionnaire vector.
    var X = JsonConvert.DeserializeObject<List<FifthAttendance>>(File.ReadAllText(xFilename))
        .Where(z => vectors.ContainsKey(z.vkid))
        .Where(z => QuestionaireDatasetPreparation.CovertAnswersToVector(scope, z.AnswersId).Length == 120)
        .ToArray();
    var Y = X.Select(z => QuestionaireDatasetPreparation.CovertAnswersToVector(scope, z.AnswersId)).ToArray();
    File.WriteAllLines("answers.csv", Y.Select(z => string.Join(", ", z)));

    // Shuffle, then split 80% train / 20% test.
    var rnd = new Random(Environment.TickCount);
    var train = Enumerable.Range(0, X.Length).OrderBy(z => rnd.NextDouble()).ToArray();
    var x_train = train.Take(X.Length * 80 / 100).Select(z => vectors[X[z].vkid]).ToArray();
    var y_train = train.Take(X.Length * 80 / 100).Select(z => Y[z]).ToArray();
    var x_test = train.Skip(X.Length * 80 / 100).Select(z => vectors[X[z].vkid]).ToArray();
    var y_test = train.Skip(X.Length * 80 / 100).Select(z => Y[z]).ToArray();

    var fifther = new Fifther();
    Console.WriteLine();
    for (int qnum = 0; qnum < 120; qnum++)
    {
        Console.WriteLine($"Question: {qnum}");
        // Per-question labels: yds = train labels, ytds = test labels.
        var yds = y_train.Select(z => (float)z[qnum]).ToArray();
        var ytds = y_test.Select(z => (float)z[qnum]).ToArray();

        // 5-class softprob classifier per question, 300 trees.
        var parameters = new Dictionary<string, object>();
        parameters["max_depth"] = 10;
        parameters["learning_rate"] = 0.1f;
        parameters["n_estimators"] = 300;
        parameters["silent"] = true;
        parameters["objective"] = "multi:softprob";//"binary:logistic";//
        parameters["nthread"] = -1;
        parameters["gamma"] = 4f;
        parameters["min_child_weight"] = 2;
        parameters["max_delta_step"] = 1;
        parameters["subsample"] = 1f;
        parameters["colsample_bytree"] = 1f;
        parameters["colsample_bylevel"] = 1f;
        parameters["reg_alpha"] = 0f;
        parameters["reg_lambda"] = 1f;
        parameters["scale_pos_weight"] = 1f;
        parameters["base_score"] = 0.8F;
        parameters["seed"] = 0;
        parameters["missing"] = float.NaN;
        parameters["num_class"] = 5;

        // Not disposed here: ownership passes to fifther via AddLevel.
        var xgbc = new XGBClassifier(parameters);
        xgbc.Fit(x_train, yds);
        fifther.AddLevel(qnum, xgbc);

        // Train-set metrics: one-hot the true class and measure Euclidean
        // distance to the predicted distribution, plus argmax distance.
        var discrepancy = 0.0;
        var dist = 0.0;
        var preds = xgbc.PredictDistr(x_train);
        for (int pos = 0; pos < preds.Length; pos++)
        {
            var tmp = new float[5];
            tmp[(int)yds[pos]] = 1f;
            dist += Math.Abs(det.GetMaxIndex(preds[pos]) - yds[pos]);
            discrepancy += det.EuclidianDistance(tmp, preds[pos]);
        }
        Console.WriteLine("[Train] Discrepancy {0:0.000} Dist {1:0.000}", 1.0 - discrepancy / (preds.Length * Math.Sqrt(2.0)), dist / preds.Length);

        // Test-set metrics — FIXED to use the test labels (ytds), where the
        // original mistakenly reused the train labels (yds).
        preds = xgbc.PredictDistr(x_test);
        discrepancy = 0.0;
        dist = 0.0;
        for (int pos = 0; pos < preds.Length; pos++)
        {
            var tmp = new float[5];
            tmp[(int)ytds[pos]] = 1f;
            dist += Math.Abs(det.GetMaxIndex(preds[pos]) - ytds[pos]);
            discrepancy += det.EuclidianDistance(tmp, preds[pos]);
        }
        Console.WriteLine("[Test ] Discrepancy {0:0.000} Dist {1:0.000}", 1.0 - discrepancy / (preds.Length * Math.Sqrt(2.0)), dist / preds.Length);
    }
    Console.WriteLine("Done");
    fifther.Save(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "fifth"));
    // Original behaviour preserved: waits for four key presses before returning.
    Console.ReadLine();
    Console.ReadLine();
    Console.ReadLine();
    Console.ReadLine();
    return fifther;
}
// Adopts the classifier this worker will use and stamps the worker
// with a fresh unique identity.
public void Initialize(XGBClassifier classifier)
{
    ID = Guid.NewGuid();
    _classifier = classifier;
}