/// <summary>
/// Classifies a batch of gathered profiles with the supplied model, records the
/// bot verdict for every profile, and persists only the profiles predicted as class 0.
/// </summary>
/// <param name="gath">Gatherer whose <c>Id</c> is used to tag the log entry.</param>
/// <param name="groupmapping">Mapping passed to each profile's <c>ToVector</c> call when building feature vectors.</param>
/// <param name="xgbc">Classifier that produces one prediction per feature vector.</param>
/// <param name="users">Batch of profiles to classify. An empty batch is a no-op.</param>
private void DetectBotsAndSave(UserProfileGathering gath, Dictionary<long, double[]> groupmapping, XGBClassifier xgbc, List<UserGet> users)
{
    // The caller flushes once more after its loop, so the batch is frequently empty.
    // There is nothing to classify or persist then, and returning early keeps the
    // classifier from ever being invoked with an empty feature matrix.
    if (users.Count == 0)
    {
        return;
    }

    var vectors = users.Select(z => z.ToVector(groupmapping)).ToArray();
    var preds = xgbc.Predict(vectors);

    _log.Information("Thread {GathId} intended to save {NoBotsUserCount}/{UserCount} accounts", gath.Id, preds.Count(z => z == 0), users.Count);

    var humans = new List<UserGet>(users.Count);
    var verdicts = new List<Tuple<int, bool>>(users.Count);

    for (int i = 0; i < users.Count; i++)
    {
        // Every profile gets a verdict row: prediction 1 is stored as "is bot".
        verdicts.Add(new Tuple<int, bool>(users[i].id, preds[i] == 1));

        // Only profiles predicted as class 0 are kept for the main repository.
        if (preds[i] == 0)
        {
            humans.Add(users[i]);
        }
    }

    // The batch is non-empty here, so there is always at least one verdict to store.
    _antiBotRepository.SaveUsers(verdicts);

    if (humans.Count > 0)
    {
        _repo.SaveUsers(humans, DateTime.Now);
    }
}
/// <summary>
/// Worker routine: takes user ids from <paramref name="queue"/>, retrieves each
/// profile through <paramref name="gath"/>, and passes the collected profiles in
/// batches to <c>DetectBotsAndSave</c>. Stops when the queue is empty or the
/// gatherer reports <c>IsSpoiled</c>.
/// </summary>
/// <param name="gath">Gatherer used to retrieve profiles; its <c>Processed</c> counter is incremented per dequeued id.</param>
/// <param name="queue">Queue of user ids to process.</param>
/// <remarks>
/// Any exception is caught and logged; it is not rethrown to the caller.
/// </remarks>
private void UserProfileLabor(UserProfileGathering gath, ConcurrentQueue<int> queue)
{
    // A batch is flushed once it holds more than this many profiles.
    const int BatchThreshold = 50;

    try
    {
        using (var xgbc = BaseXgbModel.LoadClassifierFromFile("ext_trained_model.xgb"))
        {
            xgbc.SetParameter("num_class", 2);
            _log.Verbose("Thread {GathId} is ready to go!", gath.Id);

            var users = new List<UserGet>();

            while (!queue.IsEmpty && !gath.IsSpoiled)
            {
                // IsSpoiled is re-checked before every dequeue so that a gatherer which
                // becomes spoiled mid-run stops taking ids and leaves the rest in the queue.
                while (!gath.IsSpoiled && queue.TryDequeue(out int id))
                {
                    var sw = Stopwatch.StartNew();
                    var userdata = gath.RetrieveUserData(id);
                    gath.Processed++;

                    // Exponentially weighted running average of the retrieval time (new sample weight 0.001).
                    // NOTE(review): _average is a field updated without synchronization here;
                    // confirm whether several workers run this method concurrently.
                    _average = 0.999 * _average + 0.001 * sw.ElapsedMilliseconds;
                    _log.Verbose("thread id {GathId} VkId: {VkId} takes {WholeProfileElapsedTime} ms. Gathered {GatheresItemCount}", gath.Id, id, sw.ElapsedMilliseconds, gath.Processed);

                    // A null result still counts as processed but contributes nothing to the batch.
                    if (userdata == null)
                    {
                        continue;
                    }

                    users.Add(userdata);

                    if (users.Count > BatchThreshold)
                    {
                        DetectBotsAndSave(gath, _groupmapping, xgbc, users);
                        users.Clear();
                    }
                }
            }

            // Flush whatever is left over; skip the call when there is nothing to classify.
            if (users.Count > 0)
            {
                DetectBotsAndSave(gath, _groupmapping, xgbc, users);
            }

            if (!queue.IsEmpty)
            {
                _log.Error("Job unfinished, but laborer {GathId} died. Gathered {GatheresItemCount}", gath.Id, gath.Processed);
            }
        }
    }
    catch (Exception ex)
    {
        _log.Error(ex, ex.Message);
    }
}