/// <summary>
/// Interactive tool: copies every user record from an LZ4-compressed dataset
/// into a protobuf-compressed one, in batches of 1000, logging a throughput
/// estimate (projected days per million records) after each batch.
/// </summary>
private static void Repack()
{
    Console.Write("Enter dataset:");
    var sourcePath = Console.ReadLine();
    Console.Write("Enter repacket dataset:");
    var targetPath = Console.ReadLine();

    var log = new LoggerConfiguration()
        .MinimumLevel.Verbose()
        .WriteTo.LiterateConsole()
        .CreateLogger();

    var source = new UserGetRepository(sourcePath, log, new CompressorLZ4());
    var destination = new UserGetRepository(targetPath, log, new CompressorProto());

    var offset = 0;
    var gsw = Stopwatch.StartNew();
    while (true)
    {
        // Fetch the next page; an empty page means we are past the end.
        var page = new List<UserGet>(source.RangeSelect(offset, 1000));
        offset += 1000;
        destination.SaveUsers(page, DateTime.Now);
        log.Information("Done {Count} recs. {DaysForMillion} days", offset,
            TimeSpan.FromMilliseconds(1000000 * gsw.ElapsedMilliseconds / offset).TotalDays);
        if (page.Count == 0)
        {
            break;
        }
    }
}
/// <summary>
/// Reads every user from the given protobuf-compressed dataset, converts each
/// one to a feature vector via the "map_groups.json" group-embedding map, and
/// pairs every vector with the supplied constant class label.
/// </summary>
/// <param name="cString">Path/connection string of the dataset to read.</param>
/// <param name="label">Class label assigned to every user in this dataset.</param>
/// <returns>Feature matrix <c>x</c> and matching label vector <c>y</c>.</returns>
private static (float[][] x, float[] y) ReadRepo(string cString, float label)
{
    var log = new LoggerConfiguration()
        .MinimumLevel.Verbose()
        .WriteTo.LiterateConsole()
        .CreateLogger();
    var repository = new UserGetRepository(cString, log, new CompressorProto());
    var groupMapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));

    var features = new List<float[]>();
    var labels = new List<float>();
    var offset = 0;
    while (true)
    {
        var page = new List<UserGet>(repository.RangeSelect(offset, 1000));
        offset += 1000;
        foreach (var user in page)
        {
            features.Add(user.ToVector(groupMapping));
            labels.Add(label);
        }
        Console.WriteLine("{0}: {1} read", cString, offset);
        if (page.Count == 0)
        {
            break;
        }
    }
    return (features.ToArray(), labels.ToArray());
}
/// <summary>
/// Interactive tool: reads an "antibot" file of <c>id,isbot</c> rows, collects
/// the ids flagged with isbot == 1, and deletes those users from the chosen
/// dataset.
/// </summary>
private static void Clean()
{
    var log = new LoggerConfiguration()
        .MinimumLevel.Verbose()
        .WriteTo.LiterateConsole()
        .CreateLogger();
    try
    {
        Console.Write("Enter antibot:");
        var antibot = Console.ReadLine();
        // Matches rows like "12345,1"; group "isbot" is the 0/1 bot flag.
        var regex = new Regex(@"(?<id>\d+)\b,(?<isbot>\d)");
        var bots = File.ReadAllLines(antibot)
            .Skip(1) // header row
            .Select(line => regex.Match(line))
            .Where(m => m.Success && m.Groups["isbot"].Value == "1")
            .Select(m => int.Parse(m.Groups["id"].Value))
            .ToArray();
        Console.Write("Enter dataset:");
        var cString = Console.ReadLine();
        var repository = new UserGetRepository(cString, log, new CompressorProto());
        repository.DeleteUsers(bots);
    }
    catch (Exception ex)
    {
        // FIX: pass the exception object so Serilog records the full exception
        // (type + stack trace); previously only ex.Message was logged.
        log.Error(ex, "Failed to clean bots from the dataset");
    }
    Console.WriteLine("Done");
}
public void Do_not_add_users_on_second_time()
{
    // Arrange: warm the repository with a first Get, then swap in the mock
    // cache so the second call can be observed.
    var sut = new UserGetRepository();
    sut.Get(userName);
    sut.cache = cacheServiceMock.Object;

    // Act: a repeated lookup for the same user.
    sut.Get(userName);

    // Assert: an already-seen user must not be written to the cache again.
    cacheServiceMock.Verify(
        m => m.Add(It.IsAny<string>(), It.IsAny<object>()),
        Times.Never);
}
/// <summary>
/// Interactive tool: runs a pre-trained 2-class XGBoost bot classifier over
/// every user of a dataset (in batches of 10000) and accumulates a
/// "VkId,IsBot" CSV, checkpointed to disk after every batch.
/// </summary>
private static void Reveal()
{
    Console.Write("Enter dataset:");
    var cString = Console.ReadLine();
    var log = new LoggerConfiguration()
        .MinimumLevel.Verbose()
        .WriteTo.LiterateConsole()
        .CreateLogger();
    var repository = new UserGetRepository(cString, log, new CompressorProto());
    // Group-id -> embedding map consumed by UserGet.ToVector.
    var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));
    var xgbc = BaseXgbModel.LoadClassifierFromFile("ext_trained_model.xgb");
    xgbc.SetParameter("num_class", 2);
    var count = 0;
    var sb = new StringBuilder();
    sb.AppendLine("VkId,IsBot");
    var gsw = Stopwatch.StartNew();
    UserGet[] users = new UserGet[0];
    do
    {
        try
        {
            users = repository.RangeSelect(count, 10000).ToArray();
            // Vectorize the whole batch, then classify it in one Predict call.
            float[][] input = new float[users.Length][];
            for (int user = 0; user < users.Length; user++)
            {
                input[user] = users[user].ToVector(groupmapping);
            }
            var preds = xgbc.Predict(input);
            for (int user = 0; user < users.Length; user++)
            {
                sb.AppendLine($"{users[user].id},{preds[user]}");
            }
            count += 10000;
            // Checkpoint: rewrite the whole CSV each batch so progress
            // survives a crash mid-run.
            File.WriteAllText($"IsBot_{cString}.csv", sb.ToString());
            log.Information("Done {Count} recs. Bpc {BotPercent}, {DaysForMillion} days", count, preds.Sum() / preds.Length, TimeSpan.FromMilliseconds(1000000 * gsw.ElapsedMilliseconds / count).TotalDays);
        }
        catch (Exception ex)
        {
            // NOTE(review): on failure, count is NOT advanced, so the same
            // batch is retried on the next pass — presumably an intentional
            // retry, but a persistent error makes this loop spin forever;
            // confirm whether a retry cap is wanted.
            Console.WriteLine(ex);
        }
    } while (users.Length != 0);
    // Final flush (a no-op if the last checkpoint already wrote everything).
    File.WriteAllText($"IsBot_{cString}.csv", sb.ToString());
    Console.WriteLine("Done");
    Console.ReadLine();
}
/// <summary>
/// Interactive tool: for every user in the chosen database that appears in the
/// target-id list, finds which of the "Publics.txt" group ids the user belongs
/// to and appends one "VkId\tGroup" row per match to task.csv.
/// </summary>
private static void ComputeTask()
{
    Console.Write("enter_database:");
    var dbfile = Console.ReadLine();
    var log = new LoggerConfiguration()
        .MinimumLevel.Verbose()
        .WriteTo.LiterateConsole()
        .CreateLogger();
    var repository = new UserGetRepository(dbfile, log, new CompressorProto());
    // NOTE(review): unparseable lines collapse into a single -1 sentinel
    // inside each set; kept for compatibility — confirm no real id can be -1.
    var publics = new HashSet<int>(File.ReadLines("task\\Publics.txt").Select(z => int.TryParse(z, out int vkid) ? vkid : -1));
    var target = new HashSet<int>(File.ReadLines("task\\532k_Min_1_opp_public_from_our_4.7kk.txt").Select(z => int.TryParse(z, out int vkid) ? vkid : -1));
    var users = new UserGet[0];
    var count = 0;
    const int batch = 50000;
    if (!File.Exists("task.csv"))
    {
        File.WriteAllText("task.csv", "VkId\tGroups\r\n");
    }
    do
    {
        try
        {
            users = repository.RangeSelect(count, batch).ToArray();
            foreach (var user in users)
            {
                if (!target.Contains(user.id))
                {
                    continue;
                }
                var common = publics.Intersect(user.Groups?.Select(z => z.id) ?? new int[0]);
                // FIX: buffer the rows and append once per user instead of
                // reopening task.csv for every single matched group
                // (identical file contents, far fewer file opens).
                var rows = new StringBuilder();
                foreach (var c in common)
                {
                    rows.Append($"{user.id}\t{c}\r\n");
                }
                if (rows.Length > 0)
                {
                    File.AppendAllText("task.csv", rows.ToString());
                }
                log.Information("Done with {UserId}", user.id);
            }
            count += batch;
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }
    } while (users.Length != 0);
    log.Information("Done");
}
public void Add_all_default_users_on_first_time()
{
    // Arrange: the cache reports a miss, forcing the repository to populate.
    cacheServiceMock.Setup(m => m.Get<dynamic>(It.IsAny<string>())).Returns<dynamic>(null);
    var sut = new UserGetRepository
    {
        cache = cacheServiceMock.Object,
        repository = repositoryMock.Object
    };

    // Act: first-ever lookup.
    sut.Get(userName);

    // Assert: every default user gets saved exactly once on a cold start.
    repositoryMock.Verify(
        m => m.Save(It.IsAny<User>()),
        Times.Exactly(UserGetRepository.defaultUsers.Count()));
}
/// <summary>
/// Trains one 5-class XGBoost classifier per questionnaire question (120 of
/// them) on user feature vectors, prints train/test discrepancy metrics for
/// each, saves the resulting ensemble to the "fifth" directory and returns it.
/// </summary>
/// <returns>The trained <see cref="Fifther"/> ensemble (one model per question).</returns>
private static Fifther Fifth()
{
    var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));
    var bagOfTerms = File.ReadAllLines("res\\expert_topics.csv")
        .Skip(1)
        .Select(z => z.Split(','))
        .ToDictionary(z => int.Parse(z[0]), z => new HashSet<string>(z[1].Split(' ')));
    var xFilename = "224053984_dataset.json";

    // Questionnaire structure used to convert raw answer ids into vectors.
    var scope = new QuestionnaireScope();
    XmlSerializer serializer = new XmlSerializer(typeof(QuestionnaireScope));
    using (var reader = new StreamReader("test_fifth.xml"))
    {
        scope = (QuestionnaireScope)serializer.Deserialize(reader);
    }

    var log = new LoggerConfiguration()
        .MinimumLevel.Verbose()
        .WriteTo.LiterateConsole()
        .CreateLogger();
    var repository = new UserGetRepository("passed_tests_ferrets.s3db", log, new CompressorProto());

    // Vectorize every user in the repository, page by page.
    Dictionary<int, float[]> vectors = new Dictionary<int, float[]>();
    var users = new UserGet[0];
    var count = 0;
    const int batch = 1000;
    do
    {
        try
        {
            users = repository.RangeSelect(count, batch).ToArray();
            foreach (var user in users)
            {
                vectors.Add(user.id, user.ToVector(groupmapping, bagOfTerms));
            }
            count += batch;
            log.Information("Done {Count} recs.", count);
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }
    } while (users.Length != 0);

    // Keep only attendances that have a user vector AND whose answer vector
    // has the full 120 entries.
    var X = JsonConvert.DeserializeObject<List<FifthAttendance>>(File.ReadAllText(xFilename))
        .Where(z => vectors.ContainsKey(z.vkid))
        .Where(z => QuestionaireDatasetPreparation.CovertAnswersToVector(scope, z.AnswersId).Length == 120)
        .ToArray();
    var Y = X.Select(z => QuestionaireDatasetPreparation.CovertAnswersToVector(scope, z.AnswersId)).ToArray();
    File.WriteAllLines("answers.csv", Y.Select(z => string.Join(", ", z)));

    // Shuffled 80/20 train/test split over attendance indices.
    var rnd = new Random(Environment.TickCount);
    var train = Enumerable.Range(0, X.Length).OrderBy(z => rnd.NextDouble()).ToArray();
    var trainSize = X.Length * 80 / 100;
    var x_train = train.Take(trainSize).Select(z => vectors[X[z].vkid]).ToArray();
    var y_train = train.Take(trainSize).Select(z => Y[z]).ToArray();
    var x_test = train.Skip(trainSize).Select(z => vectors[X[z].vkid]).ToArray();
    var y_test = train.Skip(trainSize).Select(z => Y[z]).ToArray();

    var fifther = new Fifther();

    // Predicts on one split and prints its discrepancy/distance metrics.
    // (Extracted from the formerly duplicated train/test loops.)
    void Report(string tag, XGBClassifier model, float[][] inputs, float[] labels)
    {
        var preds = model.PredictDistr(inputs);
        var discrepancy = 0.0;
        var dist = 0.0;
        for (int pos = 0; pos < preds.Length; pos++)
        {
            var tmp = new float[5];
            tmp[(int)labels[pos]] = 1f; // one-hot of the true class
            dist += Math.Abs(det.GetMaxIndex(preds[pos]) - labels[pos]);
            discrepancy += det.EuclidianDistance(tmp, preds[pos]);
        }
        Console.WriteLine(tag + " Discrepancy {0:0.000} Dist {1:0.000}",
            1.0 - discrepancy / (preds.Length * Math.Sqrt(2.0)), dist / preds.Length);
    }

    Console.WriteLine();
    for (int qnum = 0; qnum < 120; qnum++)
    {
        Console.WriteLine($"Question: {qnum}");
        var yds = y_train.Select(z => (float)z[qnum]).ToArray();
        var ytds = y_test.Select(z => (float)z[qnum]).ToArray();

        var parameters = new Dictionary<string, object>();
        parameters["max_depth"] = 10;
        parameters["learning_rate"] = 0.1f;
        parameters["n_estimators"] = 300;
        parameters["silent"] = true;
        parameters["objective"] = "multi:softprob";
        parameters["nthread"] = -1;
        parameters["gamma"] = 4f;
        parameters["min_child_weight"] = 2;
        parameters["max_delta_step"] = 1;
        parameters["subsample"] = 1f;
        parameters["colsample_bytree"] = 1f;
        parameters["colsample_bylevel"] = 1f;
        parameters["reg_alpha"] = 0f;
        parameters["reg_lambda"] = 1f;
        parameters["scale_pos_weight"] = 1f;
        parameters["base_score"] = 0.8F;
        parameters["seed"] = 0;
        parameters["missing"] = float.NaN;
        parameters["num_class"] = 5;

        var xgbc = new XGBClassifier(parameters);
        xgbc.Fit(x_train, yds);
        fifther.AddLevel(qnum, xgbc);

        Report("[Train]", xgbc, x_train, yds);
        // BUG FIX: the test metrics previously indexed the *train* labels
        // (yds) while predicting on x_test; they now use ytds, the test
        // labels, which were computed but never used before.
        Report("[Test ]", xgbc, x_test, ytds);
    }
    Console.WriteLine("Done");
    fifther.Save(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "fifth"));
    Console.ReadLine(); // single pause before returning (was four redundant ReadLines)
    return fifther;
}
/// <summary>
/// Interactive tool: loads the per-question ensemble trained by Fifth(), runs
/// it over every user (with at least one group) of the chosen dataset, and
/// stores the predicted questionnaire scales via FifthResultRepository.
/// Clears any previous results first.
/// </summary>
private static void ComputeFifth()
{
    var fifther = new Fifther();
    fifther.Load(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "fifth"));
    Console.Write("Enter dataset:");
    var cString = Console.ReadLine();
    var bagOfTerms = File.ReadAllLines("res\\expert_topics.csv")
        .Skip(1)
        .Select(z => z.Split(','))
        .ToDictionary(z => int.Parse(z[0]), z => new HashSet<string>(z[1].Split(' ')));
    var scope = new QuestionnaireScope();
    XmlSerializer serializer = new XmlSerializer(typeof(QuestionnaireScope));
    using (var reader = new StreamReader("test_fifth.xml"))
    {
        scope = (QuestionnaireScope)serializer.Deserialize(reader);
    }
    var log = new LoggerConfiguration()
        .MinimumLevel.Verbose()
        .WriteTo.LiterateConsole()
        .CreateLogger();
    var repository = new UserGetRepository(cString, log, new CompressorProto());
    var fifthRepo = new FifthResultRepository(cString);
    var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));
    fifthRepo.CleanAll();

    var count = 0;
    var gsw = Stopwatch.StartNew();
    UserGet[] fetched = new UserGet[0];
    const int batch = 1000;
    do
    {
        try
        {
            fetched = repository.RangeSelect(count, batch).ToArray();
            // BUG FIX: the loop used to terminate when the *filtered* batch
            // came back empty, so one page of users without groups ended the
            // whole run early. Termination now depends on the raw page size.
            var users = fetched.Where(z => z?.Groups?.Count > 0).ToArray();
            if (users.Length > 0)
            {
                float[][] input = new float[users.Length][];
                for (int user = 0; user < users.Length; user++)
                {
                    input[user] = users[user].ToVector(groupmapping, bagOfTerms);
                }
                var preds = fifther.PredictDistr(input, 5);
                fifthRepo.Insert(
                    users.Select(z => z.id).ToArray(),
                    preds.Select(z => QuestionaireDatasetPreparation.PredictionsToScales(scope, z)).ToArray());
            }
            count += batch;
            log.Information("Done {Count} recs. {DaysForMillion} days", count, TimeSpan.FromMilliseconds(1000000 * gsw.ElapsedMilliseconds / count).TotalDays);
        }
        catch (Exception ex)
        {
            // NOTE(review): count is not advanced on failure, so the same
            // page is retried — a persistent error will spin forever.
            Console.WriteLine(ex);
        }
    } while (fetched.Length != 0);
    Console.WriteLine("Done");
    Console.ReadLine(); // single pause (was two redundant ReadLines)
}