Ejemplo n.º 1
0
        /// <summary>
        /// Interactively copies the records of one dataset into another, reading through
        /// <c>CompressorLZ4</c> and writing through <c>CompressorProto</c>.
        /// </summary>
        /// <remarks>
        /// Both dataset identifiers are read from the console. Records are requested in steps of
        /// 1000 via <c>RangeSelect(count, 1000)</c> (presumably offset/limit paging - TODO confirm
        /// against the repository) and copying stops at the first empty batch. The empty
        /// terminating batch is neither saved nor reported.
        /// </remarks>
        private static void Repack()
        {
            const int batch = 1000;

            Console.Write("Enter dataset:");
            var cString = Console.ReadLine();

            Console.Write("Enter repacket dataset:");
            var target = Console.ReadLine();
            var log    = new LoggerConfiguration()
                         .MinimumLevel.Verbose()
                         .WriteTo.LiterateConsole()
                         .CreateLogger();
            var repository       = new UserGetRepository(cString, log, new CompressorLZ4());
            var targetRepository = new UserGetRepository(target, log, new CompressorProto());

            var count = 0;
            var gsw   = Stopwatch.StartNew();

            while (true)
            {
                var users = new List<UserGet>(repository.RangeSelect(count, batch));
                if (users.Count == 0)
                {
                    // Source exhausted: do not write an empty batch or log progress for it.
                    break;
                }

                targetRepository.SaveUsers(users, DateTime.Now);
                count += batch;

                // Extrapolates the elapsed time so far to one million records.
                log.Information("Done {Count} recs. {DaysForMillion} days", count, TimeSpan.FromMilliseconds(1000000 * gsw.ElapsedMilliseconds / count).TotalDays);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads every user from a dataset, converts each one to a feature vector and pairs it
        /// with a fixed label.
        /// </summary>
        /// <param name="cString">Dataset identifier passed to <c>UserGetRepository</c>; also echoed in the progress output.</param>
        /// <param name="label">Value stored in <c>y</c> for every user that is read.</param>
        /// <returns>
        /// <c>x</c>: one vector per user, in read order; <c>y</c>: an array of the same length
        /// filled with <paramref name="label"/>.
        /// </returns>
        private static (float[][] x, float[] y) ReadRepo(string cString, float label)
        {
            var logger = new LoggerConfiguration()
                         .MinimumLevel.Verbose()
                         .WriteTo.LiterateConsole()
                         .CreateLogger();
            var repository   = new UserGetRepository(cString, logger, new CompressorProto());
            var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));

            var features = new List<float[]>();
            var labels   = new List<float>();
            var offset   = 0;

            while (true)
            {
                var page = new List<UserGet>(repository.RangeSelect(offset, 1000));
                offset += 1000;

                foreach (var user in page)
                {
                    features.Add(user.ToVector(groupmapping));
                    labels.Add(label);
                }

                // Progress is printed for every request, including the final empty one.
                Console.WriteLine("{0}: {1} read", cString, offset);

                if (page.Count == 0)
                {
                    break;
                }
            }

            return (features.ToArray(), labels.ToArray());
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Interactively deletes from a dataset every user that an "antibot" file marks as a bot.
        /// </summary>
        /// <remarks>
        /// The antibot file path and the dataset identifier are read from the console. The first
        /// line of the file is skipped; from the remaining lines, those matching
        /// <c>&lt;id&gt;,&lt;isbot&gt;</c> with <c>isbot == "1"</c> contribute their id to the
        /// deletion list. Any failure is logged and the method still prints "Done".
        /// </remarks>
        private static void Clean()
        {
            var log = new LoggerConfiguration()
                      .MinimumLevel.Verbose()
                      .WriteTo.LiterateConsole()
                      .CreateLogger();

            try
            {
                Console.Write("Enter antibot:");
                var antibot = Console.ReadLine();
                var regex   = new Regex(@"(?<id>\d+)\b,(?<isbot>\d)");
                var bots    = File.ReadAllLines(antibot)
                              .Skip(1)
                              .Select(z => regex.Match(z))
                              .Where(z => z.Success && z.Groups["isbot"].Value == "1")
                              .Select(z => int.Parse(z.Groups["id"].Value))
                              .ToArray();

                Console.Write("Enter dataset:");
                var cString = Console.ReadLine();

                var repository = new UserGetRepository(cString, log, new CompressorProto());

                repository.DeleteUsers(bots);
            }
            catch (Exception ex)
            {
                // Hand the exception itself to the logger: using ex.Message as the template would
                // lose the exception type and stack trace and would make the logger interpret any
                // braces in the message as property placeholders.
                log.Error(ex, "Failed to delete bot users");
            }
            Console.WriteLine("Done");
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Checks that a repeated <c>Get</c> for the same user name never calls <c>Add</c> on the cache.
        /// </summary>
        public void Do_not_add_users_on_second_time()
        {
            // Arrange: the first Get runs before the mocked cache is attached,
            // so whatever it does is not recorded by the mock.
            var userRepository = new UserGetRepository();
            userRepository.Get(userName);
            userRepository.cache = cacheServiceMock.Object;

            // Act
            userRepository.Get(userName);

            // Assert
            cacheServiceMock.Verify(
                m => m.Add(It.IsAny<string>(), It.IsAny<object>()),
                Times.Never);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Interactively scores every user of a dataset with the classifier stored in
        /// <c>ext_trained_model.xgb</c> and writes the predictions to <c>IsBot_{dataset}.csv</c>.
        /// </summary>
        /// <remarks>
        /// The CSV starts with a <c>VkId,IsBot</c> header; each processed batch is appended to it,
        /// so the file is valid after every batch without being rewritten from scratch.
        /// Users are requested in steps of 10000 via <c>RangeSelect(count, 10000)</c> (presumably
        /// offset/limit paging - TODO confirm) and processing stops at the first empty batch.
        /// A failing batch is retried; after several consecutive failures the run is aborted
        /// instead of retrying the same batch forever.
        /// </remarks>
        private static void Reveal()
        {
            const int batch                  = 10000;
            const int maxConsecutiveFailures = 3;

            Console.Write("Enter dataset:");
            var cString = Console.ReadLine();

            var log = new LoggerConfiguration()
                      .MinimumLevel.Verbose()
                      .WriteTo.LiterateConsole()
                      .CreateLogger();
            var repository   = new UserGetRepository(cString, log, new CompressorProto());
            var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));
            var xgbc         = BaseXgbModel.LoadClassifierFromFile("ext_trained_model.xgb");

            xgbc.SetParameter("num_class", 2);

            var outputPath = $"IsBot_{cString}.csv";
            File.WriteAllText(outputPath, "VkId,IsBot" + Environment.NewLine);

            var count    = 0;
            var failures = 0;
            var gsw      = Stopwatch.StartNew();

            while (true)
            {
                try
                {
                    var users = repository.RangeSelect(count, batch).ToArray();
                    if (users.Length == 0)
                    {
                        // Nothing left: skip prediction so no 0/0 bot ratio is logged.
                        break;
                    }

                    var input = new float[users.Length][];
                    for (int user = 0; user < users.Length; user++)
                    {
                        input[user] = users[user].ToVector(groupmapping);
                    }

                    var preds = xgbc.Predict(input);

                    var sb = new StringBuilder();
                    for (int user = 0; user < users.Length; user++)
                    {
                        sb.AppendLine($"{users[user].id},{preds[user]}");
                    }

                    // Append only this batch; advance the position right after the write so a
                    // later failure cannot cause the same rows to be written twice.
                    File.AppendAllText(outputPath, sb.ToString());
                    count   += batch;
                    failures = 0;

                    log.Information("Done {Count} recs. Bpc {BotPercent}, {DaysForMillion} days", count, preds.Sum() / preds.Length, TimeSpan.FromMilliseconds(1000000 * gsw.ElapsedMilliseconds / count).TotalDays);
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);

                    failures++;
                    if (failures >= maxConsecutiveFailures)
                    {
                        log.Error("Aborting after {Failures} consecutive failed batches at position {Count}", failures, count);
                        break;
                    }
                }
            }

            Console.WriteLine("Done");
            Console.ReadLine();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Interactively scans a database and, for every user whose id is in the target list,
        /// writes to <c>task.csv</c> one row per group that the user shares with the publics list.
        /// </summary>
        /// <remarks>
        /// The publics and target id lists are read from the <c>task</c> directory; lines that are
        /// not integers are stored as <c>-1</c>. <c>task.csv</c> gets a header only when it does
        /// not exist yet, otherwise rows are appended to the existing file.
        /// Rows are buffered per batch and written with a single append, so a batch that fails
        /// midway leaves no partial rows behind and can be retried without duplicating output.
        /// After several consecutive failures the run is aborted instead of retrying forever.
        /// </remarks>
        private static void ComputeTask()
        {
            const int    batch                  = 50000;
            const int    maxConsecutiveFailures = 3;
            const string outputPath             = "task.csv";

            Console.Write("enter_database:");
            var dbfile = Console.ReadLine();

            var log = new LoggerConfiguration()
                      .MinimumLevel.Verbose()
                      .WriteTo.LiterateConsole()
                      .CreateLogger();
            var repository = new UserGetRepository(dbfile, log, new CompressorProto());
            var publics    = new HashSet<int>(File.ReadLines("task\\Publics.txt").Select(z => int.TryParse(z, out int vkid) ? vkid : -1));
            var target     = new HashSet<int>(File.ReadLines("task\\532k_Min_1_opp_public_from_our_4.7kk.txt").Select(z => int.TryParse(z, out int vkid) ? vkid : -1));

            if (!File.Exists(outputPath))
            {
                File.WriteAllText(outputPath, "VkId\tGroups\r\n");
            }

            var count    = 0;
            var failures = 0;

            while (true)
            {
                try
                {
                    var users = repository.RangeSelect(count, batch).ToArray();
                    if (users.Length == 0)
                    {
                        break;
                    }

                    var rows    = new List<string>();
                    var matched = new List<UserGet>();
                    foreach (var user in users)
                    {
                        if (!target.Contains(user.id))
                        {
                            continue;
                        }

                        // A user without groups contributes no rows but still counts as processed.
                        var common = publics.Intersect(user.Groups?.Select(z => z.id) ?? new int[0]);
                        foreach (var c in common)
                        {
                            rows.Add($"{user.id}\t{c}\r\n");
                        }
                        matched.Add(user);
                    }

                    // One file open per batch instead of one per row.
                    if (rows.Count > 0)
                    {
                        File.AppendAllText(outputPath, string.Concat(rows));
                    }
                    count   += batch;
                    failures = 0;

                    // Reported only once the rows are actually on disk.
                    foreach (var user in matched)
                    {
                        log.Information("Done with {UserId}", user.id);
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);

                    failures++;
                    if (failures >= maxConsecutiveFailures)
                    {
                        log.Error("Aborting after {Failures} consecutive failed batches at position {Count}", failures, count);
                        break;
                    }
                }
            }
            log.Information("Done");
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Checks that, when the cache returns null, the first <c>Get</c> saves exactly as many
        /// users as <c>UserGetRepository.defaultUsers</c> contains.
        /// </summary>
        public void Add_all_default_users_on_first_time()
        {
            // Arrange: every cache lookup reports a miss.
            cacheServiceMock.Setup(m => m.Get<dynamic>(It.IsAny<string>())).Returns<dynamic>(null);

            var userRepository = new UserGetRepository();
            userRepository.cache      = cacheServiceMock.Object;
            userRepository.repository = repositoryMock.Object;

            // Act
            userRepository.Get(userName);

            // Assert
            var expectedSaves = UserGetRepository.defaultUsers.Count();
            repositoryMock.Verify(m => m.Save(It.IsAny<User>()), Times.Exactly(expectedSaves));
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Trains one <c>XGBClassifier</c> per questionnaire question (120 questions, 5 classes
        /// each) on user feature vectors, prints train/test quality figures for every question,
        /// saves the resulting <c>Fifther</c> to the <c>fifth</c> directory under the application
        /// base directory and returns it.
        /// </summary>
        /// <remarks>
        /// Feature vectors come from <c>passed_tests_ferrets.s3db</c>; answers come from
        /// <c>224053984_dataset.json</c> and are kept only for users that have a vector and whose
        /// answer vector has exactly 120 entries. The data is shuffled and split 80/20.
        /// A failing repository batch is retried a limited number of times; if it keeps failing,
        /// training proceeds with the vectors loaded so far and an error is logged.
        /// </remarks>
        /// <returns>The trained <c>Fifther</c> containing one classifier per question.</returns>
        private static Fifther Fifth()
        {
            const int batch                  = 1000;
            const int maxConsecutiveFailures = 3;
            const int questionCount          = 120;
            const int classCount             = 5;

            var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));
            var bagOfTerms   = File.ReadAllLines("res\\expert_topics.csv").Skip(1).Select(z => z.Split(',')).ToDictionary(z => int.Parse(z[0]), z => new HashSet<string>(z[1].Split(' ')));
            var xFilename    = "224053984_dataset.json";

            QuestionnaireScope scope;
            XmlSerializer      serializer = new XmlSerializer(typeof(QuestionnaireScope));

            using (var reader = new StreamReader("test_fifth.xml"))
            {
                scope = (QuestionnaireScope)serializer.Deserialize(reader);
            }

            var log = new LoggerConfiguration()
                      .MinimumLevel.Verbose()
                      .WriteTo.LiterateConsole()
                      .CreateLogger();
            var repository = new UserGetRepository("passed_tests_ferrets.s3db", log, new CompressorProto());
            var vectors    = new Dictionary<int, float[]>();
            var count      = 0;
            var failures   = 0;

            while (true)
            {
                try
                {
                    var users = repository.RangeSelect(count, batch).ToArray();
                    if (users.Length == 0)
                    {
                        break;
                    }

                    foreach (var user in users)
                    {
                        // Indexer rather than Add: when a batch is retried after a failure, the
                        // ids stored before the failure must not raise a duplicate-key error.
                        vectors[user.id] = user.ToVector(groupmapping, bagOfTerms);
                    }
                    count   += batch;
                    failures = 0;
                    log.Information("Done {Count} recs.", count);
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);

                    failures++;
                    if (failures >= maxConsecutiveFailures)
                    {
                        log.Error("Stopped loading after {Failures} consecutive failed batches at position {Count}", failures, count);
                        break;
                    }
                }
            }

            var X = JsonConvert.DeserializeObject<List<FifthAttendance>>(File.ReadAllText(xFilename)).Where(z => vectors.ContainsKey(z.vkid)).Where(z => QuestionaireDatasetPreparation.CovertAnswersToVector(scope, z.AnswersId).Length == questionCount).ToArray();
            var Y = X.Select(z => QuestionaireDatasetPreparation.CovertAnswersToVector(scope, z.AnswersId)).ToArray();

            File.WriteAllLines("answers.csv", Y.Select(z => string.Join(", ", z)));

            // Random permutation of sample indices; the first 80% train, the rest test.
            var rnd       = new Random(Environment.TickCount);
            var train     = Enumerable.Range(0, X.Length).OrderBy(z => rnd.NextDouble()).ToArray();
            var trainSize = X.Length * 80 / 100;

            var x_train = train.Take(trainSize).Select(z => vectors[X[z].vkid]).ToArray();
            var y_train = train.Take(trainSize).Select(z => Y[z]).ToArray();
            var x_test  = train.Skip(trainSize).Select(z => vectors[X[z].vkid]).ToArray();
            var y_test  = train.Skip(trainSize).Select(z => Y[z]).ToArray();

            var fifther = new Fifther();

            Console.WriteLine();

            for (int qnum = 0; qnum < questionCount; qnum++)
            {
                Console.WriteLine($"Question: {qnum}");
                var yds  = y_train.Select(z => (float)z[qnum]).ToArray();
                var ytds = y_test.Select(z => (float)z[qnum]).ToArray();

                var parameters = new Dictionary<string, object>();
                parameters["max_depth"]     = 10;
                parameters["learning_rate"] = 0.1f;
                parameters["n_estimators"]  = 300;
                parameters["silent"]        = true;
                parameters["objective"]     = "multi:softprob";

                parameters["nthread"]           = -1;
                parameters["gamma"]             = 4f;
                parameters["min_child_weight"]  = 2;
                parameters["max_delta_step"]    = 1;
                parameters["subsample"]         = 1f;
                parameters["colsample_bytree"]  = 1f;
                parameters["colsample_bylevel"] = 1f;
                parameters["reg_alpha"]         = 0f;
                parameters["reg_lambda"]        = 1f;
                parameters["scale_pos_weight"]  = 1f;

                parameters["base_score"] = 0.8F;
                parameters["seed"]       = 0;
                parameters["missing"]    = float.NaN;
                parameters["num_class"]  = classCount;
                var xgbc = new XGBClassifier(parameters);
                xgbc.Fit(x_train, yds);

                fifther.AddLevel(qnum, xgbc);

                // Train-set quality: predicted distribution vs. one-hot of the true class
                // ("discrepancy") and |argmax - true class| ("dist"), both averaged per sample.
                var discrepancy = 0.0;
                var dist        = 0.0;
                var preds       = xgbc.PredictDistr(x_train);

                for (int pos = 0; pos < preds.Length; pos++)
                {
                    var tmp = new float[classCount];
                    tmp[(int)yds[pos]] = 1f;
                    dist        += Math.Abs(det.GetMaxIndex(preds[pos]) - yds[pos]);
                    discrepancy += det.EuclidianDistance(tmp, preds[pos]);
                }
                Console.WriteLine("[Train] Discrepancy {0:0.000} Dist {1:0.000}", 1.0 - discrepancy / (preds.Length * Math.Sqrt(2.0)), dist / preds.Length);

                // Test-set quality: must be measured against the test labels (ytds), not the
                // train labels.
                preds       = xgbc.PredictDistr(x_test);
                discrepancy = 0.0;
                dist        = 0.0;
                for (int pos = 0; pos < preds.Length; pos++)
                {
                    var tmp = new float[classCount];
                    tmp[(int)ytds[pos]] = 1f;
                    dist        += Math.Abs(det.GetMaxIndex(preds[pos]) - ytds[pos]);
                    discrepancy += det.EuclidianDistance(tmp, preds[pos]);
                }
                Console.WriteLine("[Test ] Discrepancy {0:0.000} Dist {1:0.000}", 1.0 - discrepancy / (preds.Length * Math.Sqrt(2.0)), dist / preds.Length);
            }
            Console.WriteLine("Done");
            fifther.Save(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "fifth"));
            Console.ReadLine();
            Console.ReadLine();
            Console.ReadLine();
            Console.ReadLine();
            return(fifther);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Interactively applies the saved <c>Fifther</c> model (loaded from the <c>fifth</c>
        /// directory under the application base directory) to every user of a dataset that has
        /// at least one group, and stores the resulting scales through
        /// <c>FifthResultRepository</c>.
        /// </summary>
        /// <remarks>
        /// Existing results are removed with <c>CleanAll</c> before processing starts.
        /// Users are requested in steps of 1000 and processing stops at the first batch the
        /// repository returns empty. A batch in which no user has groups is skipped without
        /// ending the run. A failing batch is retried; after several consecutive failures the
        /// run is aborted instead of retrying the same batch forever.
        /// </remarks>
        private static void ComputeFifth()
        {
            const int batch                  = 1000;
            const int maxConsecutiveFailures = 3;

            var fifther = new Fifther();

            fifther.Load(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "fifth"));

            Console.Write("Enter dataset:");
            var cString    = Console.ReadLine();
            var bagOfTerms = File.ReadAllLines("res\\expert_topics.csv").Skip(1).Select(z => z.Split(',')).ToDictionary(z => int.Parse(z[0]), z => new HashSet<string>(z[1].Split(' ')));

            QuestionnaireScope scope;
            XmlSerializer      serializer = new XmlSerializer(typeof(QuestionnaireScope));

            using (var reader = new StreamReader("test_fifth.xml"))
            {
                scope = (QuestionnaireScope)serializer.Deserialize(reader);
            }

            var log = new LoggerConfiguration()
                      .MinimumLevel.Verbose()
                      .WriteTo.LiterateConsole()
                      .CreateLogger();
            var repository   = new UserGetRepository(cString, log, new CompressorProto());
            var fifthRepo    = new FifthResultRepository(cString);
            var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));

            fifthRepo.CleanAll();
            var count    = 0;
            var failures = 0;
            var gsw      = Stopwatch.StartNew();

            while (true)
            {
                try
                {
                    var page = repository.RangeSelect(count, batch).ToArray();
                    if (page.Length == 0)
                    {
                        // End of data is decided on the unfiltered batch; the filtered one may be
                        // empty simply because none of these users has groups.
                        break;
                    }

                    var users = page.Where(z => z?.Groups?.Count > 0).ToArray();
                    if (users.Length > 0)
                    {
                        var input = new float[users.Length][];
                        for (int user = 0; user < users.Length; user++)
                        {
                            input[user] = users[user].ToVector(groupmapping, bagOfTerms);
                        }
                        var preds = fifther.PredictDistr(input, 5);

                        fifthRepo.Insert(users.Select(z => z.id).ToArray(), preds.Select(z => QuestionaireDatasetPreparation.PredictionsToScales(scope, z)).ToArray());
                    }

                    count   += batch;
                    failures = 0;
                    log.Information("Done {Count} recs. {DaysForMillion} days", count, TimeSpan.FromMilliseconds(1000000 * gsw.ElapsedMilliseconds / count).TotalDays);
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);

                    failures++;
                    if (failures >= maxConsecutiveFailures)
                    {
                        log.Error("Aborting after {Failures} consecutive failed batches at position {Count}", failures, count);
                        break;
                    }
                }
            }

            Console.WriteLine("Done");
            Console.ReadLine();
            Console.ReadLine();
        }