Ejemplo n.º 1
0
        /// <summary>
        /// Builds a feature vector for every user returned by the "passed_tests_ferrets.s3db" repository,
        /// fits one <c>XGBClassifier</c> per questionnaire question on a random 80/20 split of the records
        /// read from "224053984_dataset.json", prints train/test metrics for each question and saves the
        /// resulting <see cref="Fifther"/> to the "fifth" path under the application base directory.
        /// </summary>
        /// <remarks>
        /// Side effects: writes "answers.csv" to the working directory, writes progress to the console and
        /// blocks on four <c>Console.ReadLine()</c> calls before returning.
        /// </remarks>
        /// <returns>The <see cref="Fifther"/> holding one fitted classifier per question index.</returns>
        private static Fifther Fifth()
        {
            const int questionCount = 120; // answer-vector length required of a usable record; also the number of models
            const int classCount    = 5;   // passed to XGBoost as num_class and used as the one-hot width
            const int trainPercent  = 80;
            const int batch         = 1000;

            var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));

            // The first line is skipped (presumably a header row); every other line is "<int id>,<space separated terms>".
            var bagOfTerms = File.ReadAllLines(Path.Combine("res", "expert_topics.csv"))
                                 .Skip(1)
                                 .Select(z => z.Split(','))
                                 .ToDictionary(z => int.Parse(z[0]), z => new HashSet<string>(z[1].Split(' ')));
            var xFilename = "224053984_dataset.json";

            QuestionnaireScope scope;
            var serializer = new XmlSerializer(typeof(QuestionnaireScope));
            using (var reader = new StreamReader("test_fifth.xml"))
            {
                scope = (QuestionnaireScope)serializer.Deserialize(reader);
            }

            var log = new LoggerConfiguration()
                      .MinimumLevel.Verbose()
                      .WriteTo.LiterateConsole()
                      .CreateLogger();
            var repository = new UserGetRepository("passed_tests_ferrets.s3db", log, new CompressorProto());
            var vectors    = new Dictionary<int, float[]>();
            var count      = 0;
            int fetched;

            do
            {
                var offset = count;
                fetched = 0;
                try
                {
                    var users = repository.RangeSelect(offset, batch).ToArray();
                    fetched = users.Length;

                    // Advance before processing: a batch that fails below is skipped instead of being
                    // retried forever. A failed fetch leaves fetched == 0 and ends the loop.
                    count += batch;

                    foreach (var user in users)
                    {
                        // Indexer instead of Add so a repeated id overwrites rather than throws.
                        vectors[user.id] = user.ToVector(groupmapping, bagOfTerms);
                    }
                    log.Information("Done {Count} recs.", count);
                }
                catch (Exception ex)
                {
                    log.Error(ex, "Batch at offset {Offset} failed", offset);
                }
            } while (fetched != 0);

            //File.WriteAllText("datax.csv", string.Join(", ", FlatUsertToVectorMapping.GetHeader(groupmapping.FirstOrDefault().Value.Length, bagOfTerms.Count)) + "\r\n");
            //File.AppendAllLines("datax.csv", vectors.Select(z => string.Join(", ", z.Value.Select(x => x.ToString("0.000", CultureInfo.InvariantCulture)))));

            // Keep only records whose user has a feature vector and whose answers convert to a full-length
            // vector. The answer vector is computed once per record and carried along with the user id.
            var samples = JsonConvert.DeserializeObject<List<FifthAttendance>>(File.ReadAllText(xFilename))
                                     .Where(z => vectors.ContainsKey(z.vkid))
                                     .Select(z => new { z.vkid, Answers = QuestionaireDatasetPreparation.CovertAnswersToVector(scope, z.AnswersId) })
                                     .Where(s => s.Answers.Length == questionCount)
                                     .ToArray();

            File.WriteAllLines("answers.csv", samples.Select(s => string.Join(", ", s.Answers)));

            // Random permutation of the sample indices; the first trainPercent % become the training set.
            var rnd        = new Random(Environment.TickCount);
            var order      = Enumerable.Range(0, samples.Length).OrderBy(z => rnd.NextDouble()).ToArray();
            var trainCount = samples.Length * trainPercent / 100;

            var x_train = order.Take(trainCount).Select(i => vectors[samples[i].vkid]).ToArray();
            var y_train = order.Take(trainCount).Select(i => samples[i].Answers).ToArray();
            var x_test  = order.Skip(trainCount).Select(i => vectors[samples[i].vkid]).ToArray();
            var y_test  = order.Skip(trainCount).Select(i => samples[i].Answers).ToArray();

            var fifther = new Fifther();

            Console.WriteLine();

            for (int qnum = 0; qnum < questionCount; qnum++)
            {
                Console.WriteLine($"Question: {qnum}");
                var yds  = y_train.Select(z => (float)z[qnum]).ToArray();
                var ytds = y_test.Select(z => (float)z[qnum]).ToArray();

                var parameters = new Dictionary<string, object>
                {
                    ["max_depth"]     = 10,
                    ["learning_rate"] = 0.1f,
                    ["n_estimators"]  = 300,
                    ["silent"]        = true,
                    ["objective"]     = "multi:softprob", //"binary:logistic";//

                    ["nthread"]           = -1,
                    ["gamma"]             = 4f,
                    ["min_child_weight"]  = 2,
                    ["max_delta_step"]    = 1,
                    ["subsample"]         = 1f,
                    ["colsample_bytree"]  = 1f,
                    ["colsample_bylevel"] = 1f,
                    ["reg_alpha"]         = 0f,
                    ["reg_lambda"]        = 1f,
                    ["scale_pos_weight"]  = 1f,

                    ["base_score"] = 0.8F,
                    ["seed"]       = 0,
                    ["missing"]    = float.NaN,
                    ["num_class"]  = classCount,
                };
                var xgbc = new XGBClassifier(parameters);
                xgbc.Fit(x_train, yds);

                fifther.AddLevel(qnum, xgbc);

                // Score the model on both splits, each against its OWN labels.
                var splits = new[]
                {
                    new { Label = "Train", Features = x_train, Labels = yds },
                    new { Label = "Test ", Features = x_test, Labels = ytds },
                };

                foreach (var split in splits)
                {
                    var preds       = xgbc.PredictDistr(split.Features);
                    var discrepancy = 0.0;
                    var dist        = 0.0;

                    for (int pos = 0; pos < preds.Length; pos++)
                    {
                        // One-hot encoding of the true class for comparison with the predicted distribution.
                        var oneHot = new float[classCount];
                        oneHot[(int)split.Labels[pos]] = 1f;
                        dist        += Math.Abs(det.GetMaxIndex(preds[pos]) - split.Labels[pos]);
                        discrepancy += det.EuclidianDistance(oneHot, preds[pos]);
                    }

                    // Math.Sqrt(2.0) presumably normalises by the largest possible distance between a
                    // one-hot vector and a probability distribution, so the first figure is at most 1.
                    Console.WriteLine("[{0}] Discrepancy {1:0.000} Dist {2:0.000}", split.Label, 1.0 - discrepancy / (preds.Length * Math.Sqrt(2.0)), dist / preds.Length);
                }
            }
            Console.WriteLine("Done");
            fifther.Save(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "fifth"));
            Console.ReadLine();
            Console.ReadLine();
            Console.ReadLine();
            Console.ReadLine();
            return(fifther);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads the classifiers stored at the "fifth" path under the application base directory, reads a
        /// data-set identifier from the console and passes it to both the user repository and the result
        /// repository, then walks the users in batches: every user with at least one group is converted
        /// to a feature vector, scored, and the predictions converted to scales are inserted into the
        /// result repository.
        /// </summary>
        /// <remarks>
        /// Calls <c>CleanAll()</c> on the result repository before processing starts and blocks on two
        /// <c>Console.ReadLine()</c> calls before returning.
        /// </remarks>
        private static void ComputeFifth()
        {
            const int batch      = 1000;
            const int classCount = 5; // second argument of Fifther.PredictDistr

            var fifther = new Fifther();

            fifther.Load(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "fifth"));

            Console.Write("Enter dataset:");
            var cString = Console.ReadLine();

            // The first line is skipped (presumably a header row); every other line is "<int id>,<space separated terms>".
            var bagOfTerms = File.ReadAllLines(Path.Combine("res", "expert_topics.csv"))
                                 .Skip(1)
                                 .Select(z => z.Split(','))
                                 .ToDictionary(z => int.Parse(z[0]), z => new HashSet<string>(z[1].Split(' ')));

            QuestionnaireScope scope;
            var serializer = new XmlSerializer(typeof(QuestionnaireScope));
            using (var reader = new StreamReader("test_fifth.xml"))
            {
                scope = (QuestionnaireScope)serializer.Deserialize(reader);
            }

            var log = new LoggerConfiguration()
                      .MinimumLevel.Verbose()
                      .WriteTo.LiterateConsole()
                      .CreateLogger();
            var repository   = new UserGetRepository(cString, log, new CompressorProto());
            var fifthRepo    = new FifthResultRepository(cString);
            var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));

            fifthRepo.CleanAll();
            var count = 0;
            var gsw   = Stopwatch.StartNew();
            int fetched;

            do
            {
                var offset = count;
                fetched = 0;
                try
                {
                    var page = repository.RangeSelect(offset, batch).ToArray();

                    // Loop termination is based on the number of records FETCHED, not on how many survive
                    // the filter below; a batch in which nobody has groups must not end processing early.
                    fetched = page.Length;

                    // Advance before processing: a batch that fails below is skipped instead of being
                    // retried forever. A failed fetch leaves fetched == 0 and ends the loop.
                    count += batch;

                    var users = page.Where(z => z?.Groups?.Count > 0).ToArray();
                    if (users.Length > 0)
                    {
                        var input = new float[users.Length][];
                        for (int user = 0; user < users.Length; user++)
                        {
                            input[user] = users[user].ToVector(groupmapping, bagOfTerms);
                        }
                        var preds = fifther.PredictDistr(input, classCount);

                        fifthRepo.Insert(users.Select(z => z.id).ToArray(), preds.Select(z => QuestionaireDatasetPreparation.PredictionsToScales(scope, z)).ToArray());
                    }

                    // Extrapolates the elapsed time per processed record to one million records.
                    log.Information("Done {Count} recs. {DaysForMillion} days", count, TimeSpan.FromMilliseconds(1000000 * gsw.ElapsedMilliseconds / count).TotalDays);
                }
                catch (Exception ex)
                {
                    log.Error(ex, "Batch at offset {Offset} failed", offset);
                }
            } while (fetched != 0);

            Console.WriteLine("Done");
            Console.ReadLine();
            Console.ReadLine();
        }