/// <summary>
/// Trains one <c>XGBClassifier</c> per questionnaire question and collects them in a <see cref="Fifther"/>.
/// </summary>
/// <remarks>
/// Steps, in order:
/// <list type="number">
/// <item>Loads the group mapping ("map_groups.json"), the expert topics ("res\expert_topics.csv",
/// first line skipped) and the questionnaire scope ("test_fifth.xml").</item>
/// <item>Reads users from "passed_tests_ferrets.s3db" in batches and converts each one to a feature vector.</item>
/// <item>Keeps the dataset records that have a feature vector and whose answer vector has exactly
/// 120 entries, writes the answer vectors to "answers.csv", and splits the records 80/20 at random.</item>
/// <item>Fits a classifier for every question, prints train and test metrics to the console and
/// saves the result into the "fifth" path under the application base directory.</item>
/// </list>
/// The method blocks on four <c>Console.ReadLine()</c> calls before returning.
/// </remarks>
/// <returns>The <see cref="Fifther"/> holding the fitted per-question classifiers.</returns>
private static Fifther Fifth()
{
    const int questionCount = 120; // answer vectors of any other length are discarded
    const int classCount = 5;      // passed as "num_class"; also the length of the one-hot label vector
    const int batch = 1000;        // users fetched per repository call
    const int trainPercent = 80;   // share of samples used for fitting; the rest is the test set
    const string xFilename = "224053984_dataset.json";

    var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));

    // CSV layout as parsed here: "<int id>,<space separated terms>"; the first line is skipped.
    var bagOfTerms = File.ReadAllLines("res\\expert_topics.csv")
        .Skip(1)
        .Select(z => z.Split(','))
        .ToDictionary(z => int.Parse(z[0]), z => new HashSet<string>(z[1].Split(' ')));

    QuestionnaireScope scope;
    var serializer = new XmlSerializer(typeof(QuestionnaireScope));
    using (var reader = new StreamReader("test_fifth.xml"))
    {
        scope = (QuestionnaireScope)serializer.Deserialize(reader);
    }

    var log = new LoggerConfiguration()
        .MinimumLevel.Verbose()
        .WriteTo.LiterateConsole()
        .CreateLogger();

    var repository = new UserGetRepository("passed_tests_ferrets.s3db", log, new CompressorProto());

    // Feature vector per user id.
    var vectors = new Dictionary<int, float[]>();
    var count = 0;
    int fetched;
    do
    {
        fetched = 0;

        // Advance the offset before doing any work: a failing batch is then skipped rather than
        // retried forever, and a failing fetch leaves fetched == 0, which ends the loop.
        var offset = count;
        count += batch;
        try
        {
            var users = repository.RangeSelect(offset, batch).ToArray();
            fetched = users.Length;
            foreach (var user in users)
            {
                // Indexer instead of Add: a repeated id overwrites instead of throwing.
                vectors[user.id] = user.ToVector(groupmapping, bagOfTerms);
            }

            log.Information("Done {Count} recs.", count);
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }
    } while (fetched != 0);

    // Pair every usable dataset record with its answer vector; the answer vector is computed once
    // per record and reused for both the length filter and the labels.
    var samples = JsonConvert.DeserializeObject<List<FifthAttendance>>(File.ReadAllText(xFilename))
        .Where(z => vectors.ContainsKey(z.vkid))
        .Select(z => new
        {
            Features = vectors[z.vkid],
            Answers = QuestionaireDatasetPreparation.CovertAnswersToVector(scope, z.AnswersId)
        })
        .Where(s => s.Answers.Length == questionCount)
        .ToArray();

    var Y = samples.Select(s => s.Answers).ToArray();
    File.WriteAllLines("answers.csv", Y.Select(z => string.Join(", ", z)));

    // Random permutation of sample indices; the first trainPercent percent is the training set.
    var rnd = new Random(Environment.TickCount);
    var order = Enumerable.Range(0, samples.Length).OrderBy(z => rnd.NextDouble()).ToArray();
    var trainCount = samples.Length * trainPercent / 100;

    var x_train = order.Take(trainCount).Select(i => samples[i].Features).ToArray();
    var y_train = order.Take(trainCount).Select(i => Y[i]).ToArray();
    var x_test = order.Skip(trainCount).Select(i => samples[i].Features).ToArray();
    var y_test = order.Skip(trainCount).Select(i => Y[i]).ToArray();

    var fifther = new Fifther();
    Console.WriteLine();

    for (int qnum = 0; qnum < questionCount; qnum++)
    {
        Console.WriteLine($"Question: {qnum}");

        // Labels of the current question for the training and the test samples.
        var yds = y_train.Select(z => (float)z[qnum]).ToArray();
        var ytds = y_test.Select(z => (float)z[qnum]).ToArray();

        // A fresh dictionary per classifier, in case the classifier keeps a reference to it.
        var parameters = new Dictionary<string, object>
        {
            ["max_depth"] = 10,
            ["learning_rate"] = 0.1f,
            ["n_estimators"] = 300,
            ["silent"] = true,
            ["objective"] = "multi:softprob",
            ["nthread"] = -1,
            ["gamma"] = 4f,
            ["min_child_weight"] = 2,
            ["max_delta_step"] = 1,
            ["subsample"] = 1f,
            ["colsample_bytree"] = 1f,
            ["colsample_bylevel"] = 1f,
            ["reg_alpha"] = 0f,
            ["reg_lambda"] = 1f,
            ["scale_pos_weight"] = 1f,
            ["base_score"] = 0.8F,
            ["seed"] = 0,
            ["missing"] = float.NaN,
            ["num_class"] = classCount
        };

        var xgbc = new XGBClassifier(parameters);
        xgbc.Fit(x_train, yds);
        fifther.AddLevel(qnum, xgbc);

        // Training-set metrics.
        // "dist": mean absolute difference between the arg-max predicted class and the label.
        // "discrepancy": summed distance between the one-hot label and the predicted vector,
        // normalised by sqrt(2) per sample (presumably the largest distance possible between a
        // one-hot vector and a probability distribution - TODO confirm).
        var discrepancy = 0.0;
        var dist = 0.0;
        var preds = xgbc.PredictDistr(x_train);
        for (int pos = 0; pos < preds.Length; pos++)
        {
            var expected = new float[classCount];
            expected[(int)yds[pos]] = 1f;
            dist += Math.Abs(det.GetMaxIndex(preds[pos]) - yds[pos]);
            discrepancy += det.EuclidianDistance(expected, preds[pos]);
        }

        Console.WriteLine("[Train] Discrepancy {0:0.000} Dist {1:0.000}", 1.0 - discrepancy / (preds.Length * Math.Sqrt(2.0)), dist / preds.Length);

        // Test-set metrics: must be scored against the test labels (ytds), not the training labels.
        preds = xgbc.PredictDistr(x_test);
        discrepancy = 0.0;
        dist = 0.0;
        for (int pos = 0; pos < preds.Length; pos++)
        {
            var expected = new float[classCount];
            expected[(int)ytds[pos]] = 1f;
            dist += Math.Abs(det.GetMaxIndex(preds[pos]) - ytds[pos]);
            discrepancy += det.EuclidianDistance(expected, preds[pos]);
        }

        Console.WriteLine("[Test ] Discrepancy {0:0.000} Dist {1:0.000}", 1.0 - discrepancy / (preds.Length * Math.Sqrt(2.0)), dist / preds.Length);
    }

    Console.WriteLine("Done");
    fifther.Save(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "fifth"));

    // Kept from the original: wait for four Enter presses before returning
    // (presumably so the console output can be inspected - TODO confirm).
    Console.ReadLine();
    Console.ReadLine();
    Console.ReadLine();
    Console.ReadLine();
    return fifther;
}
/// <summary>
/// Loads the saved <see cref="Fifther"/> model, runs it over every user of a dataset entered on the
/// console and stores the resulting scales in a <c>FifthResultRepository</c>.
/// </summary>
/// <remarks>
/// The value read from the console is passed unchanged to both <c>UserGetRepository</c> and
/// <c>FifthResultRepository</c>. <c>CleanAll()</c> is called on the result repository before any
/// batch is processed. Users are read in batches of 1000; only users with at least one group are
/// scored. The method blocks on two <c>Console.ReadLine()</c> calls before returning.
/// </remarks>
private static void ComputeFifth()
{
    const int batch = 1000;   // users fetched per repository call
    const int classCount = 5; // second argument of Fifther.PredictDistr

    var fifther = new Fifther();
    fifther.Load(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "fifth"));

    Console.Write("Enter dataset:");
    var cString = Console.ReadLine();

    // CSV layout as parsed here: "<int id>,<space separated terms>"; the first line is skipped.
    var bagOfTerms = File.ReadAllLines("res\\expert_topics.csv")
        .Skip(1)
        .Select(z => z.Split(','))
        .ToDictionary(z => int.Parse(z[0]), z => new HashSet<string>(z[1].Split(' ')));

    QuestionnaireScope scope;
    var serializer = new XmlSerializer(typeof(QuestionnaireScope));
    using (var reader = new StreamReader("test_fifth.xml"))
    {
        scope = (QuestionnaireScope)serializer.Deserialize(reader);
    }

    var log = new LoggerConfiguration()
        .MinimumLevel.Verbose()
        .WriteTo.LiterateConsole()
        .CreateLogger();

    var repository = new UserGetRepository(cString, log, new CompressorProto());
    var fifthRepo = new FifthResultRepository(cString);
    var groupmapping = JsonConvert.DeserializeObject<Dictionary<long, double[]>>(File.ReadAllText("map_groups.json"));

    fifthRepo.CleanAll();

    var count = 0;
    var gsw = Stopwatch.StartNew();
    int fetched;
    do
    {
        fetched = 0;

        // Advance the offset before doing any work: a failing batch is then skipped rather than
        // retried forever, and a failing fetch leaves fetched == 0, which ends the loop.
        var offset = count;
        count += batch;
        try
        {
            var page = repository.RangeSelect(offset, batch).ToArray();

            // Termination is decided by the number of rows actually fetched, not by the number
            // left after filtering; otherwise one batch without eligible users would end the run.
            fetched = page.Length;

            var users = page.Where(z => z?.Groups?.Count > 0).ToArray();
            if (users.Length > 0)
            {
                var input = new float[users.Length][];
                for (int i = 0; i < users.Length; i++)
                {
                    input[i] = users[i].ToVector(groupmapping, bagOfTerms);
                }

                var preds = fifther.PredictDistr(input, classCount);
                fifthRepo.Insert(
                    users.Select(z => z.id).ToArray(),
                    preds.Select(z => QuestionaireDatasetPreparation.PredictionsToScales(scope, z)).ToArray());
            }

            // Extrapolates the elapsed time to one million records, reported in days.
            log.Information("Done {Count} recs. {DaysForMillion} days", count, TimeSpan.FromMilliseconds(1000000 * gsw.ElapsedMilliseconds / count).TotalDays);
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }
    } while (fetched != 0);

    Console.WriteLine("Done");

    // Kept from the original: wait for two Enter presses before returning.
    Console.ReadLine();
    Console.ReadLine();
}