/// <summary>
/// Populates <paramref name="test"/> with the questions cached for <paramref name="link"/>, when a cache entry exists.
/// </summary>
/// <param name="link">Key passed to <c>BuildAnswerCache.FetchCached</c>.</param>
/// <param name="test">Test whose <c>Questions</c> is assigned on a cache hit; left untouched on a miss.</param>
/// <returns><c>true</c> if the cache returned a non-null result and it was assigned; otherwise <c>false</c>.</returns>
static bool BuildFromCache(string link, Test test)
{
    var cachedQuestions = BuildAnswerCache.FetchCached(link);
    bool cacheHit = cachedQuestions != null;

    if (cacheHit)
    {
        test.Questions = cachedQuestions;
    }

    return cacheHit;
}
/// <summary>
/// Builds the question database from CSV rows and writes it to <c>question_db.json</c>.
/// </summary>
/// <remarks>
/// A usable row has exactly five columns: link, category, subcategory, redownload flag and
/// default-select flag (both flags are true only for the exact text "yes"). Rows with any other
/// column count, and links that were already processed, are skipped. Questions are taken from the
/// answer cache unless the row asks for a redownload or nothing is cached; otherwise the page is
/// downloaded and handed to the parser selected by the link text. Unanswerable questions are
/// dropped before insertion. The console title shows the running number of retained questions
/// and the running number of skipped ones.
/// </remarks>
/// <param name="csvFile">CSV content, one string array per row.</param>
public static void BuildDatabase(string[][] csvFile)
{
    int totalQuestionCount = 0;

    foreach (var line in csvFile)
    {
        Console.Title = $"Parsing... Total Questions: {totalQuestionCount}, Skipped: {skippedQuestionCount}";

        if (line.Length != 5)
        {
            continue;
        }

        // Parse the CSV row into named values.
        var link = line[0];
        var category = line[1];
        var subcategory = line[2];
        var redownload = line[3] == "yes";
        var defaultSelect = line[4] == "yes";

        // Skip links that were already processed.
        if (alreadyDone.Contains(link))
        {
            Console.WriteLine("Duplicate link: " + link);
            continue;
        }
        alreadyDone.Add(link);

        // First, try to retrieve the questions from the cache.
        List<QANode> nodes = BuildAnswerCache.FetchCached(link);
        if (!redownload && nodes != null)
        {
            var cachedQuestions = KeepAnswerableQuestions(nodes);
            // Count only the questions that are actually inserted, exactly like the download path below.
            totalQuestionCount += cachedQuestions.Count;
            Insert(category, subcategory, defaultSelect, cachedQuestions);
            continue;
        }

        // Otherwise the page has to be downloaded and parsed.
        var pageData = DownloadWebpage(link);
        if (pageData == null)
        {
            Console.WriteLine("404 Not Found: " + link);
            continue;
        }
        Console.WriteLine($" Downloading and parsing: {link}");

        var html = new HtmlAgilityPack.HtmlDocument();
        // The page text is loaded as-is: encoding it to UTF-7 and decoding it again yields the same
        // string, and UTF-7 is obsolete in current .NET (SYSLIB0001).
        html.LoadHtml(pageData);

        // Pick the parser from the link text. Ordinal, case-insensitive matching keeps the result
        // independent of the current culture (ToLower() maps 'I' to a dotless i under Turkish rules).
        // NOTE(review): when a redownload was requested, nodes still holds the cached list here, so a
        // link matching neither parser falls back to the cached questions - confirm this is intended.
        if (link.IndexOf("currentnursing", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            nodes = ParseCurrentNursing(link, html);
        }
        if (link.IndexOf("nurseslabs", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            nodes = ParseNursesLabs(link, html);
        }

        // Insert whatever was parsed, otherwise warn.
        if (nodes == null)
        {
            Console.WriteLine("\tFailed to Parse");
            continue;
        }

        nodes = KeepAnswerableQuestions(nodes);
        Insert(category, subcategory, defaultSelect, nodes);
        totalQuestionCount += nodes.Count;
    }

    // Finally, save the database.
    File.WriteAllText("question_db.json", Newtonsoft.Json.JsonConvert.SerializeObject(categories, Newtonsoft.Json.Formatting.Indented));
}

/// <summary>
/// Returns the questions that can be answered, after assigning each one its question id.
/// </summary>
/// <remarks>
/// A question is kept when it has at least one correct-answer index, every such index lies inside
/// the answer array, and the answer array is not empty. The number of dropped questions is added
/// to <c>skippedQuestionCount</c> and reported on the console when it is greater than zero.
/// </remarks>
/// <param name="nodes">Questions to filter; the list itself is not modified.</param>
/// <returns>A new list holding only the retained questions.</returns>
private static List<QANode> KeepAnswerableQuestions(List<QANode> nodes)
{
    var kept = nodes
        .Where(node => node.Correct.Length > 0
                       && !node.Correct.Any(index => index < 0 || index >= node.Answers.Length)
                       && node.Answers.Length > 0)
        .ToList();

    var dropped = nodes.Count - kept.Count;
    skippedQuestionCount += dropped;
    if (dropped > 0)
    {
        Console.WriteLine($"\tDeleting {dropped} questions without answers");
    }

    // The question id is the MD5 hash of the question text.
    foreach (var question in kept)
    {
        question.QuestionId = BuildAnswerCache.CalculateMD5Hash(question.Question);
    }

    return kept;
}