Beispiel #1
0
        /// <summary>
        /// Returns the text of the page at <paramref name="link"/>, served from the on-disk
        /// <c>html_cache</c> directory when a cached copy exists.
        /// </summary>
        /// <remarks>
        /// The cache file name is built from <c>BuildAnswerCache.CalculateMD5Hash(link)</c>.
        /// Network downloads are currently switched off (see <c>allowDownloads</c> below), so a
        /// cache miss yields <c>null</c> without touching the network.
        /// </remarks>
        /// <param name="link">Address of the page to fetch.</param>
        /// <returns>
        /// The page text, or <c>null</c> when the page is not cached and downloading is disabled
        /// or fails.
        /// </returns>
        static string DownloadWebpage(string link)
        {
            // Set to true to allow downloads -- right now it won't actually download anything new.
            const bool allowDownloads = false;

            //Check if in cache
            var fname = "html_cache/" + BuildAnswerCache.CalculateMD5Hash(link) + ".txt";

            Directory.CreateDirectory("html_cache");
            if (File.Exists(fname))
            {
                Console.WriteLine("\t(already in new cache)");
                return File.ReadAllText(fname);
            }

            if (!allowDownloads)
            {
                return null;
            }

            try
            {
                // WebClient is IDisposable; create it only when a download really happens
                // and release it as soon as the page has been fetched.
                using (var wc = new System.Net.WebClient())
                {
                    string downloaded = wc.DownloadString(link);

                    //And save
                    File.WriteAllText(fname, downloaded);
                    return downloaded;
                }
            }
            catch (Exception ex)
            {
                // Best effort: callers treat null as "page unavailable", but record why it failed.
                Console.WriteLine("\tDownload failed: " + ex.Message);
                return null;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Fills <paramref name="test"/> with the questions cached for <paramref name="link"/>.
        /// </summary>
        /// <param name="link">Link used as the lookup key for <c>BuildAnswerCache.FetchCached</c>.</param>
        /// <param name="test">Test whose <c>Questions</c> are replaced when a cached entry exists.</param>
        /// <returns>
        /// <c>true</c> when the cache returned a non-null result and it was assigned;
        /// <c>false</c> otherwise, in which case <paramref name="test"/> is left untouched.
        /// </returns>
        static bool BuildFromCache(string link, Test test)
        {
            var cachedQuestions = BuildAnswerCache.FetchCached(link);

            if (cachedQuestions != null)
            {
                test.Questions = cachedQuestions;
                return true;
            }

            return false;
        }
Beispiel #3
0
        /// <summary>
        /// Builds the question database from the rows of a parsed CSV file and saves it as
        /// <c>question_db.json</c>.
        /// </summary>
        /// <remarks>
        /// A usable row has exactly five columns: link, category, subcategory, a flag that forces
        /// a re-download when it equals "yes", and a default-select flag that is set when it equals
        /// "yes". Rows with a different column count are ignored, and a link is only processed the
        /// first time it is seen. Cached questions are used unless the row asks for a re-download;
        /// otherwise the page is fetched and handed to the parser matching the link's site.
        /// The console title shows the number of questions kept so far and the number skipped.
        /// </remarks>
        /// <param name="csvFile">CSV content, one string array per row.</param>
        public static void BuildDatabase(string[][] csvFile)
        {
            int totalQuestionCount = 0;

            foreach (var line in csvFile)
            {
                Console.Title = "Parsing... Total Questions: " + totalQuestionCount + $", Skipped: {skippedQuestionCount}";
                if (line.Length != 5)
                {
                    continue;
                }

                //Parse CSV to useful
                var link          = line[0];
                var category      = line[1];
                var subcategory   = line[2];
                var redownload    = line[3] == "yes";
                var defaultSelect = line[4] == "yes";

                //Skip links that were already handled
                if (alreadyDone.Contains(link))
                {
                    Console.WriteLine("Duplicate link: " + link);
                    continue;
                }
                alreadyDone.Add(link);

                //First, try to retrieve from cache
                List<QANode> nodes = BuildAnswerCache.FetchCached(link);

                if (!redownload && nodes != null)
                {
                    var cachedNodes = PruneAndTagQuestions(nodes);
                    totalQuestionCount += cachedNodes.Count;
                    Insert(category, subcategory, defaultSelect, cachedNodes);
                    continue;
                }

                //Otherwise, we have to redownload
                var pageData = DownloadWebpage(link);
                if (pageData == null)
                {
                    Console.WriteLine("404 Not Found: " + link);
                    continue;
                }
                Console.WriteLine($"  Downloading and parsing: {link}");
                var html = new HtmlAgilityPack.HtmlDocument();
                // NOTE(review): the UTF-7 encode/decode round-trip looks like a no-op for well-formed
                // text and Encoding.UTF7 is obsolete on newer runtimes -- confirm before removing.
                html.LoadHtml(Encoding.UTF7.GetString(Encoding.UTF7.GetBytes(pageData)));

                //Figure out which parser. Ordinal, case-insensitive matching keeps the site
                //detection independent of the current culture. If neither site matches, nodes
                //still holds whatever the cache returned above.
                if (link.IndexOf("currentnursing", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    nodes = ParseCurrentNursing(link, html);
                }

                if (link.IndexOf("nurseslabs", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    nodes = ParseNursesLabs(link, html);
                }

                if (nodes == null)
                {
                    Console.WriteLine("\tFailed to Parse");
                    continue;
                }

                nodes = PruneAndTagQuestions(nodes);
                Insert(category, subcategory, defaultSelect, nodes);
                totalQuestionCount += nodes.Count;
            }

            //Finally, save the database
            File.WriteAllText("question_db.json", Newtonsoft.Json.JsonConvert.SerializeObject(categories, Newtonsoft.Json.Formatting.Indented));
        }

        /// <summary>
        /// Removes questions that have no correct answer, a correct-answer index outside the
        /// answer list, or no answers at all, then assigns each remaining question an id derived
        /// from its question text.
        /// </summary>
        /// <param name="nodes">Questions to filter; the list itself is not modified.</param>
        /// <returns>A new list holding only the questions that were kept.</returns>
        private static List<QANode> PruneAndTagQuestions(List<QANode> nodes)
        {
            var kept = nodes
                .Where(a => a.Correct.Length > 0
                            && !a.Correct.Any(b => b < 0 || b >= a.Answers.Length)
                            && a.Answers.Length > 0)
                .ToList();

            int removed = nodes.Count - kept.Count;
            skippedQuestionCount += removed;
            if (removed > 0)
            {
                Console.WriteLine($"\tDeleting {removed} questions without answers");
            }

            //Add UIDs
            foreach (var q in kept)
            {
                q.QuestionId = BuildAnswerCache.CalculateMD5Hash(q.Question);
            }

            return kept;
        }