/// <summary>
/// Saves one line per entry of <paramref name="thelist"/> to
/// <c>output/&lt;folder&gt;/&lt;currentDateTime&gt;.00.txt</c> under the current working directory
/// and reports the destination folder in <c>RBoxDisplay</c>.
/// </summary>
/// <param name="thelist">Entries to save; each is rendered with <c>PrintInString(";")</c>.</param>
private void WriteTheLists(List<CityDetail> thelist)
{
    // Dropping the last three characters of currentDateTime leaves only year, month and day,
    // which names the per-day folder.
    string folderName = currentDateTime.Remove(currentDateTime.Length - 3, 3);
    string outputDirectory = Path.Combine(Directory.GetCurrentDirectory(), "output", folderName);

    // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
    Directory.CreateDirectory(outputDirectory);

    string filePath = Path.Combine(outputDirectory, currentDateTime + ".00.txt");
    File.WriteAllLines(filePath, thelist.Select(city => city.PrintInString(";")));

    // The folder is shown with a trailing separator, as it was before.
    RBoxDisplay.AppendText("\nsaved in:" + outputDirectory + Path.DirectorySeparatorChar + "\n\n");
}
/// <summary>
/// Collects every element of <paramref name="document"/> whose class name is exactly
/// <c>table-col</c> and passes them to <see cref="PrintResults"/>; when nothing matches,
/// an error message is appended to <c>RBoxDisplay</c> instead.
/// </summary>
/// <param name="document">Parsed page to search. A null document is reported as "no data".</param>
private void DoScraping(IHtmlDocument document)
{
    // Materialise the query once. The original lazy query was enumerated twice (by Any() and
    // again by PrintResults), and PrintResults rewrites InnerHtml on descendants while the
    // query would still be walking the document.
    List<IElement> tables = document == null
        ? new List<IElement>()
        : document.All.Where(x => x.ClassName == "table-col").ToList();
    //articleLink = document.QuerySelectorAll("tbody");

    if (tables.Count > 0)
    {
        PrintResults(tables);
    }
    else
    {
        RBoxDisplay.AppendText("-----------------------\nERROR: No data fetched, please check your internet connection\n\n");
    }
}
/// <summary>
/// Reads the saved file for <c>currentDateTime</c> from
/// <c>output/&lt;folder&gt;/&lt;currentDateTime&gt;.00.txt</c> under the current working directory,
/// splits its text with <c>splitLocalFileRes</c> and appends every piece to <c>RBoxDisplay</c>.
/// </summary>
/// <remarks>
/// No existence check is made here: <c>File.ReadAllText</c> throws if the file or its folder
/// is missing, exactly as before.
/// </remarks>
private void UseLocalData()
{
    // The folder name is currentDateTime without its last three characters.
    string folderName = currentDateTime.Remove(currentDateTime.Length - 3, 3);
    string filePath = Path.Combine(
        Directory.GetCurrentDirectory(), "output", folderName, currentDateTime + ".00.txt");

    string readText = File.ReadAllText(filePath);
    string[] collectedData = splitLocalFileRes(readText);

    RBoxDisplay.AppendText("Fetch data from :" + filePath + "\n\n");
    RBoxDisplay.AppendText("-----FETCHED FROM LOCAL DATA-----\n");

    foreach (var entry in collectedData)
    {
        // A null entry is appended as an empty string, matching the former $"{cd}" formatting.
        RBoxDisplay.AppendText(entry ?? string.Empty);
    }
}
/// <summary>
/// Extracts header and content rows from each scraped table element, lists them in
/// <c>RBoxDisplay</c> and then passes them to <see cref="WriteTheLists"/> to be saved.
/// </summary>
/// <param name="articleLink">
/// Elements to read. For each one, the child at index 2 is treated as the table header and
/// the child at index 3 as the table content; all other children are ignored.
/// </param>
/// <remarks>
/// The method rewrites <c>InnerHtml</c> on the cells it reads, so the elements passed in are
/// modified as a side effect.
/// </remarks>
public void PrintResults(IEnumerable<IElement> articleLink)
{
    // Positions of the header and content sections among a table element's children.
    const int HeaderSectionIndex = 2;
    const int ContentSectionIndex = 3;

    List<CityDetail> scrapedRows = new List<CityDetail>();

    foreach (var table in articleLink)
    {
        int sectionIndex = 0;
        foreach (var section in table.Children)
        {
            Console.WriteLine("itterate: " + sectionIndex);

            if (sectionIndex == HeaderSectionIndex)
            {
                CollectHeaderRows(section, scrapedRows);
            }
            else if (sectionIndex == ContentSectionIndex)
            {
                CollectContentRows(section, scrapedRows);
            }

            sectionIndex++;
        }
    }

    RBoxDisplay.AppendText("-----FETCHED FROM ONLINE-----\n");

    int rowNumber = 0;
    foreach (var row in scrapedRows)
    {
        RBoxDisplay.AppendText("\n" + rowNumber + ". " + row.PrintInString(";"));
        rowNumber++;
    }

    // Scraping is done; persist the rows to a file.
    WriteTheLists(scrapedRows);
}

// Adds one CityDetail per header row to rows, skipping the first row of the section.
private static void CollectHeaderRows(IElement headerSection, List<CityDetail> rows)
{
    // The first row is skipped; the original author's note attributes this to an "upper head" row.
    foreach (var row in headerSection.Children.Skip(1))
    {
        CityDetail detail = new CityDetail();

        foreach (var cell in row.Children)
        {
            // Kept as two separate assignments so the cell is updated exactly as before.
            // NOTE(review): confirm whether a plain space or a non-breaking space / "&nbsp;"
            // is the intended search text in the second replacement.
            cell.InnerHtml = cell.InnerHtml.Replace("<br>", " ");
            cell.InnerHtml = cell.InnerHtml.Replace(" ", "");

            detail.UniversalDetail.Add(cell.InnerHtml.HtmlEncode(Encoding.UTF8));
        }

        rows.Add(detail);
    }
}

// Adds one CityDetail per content row to rows. The first cell of a row contributes the markup
// of each of its child elements; every later cell contributes its own markup.
private static void CollectContentRows(IElement contentSection, List<CityDetail> rows)
{
    foreach (var row in contentSection.Children)
    {
        CityDetail detail = new CityDetail();
        bool isFirstCell = true;

        foreach (var cell in row.Children)
        {
            if (isFirstCell)
            {
                isFirstCell = false;

                // Snapshot the child elements before looping: the assignment below replaces
                // the cell's children, and iterating cell.Children directly while doing so
                // makes the loop depend on how the collection reacts to being modified.
                foreach (var nested in cell.Children.ToList())
                {
                    cell.InnerHtml = cell.InnerHtml.Replace(" ", "-");
                    // nested was captured before the rewrite, so its own markup is recorded unchanged.
                    detail.UniversalDetail.Add(nested.InnerHtml.HtmlEncode(Encoding.UTF8));
                }

                continue;
            }

            cell.InnerHtml = cell.InnerHtml.Replace(" ", "-");
            detail.UniversalDetail.Add(cell.InnerHtml.HtmlEncode(Encoding.UTF8));
        }

        rows.Add(detail);
    }
}