/// <summary>
/// Loads every indexed page of the collections with HadithID 1, 3 and 4 from sunnah.com in a
/// hidden Internet Explorer instance (WatiN), parses the rendered HTML and stores each hadith
/// container found on the page as a <c>HadithContent</c> row.
/// </summary>
/// <remarks>
/// All running "iexplore" processes are killed before the browser is started. When the page
/// contains the "ch_indonesian" / "ch_urdu" radio buttons they are clicked before the HTML is
/// captured; the Indonesian / Urdu text is only read when the corresponding click succeeded.
/// Page and chapter objects are used as in-memory parsing state only; this method does not add
/// them to the context. A failure on one page is written to the console and the run continues
/// with the next page. The method blocks on <c>Console.ReadLine()</c> when it is done.
/// </remarks>
static void GetSpecificContentWithWatin()
{
    // Kill all running Internet Explorer processes.
    Process[] processes = Process.GetProcessesByName("iexplore");
    foreach (Process process in processes)
    {
        process.Kill();
    }

    // The context is disposable; release it (and its connection) when the run ends.
    using (HadithDBEntities ctx = new HadithDBEntities())
    {
        var hadist = (from c in ctx.Hadiths
                      where c.HadithID == 1 || c.HadithID == 3 || c.HadithID == 4
                      select c).ToList();

        using (IE ieInstance = new IE())
        {
            // Open the browser window maximized, then hide it.
            ieInstance.ShowWindow(WatiN.Core.Native.Windows.NativeMethods.WindowShowStyle.ShowMaximized);
            ieInstance.Visible = false;
            ieInstance.WaitForComplete();

            for (int i = 0; i < hadist.Count; i++)
            {
                var selHadith = hadist[i];
                var hadistIndex = (from c in ctx.HadithIndexes
                                   where c.HadithID == selHadith.HadithID && c.No != null
                                   orderby c.No
                                   select c).ToList();

                for (int j = 0; j < hadistIndex.Count; j++)
                {
                    var selIndex = hadistIndex[j];
                    var selURL = string.Format("http://sunnah.com/{0}/{1}", selHadith.Name, selIndex.No);
                    try
                    {
                        int HadithOrder = 0;
                        bool isIndonesian = false;
                        bool isUrdu = false;

                        ieInstance.GoTo(selURL);
                        var RdBtn = ieInstance.RadioButton(Find.ById("ch_indonesian"));
                        var RdBtn2 = ieInstance.RadioButton(Find.ById("ch_urdu"));

                        if (ieInstance.Elements.Exists("ch_indonesian") && RdBtn != null)
                        {
                            try
                            {
                                RdBtn.Click();
                                // Wait for the browser to finish, plus a short grace period.
                                ieInstance.WaitForComplete();
                                Thread.Sleep(500);
                                isIndonesian = true;
                            }
                            catch (Exception clickEx)
                            {
                                // Best effort: the page is still parsed without the Indonesian
                                // text, but the failure is no longer swallowed silently.
                                Console.WriteLine(selURL + ":" + clickEx.Message);
                            }
                        }

                        if (ieInstance.Elements.Exists("ch_urdu") && RdBtn2 != null)
                        {
                            try
                            {
                                RdBtn2.Click();
                                // Wait for the browser to finish, plus a short grace period.
                                ieInstance.WaitForComplete();
                                Thread.Sleep(500);
                                isUrdu = true;
                            }
                            catch (Exception clickEx)
                            {
                                // Best effort: the page is still parsed without the Urdu text.
                                Console.WriteLine(selURL + ":" + clickEx.Message);
                            }
                        }

                        string HtmlPage = ieInstance.Html;
                        HtmlDocument doc = new HtmlDocument();
                        doc.LoadHtml(HtmlPage);

                        HadithChapter selChapter = null;
                        int ContentCounter = 0;
                        HadithPage selPage = new HadithPage();
                        selPage.PageNo = selIndex.No;
                        selPage.HadithID = selHadith.HadithID;

                        // Walk every <div> in document order and dispatch on its exact class value.
                        // NOTE(review): SelectNodes presumably yields null for a page without any
                        // <div>; that case ends up in the catch below, as it did before.
                        foreach (HtmlNode node in doc.DocumentNode.SelectNodes("//div"))
                        {
                            if (node.Attributes["class"] == null || string.IsNullOrEmpty(node.Attributes["class"].Value))
                            {
                                continue;
                            }

                            switch (node.Attributes["class"].Value)
                            {
                                case "book_page_english_name":
                                    selPage.Title = node.InnerText.Trim();
                                    break;

                                case "book_page_arabic_name arabic":
                                    selPage.TitleArabic = node.InnerText.Trim();
                                    break;

                                case "chapter":
                                {
                                    // A new chapter starts; following hadith containers refer to it.
                                    selChapter = new HadithChapter();
                                    selChapter.HadithID = selHadith.HadithID;
                                    selChapter.PageNo = selPage.PageNo;

                                    var chapterNode = node;

                                    var numberNode = chapterNode.SelectSingleNode(".//div[@class='echapno']");
                                    if (numberNode != null)
                                    {
                                        var Parsed = numberNode.InnerText.Trim().Replace("(", "").Replace(")", "");
                                        int chapterNo;
                                        if (!int.TryParse(Parsed, out chapterNo))
                                        {
                                            if (Parsed.Contains(','))
                                            {
                                                // Several numbers separated by commas: use the first one.
                                                chapterNo = Convert.ToInt32(Parsed.Split(',')[0]);
                                            }
                                            else
                                            {
                                                // Blank out every non-digit character, then parse what is left.
                                                for (int z = 0; z < Parsed.Length; z++)
                                                {
                                                    if (!(Parsed[z] >= '0' && Parsed[z] <= '9'))
                                                    {
                                                        Parsed = Parsed.Replace(Parsed[z].ToString(), " ");
                                                    }
                                                }

                                                // Still throws when the remainder is not a single number;
                                                // the page-level catch reports it, as before.
                                                chapterNo = Convert.ToInt32(Parsed.Trim());
                                            }
                                        }

                                        selChapter.ChapterNo = chapterNo;
                                    }

                                    var englishTitleNode = chapterNode.SelectSingleNode(".//div[@class='englishchapter']");
                                    if (englishTitleNode != null)
                                    {
                                        selChapter.Title = englishTitleNode.InnerText.Trim();
                                    }

                                    var arabicTitleNode = chapterNode.SelectSingleNode(".//div[@class='arabicchapter arabic']");
                                    if (arabicTitleNode != null)
                                    {
                                        selChapter.TitleArabic = arabicTitleNode.InnerText.Trim();
                                    }

                                    break;
                                }

                                case "arabic achapintro":
                                    if (selChapter != null)
                                    {
                                        selChapter.Intro = node.InnerText.Trim();
                                    }
                                    break;

                                case "actualHadithContainer":
                                {
                                    HadithContent selContent = new HadithContent();
                                    selContent.HadithID = selHadith.HadithID;
                                    selContent.PageNo = selPage.PageNo;
                                    selContent.HadithOrder = ++HadithOrder;
                                    if (selChapter != null)
                                    {
                                        selContent.ChapterNo = selChapter.ChapterNo;
                                    }

                                    var narratedNode = node.SelectSingleNode(".//div[@class='hadith_narrated']");
                                    if (narratedNode != null)
                                    {
                                        selContent.Narated = narratedNode.InnerText.Trim();
                                    }

                                    var englishNode = node.SelectSingleNode(".//div[@class='text_details']");
                                    if (englishNode != null)
                                    {
                                        selContent.ContentEnglish = englishNode.InnerText.Trim();
                                    }

                                    if (isIndonesian)
                                    {
                                        var indonesianNode = node.SelectSingleNode(".//div[@class='indonesian_hadith_full']");
                                        if (indonesianNode != null)
                                        {
                                            selContent.ContentIndonesia = indonesianNode.InnerText.Trim();
                                        }
                                    }

                                    if (isUrdu)
                                    {
                                        var urduNode = node.SelectSingleNode(".//div[@class='urdu_hadith_full']");
                                        if (urduNode != null)
                                        {
                                            selContent.ContentUrdu = urduNode.InnerText.Trim();
                                        }
                                    }

                                    var gradeNode = node.SelectSingleNode(".//table[@class='gradetable']");
                                    if (gradeNode != null)
                                    {
                                        selContent.Grade = gradeNode.InnerText.Trim();
                                    }

                                    var referenceNode = node.SelectSingleNode(".//table[@class='hadith_reference']");
                                    if (referenceNode != null)
                                    {
                                        selContent.Reference = referenceNode.InnerText.Trim();
                                    }

                                    // A hadith may carry only one sanad span; index only what exists so a
                                    // single span no longer aborts the rest of the page.
                                    var sanadNodes = node.SelectNodes(".//span[@class='arabic_sanad arabic']");
                                    if (sanadNodes != null)
                                    {
                                        if (sanadNodes.Count > 0)
                                        {
                                            selContent.SanadTop = sanadNodes[0].InnerText.Trim();
                                        }

                                        if (sanadNodes.Count > 1)
                                        {
                                            selContent.SanadBottom = sanadNodes[1].InnerText.Trim();
                                        }
                                    }

                                    var arabicNode = node.SelectSingleNode(".//span[@class='arabic_text_details arabic']");
                                    if (arabicNode != null)
                                    {
                                        selContent.ContentArabic = arabicNode.InnerText.Trim();
                                    }

                                    ctx.HadithContents.Add(selContent);

                                    // Flush in batches so a long page does not pile up pending rows.
                                    ContentCounter++;
                                    if (ContentCounter > 100)
                                    {
                                        ctx.SaveChanges();
                                        ContentCounter = 0;
                                    }

                                    break;
                                }

                                default:
                                    break;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine(selURL + ":" + ex.Message + "-" + ex.StackTrace);
                    }

                    // Persist whatever was collected for this page, even after a failure.
                    ctx.SaveChanges();
                }
            }
        }
    }

    Console.WriteLine("selesai");
    Console.ReadLine();
}
/// <summary>
/// Downloads every indexed page of the collection named "bulugh" from sunnah.com with
/// HtmlAgilityPack and stores the chapters and hadith containers found on each page.
/// </summary>
/// <remarks>
/// Chapters are added to <c>HadithChapters</c>, hadiths to <c>HadithContents</c>. The page
/// object is only used as in-memory state and is not added to the context. A failure on one
/// page writes the exception message to the console and the run continues with the next page.
/// </remarks>
static void GetBulughContent()
{
    // The context is disposable; release it (and its connection) when the run ends.
    using (HadithDBEntities ctx = new HadithDBEntities())
    {
        var hadist = (from c in ctx.Hadiths
                      where c.Name == "bulugh"
                      select c).ToList();

        for (int i = 0; i < hadist.Count; i++)
        {
            var selHadith = hadist[i];
            var hadistIndex = (from c in ctx.HadithIndexes
                               where c.HadithID == selHadith.HadithID
                               orderby c.No
                               select c).ToList();

            for (int j = 0; j < hadistIndex.Count; j++)
            {
                var selIndex = hadistIndex[j];
                var selURL = string.Format("http://sunnah.com/{0}/{1}", selHadith.Name, selIndex.No);
                try
                {
                    var Webget = new HtmlWeb();
                    var doc = Webget.Load(selURL);

                    HadithChapter selChapter = null;
                    int ContentCounter = 0;
                    HadithPage selPage = new HadithPage();
                    selPage.PageNo = selIndex.No;
                    selPage.HadithID = selHadith.HadithID;

                    // Walk every <div> in document order and dispatch on its exact class value.
                    // NOTE(review): SelectNodes presumably yields null for a page without any
                    // <div>; that case ends up in the catch below, as it did before.
                    foreach (HtmlNode node in doc.DocumentNode.SelectNodes("//div"))
                    {
                        if (node.Attributes["class"] == null || string.IsNullOrEmpty(node.Attributes["class"].Value))
                        {
                            continue;
                        }

                        switch (node.Attributes["class"].Value)
                        {
                            case "book_page_english_name":
                                selPage.Title = node.InnerHtml;
                                break;

                            case "book_page_arabic_name arabic":
                                selPage.TitleArabic = node.InnerHtml;
                                break;

                            case "chapter":
                            {
                                // A new chapter starts; following hadith containers refer to it.
                                selChapter = new HadithChapter();
                                selChapter.HadithID = selHadith.HadithID;
                                selChapter.PageNo = selPage.PageNo;

                                var chapterNode = node;

                                var numberNode = chapterNode.SelectSingleNode(".//div[@class='echapno']");
                                if (numberNode != null)
                                {
                                    // Throws on a non-numeric chapter number; the page-level catch reports it.
                                    selChapter.ChapterNo = Convert.ToInt32(numberNode.InnerText.Replace("(", "").Replace(")", ""));
                                }

                                var englishTitleNode = chapterNode.SelectSingleNode(".//div[@class='englishchapter']");
                                if (englishTitleNode != null)
                                {
                                    selChapter.Title = englishTitleNode.InnerText;
                                }

                                var arabicTitleNode = chapterNode.SelectSingleNode(".//div[@class='arabicchapter arabic']");
                                if (arabicTitleNode != null)
                                {
                                    selChapter.TitleArabic = arabicTitleNode.InnerText;
                                }

                                ctx.HadithChapters.Add(selChapter);
                                break;
                            }

                            case "arabic achapintro":
                                // An intro that appears before any chapter has nothing to attach to;
                                // previously this dereferenced null and aborted the whole page.
                                if (selChapter != null)
                                {
                                    selChapter.Intro = node.InnerText;
                                    ctx.SaveChanges();
                                }
                                break;

                            case "actualHadithContainer":
                            {
                                HadithContent selContent = new HadithContent();
                                selContent.HadithID = selHadith.HadithID;
                                selContent.PageNo = selPage.PageNo;
                                if (selChapter != null)
                                {
                                    selContent.ChapterNo = selChapter.ChapterNo;
                                }

                                // Every part of a hadith is optional: a missing element leaves the
                                // field unset instead of aborting the remaining hadiths on the page.
                                var narratedNode = node.SelectSingleNode(".//div[@class='hadith_narrated']");
                                if (narratedNode != null)
                                {
                                    selContent.Narated = narratedNode.InnerHtml;
                                }

                                var englishNode = node.SelectSingleNode(".//div[@class='text_details']");
                                if (englishNode != null)
                                {
                                    selContent.ContentEnglish = englishNode.InnerHtml;
                                }

                                var gradeNode = node.SelectSingleNode(".//table[@class='gradetable']");
                                if (gradeNode != null)
                                {
                                    selContent.Grade = gradeNode.InnerText;
                                }

                                var referenceNode = node.SelectSingleNode(".//table[@class='hadith_reference']");
                                if (referenceNode != null)
                                {
                                    selContent.Reference = referenceNode.InnerHtml;
                                }

                                // Index only the sanad spans that actually exist.
                                var sanadNodes = node.SelectNodes(".//span[@class='arabic_sanad arabic']");
                                if (sanadNodes != null)
                                {
                                    if (sanadNodes.Count > 0)
                                    {
                                        selContent.SanadTop = sanadNodes[0].InnerHtml;
                                    }

                                    if (sanadNodes.Count > 1)
                                    {
                                        selContent.SanadBottom = sanadNodes[1].InnerHtml;
                                    }
                                }

                                var arabicNode = node.SelectSingleNode(".//span[@class='arabic_text_details arabic']");
                                if (arabicNode != null)
                                {
                                    selContent.ContentArabic = arabicNode.InnerHtml;
                                }

                                ctx.HadithContents.Add(selContent);

                                // Flush in batches so a long page does not pile up pending rows.
                                ContentCounter++;
                                if (ContentCounter > 100)
                                {
                                    ctx.SaveChanges();
                                    ContentCounter = 0;
                                }

                                break;
                            }

                            default:
                                break;
                        }
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.Message);
                }

                // Persist whatever was collected for this page, even after a failure.
                ctx.SaveChanges();
            }
        }
    }
}
/// <summary>
/// Downloads every indexed page of every collection in <c>Hadiths</c> from sunnah.com with
/// HtmlAgilityPack and stores the chapters found on each page in <c>HadithChapters</c>.
/// </summary>
/// <remarks>
/// The page object is only used as in-memory state and is not added to the context. A failure
/// while handling one element is logged with the prefix "error dalam:" and parsing continues
/// with the next element; a failure for a whole page is logged with "error luar:" and the run
/// continues with the next page. The method blocks on <c>Console.ReadLine()</c> when it is done.
/// </remarks>
static void GetChapters()
{
    // The context is disposable; release it (and its connection) when the run ends.
    using (HadithDBEntities ctx = new HadithDBEntities())
    {
        var hadist = (from c in ctx.Hadiths select c).ToList();

        for (int i = 0; i < hadist.Count; i++)
        {
            var selHadith = hadist[i];
            var hadistIndex = (from c in ctx.HadithIndexes
                               where c.HadithID == selHadith.HadithID
                               orderby c.No
                               select c).ToList();

            for (int j = 0; j < hadistIndex.Count; j++)
            {
                var selIndex = hadistIndex[j];
                var selURL = string.Format("http://sunnah.com/{0}/{1}", selHadith.Name, selIndex.No);
                try
                {
                    var Webget = new HtmlWeb();
                    var doc = Webget.Load(selURL);

                    HadithChapter selChapter = null;
                    int counter = 0;
                    HadithPage selPage = new HadithPage();
                    selPage.PageNo = selIndex.No;
                    selPage.HadithID = selHadith.HadithID;

                    // Walk every <div> in document order and dispatch on its exact class value.
                    // NOTE(review): SelectNodes presumably yields null for a page without any
                    // <div>; that case ends up in the outer catch below, as it did before.
                    foreach (HtmlNode node in doc.DocumentNode.SelectNodes("//div"))
                    {
                        if (node.Attributes["class"] == null || string.IsNullOrEmpty(node.Attributes["class"].Value))
                        {
                            continue;
                        }

                        try
                        {
                            switch (node.Attributes["class"].Value)
                            {
                                case "book_page_english_name":
                                    selPage.Title = node.InnerHtml;
                                    break;

                                case "book_page_arabic_name arabic":
                                    selPage.TitleArabic = node.InnerHtml;
                                    break;

                                case "chapter":
                                {
                                    selChapter = new HadithChapter();
                                    selChapter.HadithID = selHadith.HadithID;
                                    selChapter.PageNo = selPage.PageNo;

                                    var chapterNode = node;

                                    var numberNode = chapterNode.SelectSingleNode(".//div[@class='echapno']");
                                    if (numberNode == null)
                                    {
                                        // A chapter without a number was never stored (it used to fail
                                        // with a NullReferenceException); keep skipping it, but say why.
                                        Console.WriteLine("error dalam:" + selURL + " - chapter without 'echapno' number skipped");
                                        break;
                                    }

                                    var Parsed = numberNode.InnerText.Replace("(", "").Replace(")", "");
                                    int chapterNo;
                                    if (!int.TryParse(Parsed, out chapterNo))
                                    {
                                        // Not a plain number: keep the raw text and derive a number from it.
                                        selChapter.ChapterNoStr = numberNode.InnerText.Trim();
                                        if (Parsed.Contains(','))
                                        {
                                            // Several numbers separated by commas: use the first one.
                                            chapterNo = Convert.ToInt32(Parsed.Split(',')[0]);
                                        }
                                        else
                                        {
                                            // Blank out every non-digit character, then parse what is left.
                                            for (int z = 0; z < Parsed.Length; z++)
                                            {
                                                if (!(Parsed[z] >= '0' && Parsed[z] <= '9'))
                                                {
                                                    Parsed = Parsed.Replace(Parsed[z].ToString(), " ");
                                                }
                                            }

                                            // Still throws when the remainder is not a single number;
                                            // the element-level catch reports it and the chapter is skipped.
                                            chapterNo = Convert.ToInt32(Parsed.Trim());
                                        }
                                    }

                                    selChapter.ChapterNo = chapterNo;

                                    var englishTitleNode = chapterNode.SelectSingleNode(".//div[@class='englishchapter']");
                                    if (englishTitleNode != null)
                                    {
                                        selChapter.Title = englishTitleNode.InnerText.Trim();
                                    }

                                    var arabicTitleNode = chapterNode.SelectSingleNode(".//div[@class='arabicchapter arabic']");
                                    if (arabicTitleNode != null)
                                    {
                                        selChapter.TitleArabic = arabicTitleNode.InnerText.Trim();
                                    }

                                    ctx.HadithChapters.Add(selChapter);

                                    // Flush in batches so a long page does not pile up pending rows.
                                    counter++;
                                    if (counter > 100)
                                    {
                                        ctx.SaveChanges();
                                        counter = 0;
                                    }

                                    break;
                                }

                                case "arabic achapintro":
                                    // An intro that appears before any chapter has nothing to attach to.
                                    if (selChapter != null)
                                    {
                                        selChapter.Intro = node.InnerText;
                                    }
                                    break;

                                default:
                                    break;
                            }
                        }
                        catch (Exception ex)
                        {
                            // Report the element and carry on with the next one.
                            Console.WriteLine("error dalam:" + ex.Message + "-" + ex.StackTrace);
                        }
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine("error luar:" + ex.Message + "-" + ex.StackTrace);
                }

                // Persist whatever was collected for this page, even after a failure.
                ctx.SaveChanges();
            }
        }
    }

    Console.WriteLine("selesai baca chapter");
    Console.ReadLine();
}