/// <summary>
/// Copies every HadithContent row whose HadithID equals 4 from the
/// HadithDB2Entities context into the HadithDBEntities context.
/// Rows are queued and flushed in batches, with a final flush at the end.
/// </summary>
static void CopyHadith()
{
    HadithDBEntities target = new HadithDBEntities();
    HadithDB2Entities source = new HadithDB2Entities();
    int pending = 0;

    // Load the source rows up front so the copy loop iterates an in-memory list.
    var rows = source.HadithContents.Where(c => c.HadithID == 4).ToList();

    foreach (var row in rows)
    {
        HadithContent copy = new HadithContent();
        copy.BookRef = row.BookRef;
        copy.ContentIndonesia = row.ContentIndonesia;
        copy.HadithID = row.HadithID;
        copy.ChapterNo = row.ChapterNo;
        copy.ContentArabic = row.ContentArabic;
        copy.ContentEnglish = row.ContentEnglish;
        copy.ContentUrdu = row.ContentUrdu;
        copy.Grade = row.Grade;
        copy.HadithOrder = row.HadithOrder;
        copy.Narated = row.Narated;
        copy.OtherRef = row.OtherRef;
        copy.PageNo = row.PageNo;
        copy.Reference = row.Reference;
        copy.SanadBottom = row.SanadBottom;
        copy.SanadTop = row.SanadTop;
        copy.UrlRef = row.UrlRef;
        copy.USCRef = row.USCRef;

        target.HadithContents.Add(copy);

        pending = pending + 1;
        if (pending > 200)
        {
            // More than 200 rows queued since the last save: flush and restart the count.
            pending = 0;
            target.SaveChanges();
        }
    }

    // Persist whatever remains from the last partial batch.
    target.SaveChanges();
}
/// <summary>
/// Trims leading and trailing whitespace from the Title of every HadithPage row,
/// saving in batches and once more after the last row.
/// </summary>
static void CleanUpIndex()
{
    // NOTE(review): assumes HadithDBEntities implements IDisposable (as Entity Framework
    // contexts do) - confirm against the generated context class.
    using (HadithDBEntities ctx = new HadithDBEntities())
    {
        var indeks = (from c in ctx.HadithPages select c).ToList();
        int counter = 0;
        foreach (var item in indeks)
        {
            counter++;

            // A null Title would make Trim() throw NullReferenceException; leave such rows untouched.
            if (item.Title != null)
            {
                item.Title = item.Title.Trim();
            }

            if (counter > 100)
            {
                counter = 0;
                ctx.SaveChanges();
            }
        }

        // Flush the final partial batch; without this the last rows (up to 100)
        // are modified in memory but never written back.
        ctx.SaveChanges();
    }
}
/// <summary>
/// For the Hadith rows named "bulugh", downloads each indexed page from
/// http://sunnah.com/{name}/{no} and stores the chapters and hadith entries
/// found in the page's div elements.
/// </summary>
/// <remarks>
/// Elements that are missing from a page are skipped instead of raising
/// NullReferenceException, so one absent element no longer discards the rest
/// of the page. Any other failure while processing a page is written to the
/// console and processing continues with the next page; entities queued before
/// the failure are still saved.
/// </remarks>
static void GetBulughContent()
{
    // NOTE(review): assumes HadithDBEntities implements IDisposable (as Entity Framework
    // contexts do) - confirm against the generated context class.
    using (HadithDBEntities ctx = new HadithDBEntities())
    {
        var webGet = new HtmlWeb();
        var hadist = (from c in ctx.Hadiths where c.Name == "bulugh" select c).ToList();

        for (int i = 0; i < hadist.Count; i++)
        {
            var selHadith = hadist[i];
            var hadistIndex = (from c in ctx.HadithIndexes
                               where c.HadithID == selHadith.HadithID
                               orderby c.No
                               select c).ToList();

            for (int j = 0; j < hadistIndex.Count; j++)
            {
                var selIndex = hadistIndex[j];
                var selURL = string.Format("http://sunnah.com/{0}/{1}", selHadith.Name, selIndex.No);
                try
                {
                    var doc = webGet.Load(selURL);
                    HadithChapter selChapter = null;
                    int contentCounter = 0;

                    // The page object only carries PageNo/titles for the entities below;
                    // it is not added to the context in this method.
                    HadithPage selPage = new HadithPage();
                    selPage.PageNo = selIndex.No;
                    selPage.HadithID = selHadith.HadithID;

                    // SelectNodes returns null (not an empty collection) when nothing matches.
                    var divNodes = doc.DocumentNode.SelectNodes("//div");
                    if (divNodes == null)
                    {
                        Console.WriteLine(selURL + ": no div elements found");
                    }
                    else
                    {
                        foreach (HtmlNode node in divNodes)
                        {
                            var classAttr = node.Attributes["class"];
                            if (classAttr == null || string.IsNullOrEmpty(classAttr.Value))
                            {
                                continue;
                            }

                            switch (classAttr.Value)
                            {
                                case "book_page_english_name":
                                    selPage.Title = node.InnerHtml;
                                    break;

                                case "book_page_arabic_name arabic":
                                    selPage.TitleArabic = node.InnerHtml;
                                    break;

                                case "chapter":
                                    {
                                        selChapter = new HadithChapter();
                                        selChapter.HadithID = selHadith.HadithID;
                                        selChapter.PageNo = selPage.PageNo;

                                        var chapterNoNode = node.SelectSingleNode(".//div[@class='echapno']");
                                        if (chapterNoNode != null)
                                        {
                                            // Non-numeric text makes Convert.ToInt32 throw; the page-level
                                            // catch below then logs it and abandons the rest of this page.
                                            selChapter.ChapterNo = Convert.ToInt32(chapterNoNode.InnerText.Replace("(", "").Replace(")", ""));
                                        }

                                        var englishNode = node.SelectSingleNode(".//div[@class='englishchapter']");
                                        if (englishNode != null)
                                        {
                                            selChapter.Title = englishNode.InnerText;
                                        }

                                        var arabicNode = node.SelectSingleNode(".//div[@class='arabicchapter arabic']");
                                        if (arabicNode != null)
                                        {
                                            selChapter.TitleArabic = arabicNode.InnerText;
                                        }

                                        ctx.HadithChapters.Add(selChapter);
                                        break;
                                    }

                                case "arabic achapintro":
                                    // An intro can appear before any chapter div was seen; there is
                                    // nothing to attach it to in that case.
                                    if (selChapter != null)
                                    {
                                        selChapter.Intro = node.InnerText;
                                        ctx.SaveChanges();
                                    }
                                    break;

                                case "actualHadithContainer":
                                    {
                                        HadithContent selContent = new HadithContent();
                                        selContent.HadithID = selHadith.HadithID;
                                        selContent.PageNo = selPage.PageNo;
                                        if (selChapter != null)
                                        {
                                            selContent.ChapterNo = selChapter.ChapterNo;
                                        }

                                        var narratedNode = node.SelectSingleNode(".//div[@class='hadith_narrated']");
                                        if (narratedNode != null)
                                        {
                                            selContent.Narated = narratedNode.InnerHtml;
                                        }

                                        var englishTextNode = node.SelectSingleNode(".//div[@class='text_details']");
                                        if (englishTextNode != null)
                                        {
                                            selContent.ContentEnglish = englishTextNode.InnerHtml;
                                        }

                                        var gradeNode = node.SelectSingleNode(".//table[@class='gradetable']");
                                        if (gradeNode != null)
                                        {
                                            selContent.Grade = gradeNode.InnerText;
                                        }

                                        var referenceNode = node.SelectSingleNode(".//table[@class='hadith_reference']");
                                        if (referenceNode != null)
                                        {
                                            selContent.Reference = referenceNode.InnerHtml;
                                        }

                                        // First matching span is stored as SanadTop, second as SanadBottom;
                                        // either may be absent.
                                        var sanadNodes = node.SelectNodes(".//span[@class='arabic_sanad arabic']");
                                        if (sanadNodes != null)
                                        {
                                            if (sanadNodes.Count > 0)
                                            {
                                                selContent.SanadTop = sanadNodes[0].InnerHtml;
                                            }
                                            if (sanadNodes.Count > 1)
                                            {
                                                selContent.SanadBottom = sanadNodes[1].InnerHtml;
                                            }
                                        }

                                        var arabicTextNode = node.SelectSingleNode(".//span[@class='arabic_text_details arabic']");
                                        if (arabicTextNode != null)
                                        {
                                            selContent.ContentArabic = arabicTextNode.InnerHtml;
                                        }

                                        ctx.HadithContents.Add(selContent);
                                        contentCounter++;
                                        if (contentCounter > 100)
                                        {
                                            ctx.SaveChanges();
                                            contentCounter = 0;
                                        }
                                        break;
                                    }

                                default:
                                    break;
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.Message);
                }

                // Persist whatever was queued for this page, including after a failure.
                ctx.SaveChanges();
            }
        }
    }
}
/// <summary>
/// Trims leading and trailing whitespace from the Grade of every HadithContent
/// row that has a non-empty Grade, saving once per Hadith collection.
/// </summary>
/// <remarks>
/// This method previously carried a large commented-out block of one-off clean-up
/// passes that were no longer executed: removing ":" from Reference/BookRef/USCRef,
/// building UrlRef from anchor hrefs inside ContentArabic, removing "Grade" and ":"
/// from Grade, removing "(?)" from ContentEnglish, reducing Narated, SanadTop,
/// SanadBottom and the Content* fields to their HTML inner text, and HTML-decoding
/// Reference/BookRef/USCRef. Recover them from source control if they are needed again.
/// </remarks>
static void CleanHTML()
{
    // NOTE(review): assumes HadithDBEntities implements IDisposable (as Entity Framework
    // contexts do) - confirm against the generated context class.
    using (HadithDBEntities ctx = new HadithDBEntities())
    {
        var hadist = (from c in ctx.Hadiths select c).ToList();
        for (int i = 0; i < hadist.Count; i++)
        {
            var selHadith = hadist[i];
            var content = from c in ctx.HadithContents
                          where c.HadithID == selHadith.HadithID
                          orderby c.ContentID
                          select c;

            foreach (var item in content)
            {
                if (!string.IsNullOrEmpty(item.Grade))
                {
                    item.Grade = item.Grade.Trim();
                }
            }

            // One save per collection.
            ctx.SaveChanges();
        }
    }
}
/// <summary>
/// Renumbers HadithOrder for the Hadith row with HadithID 13: within each indexed
/// page (non-null No, ascending), contents are numbered 1, 2, 3, ... in ascending
/// ContentID order. Changes are saved after each page.
/// </summary>
static void RepairOrder()
{
    HadithDBEntities ctx = new HadithDBEntities();
    var collections = ctx.Hadiths.Where(h => h.HadithID == 13).ToList();

    foreach (var collection in collections)
    {
        var collectionId = collection.HadithID;

        // Page numbers of this collection, ascending, ignoring index rows without a number.
        var pageNumbers = ctx.HadithIndexes
            .Where(x => x.HadithID == collectionId && x.No != null)
            .OrderBy(x => x.No)
            .Select(x => x.No)
            .ToArray();

        foreach (var pageNo in pageNumbers)
        {
            var pageContents = ctx.HadithContents
                .Where(c => c.HadithID == collectionId && c.PageNo == pageNo)
                .OrderBy(c => c.ContentID);

            int position = 0;
            foreach (var entry in pageContents)
            {
                position = position + 1;
                entry.HadithOrder = position;
            }

            ctx.SaveChanges();
        }
    }
}
/// <summary>
/// Drives a hidden Internet Explorer instance (WatiN) over every indexed page of the
/// Hadith rows with HadithID 1, 3 and 4 on sunnah.com, switches on the Indonesian and
/// Urdu translations when their radio buttons are present, and stores one
/// HadithContent row per "actualHadithContainer" div.
/// </summary>
/// <remarks>
/// All running "iexplore" processes are killed first. Chapter information is parsed
/// only to fill ChapterNo on the content rows; chapters themselves are not added to
/// the context here. A failure while processing a page is logged with its URL and
/// stack trace and the next page is processed. The method ends by printing "selesai"
/// and waiting for a line on standard input.
/// </remarks>
static void GetSpecificContentWithWatin()
{
    // Kill all running Internet Explorer processes before automating a fresh one.
    foreach (Process process in Process.GetProcessesByName("iexplore"))
    {
        try
        {
            process.Kill();
        }
        catch (InvalidOperationException ex)
        {
            // The process exited between enumeration and Kill().
            Console.WriteLine("iexplore:" + ex.Message);
        }
        catch (System.ComponentModel.Win32Exception ex)
        {
            // The process could not be terminated (for example, access denied).
            Console.WriteLine("iexplore:" + ex.Message);
        }
        finally
        {
            process.Dispose();
        }
    }

    // NOTE(review): assumes HadithDBEntities implements IDisposable (as Entity Framework
    // contexts do) - confirm against the generated context class.
    using (HadithDBEntities ctx = new HadithDBEntities())
    {
        var hadist = (from c in ctx.Hadiths
                      where c.HadithID == 1 || c.HadithID == 3 || c.HadithID == 4
                      select c).ToList();

        using (IE ieInstance = new IE())
        {
            // Maximise the window, then hide it and wait for the browser to be ready.
            ieInstance.ShowWindow(WatiN.Core.Native.Windows.NativeMethods.WindowShowStyle.ShowMaximized);
            ieInstance.Visible = false;
            ieInstance.WaitForComplete();

            for (int i = 0; i < hadist.Count; i++)
            {
                var selHadith = hadist[i];
                var hadistIndex = (from c in ctx.HadithIndexes
                                   where c.HadithID == selHadith.HadithID && c.No != null
                                   orderby c.No
                                   select c).ToList();

                for (int j = 0; j < hadistIndex.Count; j++)
                {
                    var selIndex = hadistIndex[j];
                    var selURL = string.Format("http://sunnah.com/{0}/{1}", selHadith.Name, selIndex.No);
                    try
                    {
                        int hadithOrder = 0;
                        bool isIndonesian = false;
                        bool isUrdu = false;

                        ieInstance.GoTo(selURL);
                        var rdBtnIndonesian = ieInstance.RadioButton(Find.ById("ch_indonesian"));
                        var rdBtnUrdu = ieInstance.RadioButton(Find.ById("ch_urdu"));

                        if (ieInstance.Elements.Exists("ch_indonesian") && rdBtnIndonesian != null)
                        {
                            try
                            {
                                rdBtnIndonesian.Click();
                                ieInstance.WaitForComplete();
                                // Give the page time to finish rendering the translation.
                                Thread.Sleep(500);
                                isIndonesian = true;
                            }
                            catch (Exception ex)
                            {
                                // Best effort: carry on without the Indonesian text, but say so.
                                Console.WriteLine(selURL + ":ch_indonesian:" + ex.Message);
                            }
                        }

                        if (ieInstance.Elements.Exists("ch_urdu") && rdBtnUrdu != null)
                        {
                            try
                            {
                                rdBtnUrdu.Click();
                                ieInstance.WaitForComplete();
                                // Give the page time to finish rendering the translation.
                                Thread.Sleep(500);
                                isUrdu = true;
                            }
                            catch (Exception ex)
                            {
                                // Best effort: carry on without the Urdu text, but say so.
                                Console.WriteLine(selURL + ":ch_urdu:" + ex.Message);
                            }
                        }

                        string htmlPage = ieInstance.Html;
                        HtmlDocument doc = new HtmlDocument();
                        doc.LoadHtml(htmlPage);

                        HadithChapter selChapter = null;
                        int contentCounter = 0;
                        HadithPage selPage = new HadithPage();
                        selPage.PageNo = selIndex.No;
                        selPage.HadithID = selHadith.HadithID;

                        // SelectNodes returns null (not an empty collection) when nothing matches.
                        var divNodes = doc.DocumentNode.SelectNodes("//div");
                        if (divNodes == null)
                        {
                            Console.WriteLine(selURL + ": no div elements found");
                        }
                        else
                        {
                            foreach (HtmlNode node in divNodes)
                            {
                                var classAttr = node.Attributes["class"];
                                if (classAttr == null || string.IsNullOrEmpty(classAttr.Value))
                                {
                                    continue;
                                }

                                switch (classAttr.Value)
                                {
                                    case "book_page_english_name":
                                        selPage.Title = node.InnerText.Trim();
                                        break;

                                    case "book_page_arabic_name arabic":
                                        selPage.TitleArabic = node.InnerText.Trim();
                                        break;

                                    case "chapter":
                                        {
                                            selChapter = new HadithChapter();
                                            selChapter.HadithID = selHadith.HadithID;
                                            selChapter.PageNo = selPage.PageNo;

                                            var chapterNoNode = node.SelectSingleNode(".//div[@class='echapno']");
                                            if (chapterNoNode != null)
                                            {
                                                var parsed = chapterNoNode.InnerText.Trim().Replace("(", "").Replace(")", "");
                                                int chapterNo;
                                                if (int.TryParse(parsed, out chapterNo))
                                                {
                                                    selChapter.ChapterNo = chapterNo;
                                                }
                                                else if (parsed.Contains(','))
                                                {
                                                    // Comma-separated list: use the first entry.
                                                    selChapter.ChapterNo = Convert.ToInt32(parsed.Split(',')[0]);
                                                }
                                                else
                                                {
                                                    // Blank out every non-digit character, then parse what is left.
                                                    // This still throws if digits end up separated by blanks; the
                                                    // page-level catch below logs that.
                                                    for (int z = 0; z < parsed.Length; z++)
                                                    {
                                                        if (!(parsed[z] >= '0' && parsed[z] <= '9'))
                                                        {
                                                            parsed = parsed.Replace(parsed[z].ToString(), " ");
                                                        }
                                                    }
                                                    selChapter.ChapterNo = Convert.ToInt32(parsed.Trim());
                                                }
                                            }

                                            var englishNode = node.SelectSingleNode(".//div[@class='englishchapter']");
                                            if (englishNode != null)
                                            {
                                                selChapter.Title = englishNode.InnerText.Trim();
                                            }

                                            var arabicNode = node.SelectSingleNode(".//div[@class='arabicchapter arabic']");
                                            if (arabicNode != null)
                                            {
                                                selChapter.TitleArabic = arabicNode.InnerText.Trim();
                                            }
                                            break;
                                        }

                                    case "arabic achapintro":
                                        if (selChapter != null)
                                        {
                                            selChapter.Intro = node.InnerText.Trim();
                                        }
                                        break;

                                    case "actualHadithContainer":
                                        {
                                            HadithContent selContent = new HadithContent();
                                            selContent.HadithID = selHadith.HadithID;
                                            selContent.PageNo = selPage.PageNo;
                                            selContent.HadithOrder = ++hadithOrder;
                                            if (selChapter != null)
                                            {
                                                selContent.ChapterNo = selChapter.ChapterNo;
                                            }

                                            var narratedNode = node.SelectSingleNode(".//div[@class='hadith_narrated']");
                                            if (narratedNode != null)
                                            {
                                                selContent.Narated = narratedNode.InnerText.Trim();
                                            }

                                            var englishTextNode = node.SelectSingleNode(".//div[@class='text_details']");
                                            if (englishTextNode != null)
                                            {
                                                selContent.ContentEnglish = englishTextNode.InnerText.Trim();
                                            }

                                            if (isIndonesian)
                                            {
                                                var indonesianNode = node.SelectSingleNode(".//div[@class='indonesian_hadith_full']");
                                                if (indonesianNode != null)
                                                {
                                                    selContent.ContentIndonesia = indonesianNode.InnerText.Trim();
                                                }
                                            }

                                            if (isUrdu)
                                            {
                                                var urduNode = node.SelectSingleNode(".//div[@class='urdu_hadith_full']");
                                                if (urduNode != null)
                                                {
                                                    selContent.ContentUrdu = urduNode.InnerText.Trim();
                                                }
                                            }

                                            var gradeNode = node.SelectSingleNode(".//table[@class='gradetable']");
                                            if (gradeNode != null)
                                            {
                                                selContent.Grade = gradeNode.InnerText.Trim();
                                            }

                                            var referenceNode = node.SelectSingleNode(".//table[@class='hadith_reference']");
                                            if (referenceNode != null)
                                            {
                                                selContent.Reference = referenceNode.InnerText.Trim();
                                            }

                                            // First matching span is stored as SanadTop, second as SanadBottom;
                                            // a page may contain only one of them.
                                            var sanadNodes = node.SelectNodes(".//span[@class='arabic_sanad arabic']");
                                            if (sanadNodes != null)
                                            {
                                                if (sanadNodes.Count > 0)
                                                {
                                                    selContent.SanadTop = sanadNodes[0].InnerText.Trim();
                                                }
                                                if (sanadNodes.Count > 1)
                                                {
                                                    selContent.SanadBottom = sanadNodes[1].InnerText.Trim();
                                                }
                                            }

                                            var arabicTextNode = node.SelectSingleNode(".//span[@class='arabic_text_details arabic']");
                                            if (arabicTextNode != null)
                                            {
                                                selContent.ContentArabic = arabicTextNode.InnerText.Trim();
                                            }

                                            ctx.HadithContents.Add(selContent);
                                            contentCounter++;
                                            if (contentCounter > 100)
                                            {
                                                ctx.SaveChanges();
                                                contentCounter = 0;
                                            }
                                            break;
                                        }

                                    default:
                                        break;
                                }
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine(selURL + ":" + ex.Message + "-" + ex.StackTrace);
                    }

                    // Persist whatever was queued for this page, including after a failure.
                    ctx.SaveChanges();
                }
            }
        }
    }

    Console.WriteLine("selesai");
    Console.ReadLine();
}
/// <summary>
/// Splits the HTML stored in HadithContent.Reference into plain values. The td cells
/// of the stored fragment are read in document order and every second cell (2nd,
/// 4th, 6th, ...) is kept; the first kept value replaces Reference, the second goes
/// to BookRef and the third to USCRef. Each cell is echoed to the console with its
/// 1-based position. Changes are saved once per Hadith collection.
/// </summary>
static void RepairReference()
{
    HadithDBEntities ctx = new HadithDBEntities();
    var collections = ctx.Hadiths.ToList();

    foreach (var collection in collections)
    {
        var collectionId = collection.HadithID;
        var rows = ctx.HadithContents
            .Where(c => c.HadithID == collectionId)
            .OrderBy(c => c.ContentID);

        foreach (var row in rows)
        {
            if (string.IsNullOrEmpty(row.Reference))
            {
                continue;
            }

            HtmlDocument fragment = new HtmlDocument();
            fragment.LoadHtml(row.Reference);

            var cells = fragment.DocumentNode.SelectNodes("//td");
            if (cells == null)
            {
                // No table cells in this reference: leave the row as it is.
                continue;
            }

            List<string> values = new List<string>();
            int position = 0;
            foreach (HtmlNode cell in cells)
            {
                position += 1;
                string cellText = cell.InnerText;
                Console.WriteLine(position + " - " + cellText);

                bool isEvenCell = (position % 2) == 0;
                if (isEvenCell)
                {
                    values.Add(cellText);
                }
            }

            if (values.Count > 0)
            {
                row.Reference = values[0];
            }
            if (values.Count > 1)
            {
                row.BookRef = values[1];
            }
            if (values.Count > 2)
            {
                row.USCRef = values[2];
            }
        }

        ctx.SaveChanges();
    }
}
/// <summary>
/// Creates one Hadith row per collection name in the built-in list and fills Title
/// and About from http://sunnah.com/{name}/about when those elements are present.
/// </summary>
/// <remarks>
/// The row is added and saved even when the page cannot be loaded or lacks the
/// expected elements; in that case only Name (and whatever was read before the
/// failure) is set, and the failure is written to the console.
/// </remarks>
static void getHadith()
{
    var perowih = new string[] { "bukhari", "muslim", "nasai", "abudawud", "tirmidhi", "ibnmajah", "malik", "nawawi40", "riyadussaliheen", "adab", "qudsi40", "shamail", "bulugh" };

    // NOTE(review): assumes HadithDBEntities implements IDisposable (as Entity Framework
    // contexts do) - confirm against the generated context class.
    using (HadithDBEntities ctx = new HadithDBEntities())
    {
        var webGet = new HtmlWeb();
        foreach (var item in perowih)
        {
            Hadith newNode = new Hadith();
            newNode.Name = item;
            try
            {
                var doc = webGet.Load(string.Format("http://sunnah.com/{0}/about", item));

                // SelectNodes returns null when nothing matches; guard each lookup separately
                // so a missing title does not prevent the About text from being read.
                var titleNodes = doc.DocumentNode.SelectNodes("//div[@class='abouttitle']");
                if (titleNodes != null)
                {
                    // When several nodes match, the last one wins.
                    foreach (HtmlNode node in titleNodes)
                    {
                        newNode.Title = node.InnerHtml;
                    }
                }

                var aboutNodes = doc.DocumentNode.SelectNodes("//div[@class='about']");
                if (aboutNodes != null)
                {
                    foreach (HtmlNode node in aboutNodes)
                    {
                        newNode.About = node.InnerHtml;
                    }
                }
            }
            catch (Exception ex)
            {
                // Still best effort, but no longer silent.
                Console.WriteLine(item + ":" + ex.Message);
            }

            ctx.Hadiths.Add(newNode);
            ctx.SaveChanges();
        }
    }
}
/// <summary>
/// For every Hadith row, loads http://sunnah.com/{Name}, stores the collection's
/// Arabic title in Hadith.Arabic and adds one HadithIndex row per book entry found
/// under the "book_titles" div. Changes are saved once per collection.
/// </summary>
/// <remarks>
/// Each field of an index row is read independently: a missing or non-numeric
/// element leaves only that field unset (and is reported on the console) instead of
/// discarding the fields that follow it. The row is added in either case. A failure
/// while loading or walking a collection page is logged and the next collection is
/// processed.
/// </remarks>
static void GetIndex()
{
    // NOTE(review): assumes HadithDBEntities implements IDisposable (as Entity Framework
    // contexts do) - confirm against the generated context class.
    using (HadithDBEntities ctx = new HadithDBEntities())
    {
        var webGet = new HtmlWeb();
        var hadist = (from c in ctx.Hadiths select c).ToList();

        for (int i = 0; i < hadist.Count; i++)
        {
            var item = hadist[i];
            try
            {
                var doc = webGet.Load(string.Format("http://sunnah.com/{0}", item.Name));

                // SelectNodes returns null when nothing matches; a missing Arabic title
                // must not stop the book list below from being read.
                var arabicNodes = doc.DocumentNode.SelectNodes("//div[@class='colindextitle']//div[@class='arabic']");
                if (arabicNodes != null)
                {
                    // When several nodes match, the last one wins.
                    foreach (HtmlNode node in arabicNodes)
                    {
                        item.Arabic = node.InnerHtml;
                    }
                }

                var nodes = doc.DocumentNode.SelectNodes("//div[@class='book_titles']//div[contains(@class,'book_title')]");
                if (nodes != null)
                {
                    foreach (HtmlNode node in nodes)
                    {
                        HadithIndex newNode = new HadithIndex();
                        newNode.HadithID = item.HadithID;
                        int parsed;

                        var numberNode = node.SelectSingleNode(".//div[@class='book_number']");
                        if (numberNode != null && int.TryParse(numberNode.InnerText, out parsed))
                        {
                            newNode.No = parsed;
                        }
                        else
                        {
                            Console.WriteLine(item.Name + ": book_number missing or not numeric");
                        }

                        var englishNameNode = node.SelectSingleNode(".//div[@class='english_book_name']");
                        if (englishNameNode != null)
                        {
                            newNode.Name = englishNameNode.InnerText;
                        }

                        var arabicNameNode = node.SelectSingleNode(".//div[@class='arabic arabic_book_name']");
                        if (arabicNameNode != null)
                        {
                            newNode.ArabicName = arabicNameNode.InnerText;
                        }

                        // Two divs share this class: the first is stored as IndexFrom,
                        // the second as IndexTo.
                        var rangeNodes = node.SelectNodes(".//div[@class='book_range_from']");
                        if (rangeNodes != null)
                        {
                            if (rangeNodes.Count > 0 && int.TryParse(rangeNodes[0].InnerText, out parsed))
                            {
                                newNode.IndexFrom = parsed;
                            }
                            if (rangeNodes.Count > 1 && int.TryParse(rangeNodes[1].InnerText, out parsed))
                            {
                                newNode.IndexTo = parsed;
                            }
                        }

                        ctx.HadithIndexes.Add(newNode);
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }

            ctx.SaveChanges();
        }
    }
}
/// <summary>
/// For every Hadith row and each of its index entries (ordered by No), downloads
/// http://sunnah.com/{Name}/{No} and adds one HadithChapter per "chapter" div,
/// attaching the text of a following "arabic achapintro" div as the chapter Intro.
/// </summary>
/// <remarks>
/// A chapter whose number element is missing is still stored (without ChapterNo).
/// When the number text is not a plain integer, the raw text is kept in ChapterNoStr
/// and ChapterNo is derived from the first comma-separated entry, or from the digits
/// that remain after blanking out every other character. Errors on a single div are
/// logged with the prefix "error dalam:" and the next div is processed; errors for a
/// whole page are logged with "error luar:". The method ends by printing a completion
/// message and waiting for a line on standard input.
/// </remarks>
static void GetChapters()
{
    // NOTE(review): assumes HadithDBEntities implements IDisposable (as Entity Framework
    // contexts do) - confirm against the generated context class.
    using (HadithDBEntities ctx = new HadithDBEntities())
    {
        var webGet = new HtmlWeb();
        var hadist = (from c in ctx.Hadiths select c).ToList();

        for (int i = 0; i < hadist.Count; i++)
        {
            var selHadith = hadist[i];
            var hadistIndex = (from c in ctx.HadithIndexes
                               where c.HadithID == selHadith.HadithID
                               orderby c.No
                               select c).ToList();

            for (int j = 0; j < hadistIndex.Count; j++)
            {
                var selIndex = hadistIndex[j];
                var selURL = string.Format("http://sunnah.com/{0}/{1}", selHadith.Name, selIndex.No);
                try
                {
                    var doc = webGet.Load(selURL);
                    HadithChapter selChapter = null;
                    int counter = 0;

                    // The page object only supplies PageNo for the chapters; it is not
                    // added to the context in this method.
                    HadithPage selPage = new HadithPage();
                    selPage.PageNo = selIndex.No;
                    selPage.HadithID = selHadith.HadithID;

                    // SelectNodes returns null (not an empty collection) when nothing matches.
                    var divNodes = doc.DocumentNode.SelectNodes("//div");
                    if (divNodes == null)
                    {
                        Console.WriteLine(selURL + ": no div elements found");
                    }
                    else
                    {
                        foreach (HtmlNode node in divNodes)
                        {
                            var classAttr = node.Attributes["class"];
                            if (classAttr == null || string.IsNullOrEmpty(classAttr.Value))
                            {
                                continue;
                            }

                            try
                            {
                                switch (classAttr.Value)
                                {
                                    case "book_page_english_name":
                                        selPage.Title = node.InnerHtml;
                                        break;

                                    case "book_page_arabic_name arabic":
                                        selPage.TitleArabic = node.InnerHtml;
                                        break;

                                    case "chapter":
                                        {
                                            selChapter = new HadithChapter();
                                            selChapter.HadithID = selHadith.HadithID;
                                            selChapter.PageNo = selPage.PageNo;

                                            var chapterNoNode = node.SelectSingleNode(".//div[@class='echapno']");
                                            if (chapterNoNode != null)
                                            {
                                                var parsed = chapterNoNode.InnerText.Replace("(", "").Replace(")", "");
                                                int chapterNo;
                                                if (int.TryParse(parsed, out chapterNo))
                                                {
                                                    selChapter.ChapterNo = chapterNo;
                                                }
                                                else
                                                {
                                                    // Keep the raw text, then try to salvage a number from it.
                                                    selChapter.ChapterNoStr = chapterNoNode.InnerText.Trim();
                                                    if (parsed.Contains(','))
                                                    {
                                                        // Comma-separated list: use the first entry.
                                                        selChapter.ChapterNo = Convert.ToInt32(parsed.Split(',')[0]);
                                                    }
                                                    else
                                                    {
                                                        // Blank out every non-digit character, then parse what is
                                                        // left. This still throws if digits end up separated by
                                                        // blanks; the per-div catch below logs that and the
                                                        // chapter is not added.
                                                        for (int z = 0; z < parsed.Length; z++)
                                                        {
                                                            if (!(parsed[z] >= '0' && parsed[z] <= '9'))
                                                            {
                                                                parsed = parsed.Replace(parsed[z].ToString(), " ");
                                                            }
                                                        }
                                                        selChapter.ChapterNo = Convert.ToInt32(parsed.Trim());
                                                    }
                                                }
                                            }

                                            var englishNode = node.SelectSingleNode(".//div[@class='englishchapter']");
                                            if (englishNode != null)
                                            {
                                                selChapter.Title = englishNode.InnerText.Trim();
                                            }

                                            var arabicNode = node.SelectSingleNode(".//div[@class='arabicchapter arabic']");
                                            if (arabicNode != null)
                                            {
                                                selChapter.TitleArabic = arabicNode.InnerText.Trim();
                                            }

                                            ctx.HadithChapters.Add(selChapter);
                                            counter++;
                                            if (counter > 100)
                                            {
                                                ctx.SaveChanges();
                                                counter = 0;
                                            }
                                            break;
                                        }

                                    case "arabic achapintro":
                                        // An intro can appear before any chapter div was seen; there is
                                        // nothing to attach it to in that case.
                                        if (selChapter != null)
                                        {
                                            selChapter.Intro = node.InnerText;
                                        }
                                        break;

                                    default:
                                        break;
                                }
                            }
                            catch (Exception ex)
                            {
                                Console.WriteLine("error dalam:" + ex.Message + "-" + ex.StackTrace);
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine("error luar:" + ex.Message + "-" + ex.StackTrace);
                }

                // Persist whatever was queued for this page, including after a failure.
                ctx.SaveChanges();
            }
        }
    }

    Console.WriteLine("selesai baca chapter");
    Console.ReadLine();
}
/// <summary>
/// Creates one HadithChapter for every HadithPage whose HadithID is 7, copying the
/// page's number and titles and using the page number as the chapter number.
/// All new chapters are saved in a single call at the end.
/// </summary>
static void RepairMalikChapter()
{
    HadithDBEntities ctx = new HadithDBEntities();
    var pages = ctx.HadithPages.Where(p => p.HadithID == 7);

    foreach (var page in pages)
    {
        HadithChapter chapter = new HadithChapter
        {
            PageNo = page.PageNo,
            HadithID = page.HadithID,
            Title = page.Title,
            TitleArabic = page.TitleArabic,
            // These pages have no separate chapter numbering: reuse the page number.
            ChapterNo = page.PageNo
        };
        ctx.HadithChapters.Add(chapter);
    }

    ctx.SaveChanges();
}