/// <summary>
/// Builds a <see cref="Book"/> entry from one table row and adds it to <c>items</c>,
/// unless its website name matches one of the filters or an entry with the same
/// website name has already been collected.
/// </summary>
/// <param name="tr">Table row node to read the entry from.</param>
private void ParseChapterRow(HtmlNode tr)
{
    var outerCell = hh.GetSingleDirectChildByType(tr, "td");
    var chapterLink = hh.GetSingleDirectChildByType(outerCell, "a");
    var detailCell = hh.GetSingleDirectChildByType(outerCell, "td");

    var entry = new Book
    {
        Name = metaData.Name.Trim(),
        IndexPage = new Uri(chapterLink.Attributes["href"].Value, UriKind.Absolute),
        LastestUpdateChapterName = chapterLink.InnerText.Trim(),
        WebSite = new WebSite
        {
            WebSiteName = hh.GetSingleDirectChildByType(detailCell, "font").InnerText.Trim()
        },
        LastUpdateTime = DateTime.Parse(hh.GetSingleDirectChildByType(detailCell, "td").InnerText)
    };

    var siteName = entry.WebSite.WebSiteName;
    if (siteName.Contains(WebsiteFilter1) || siteName.Contains(WebsiteFilter2))
    {
        return;
    }

    // Only the first entry seen for a given website name is kept.
    if (!items.Any(existing => existing.WebSite.WebSiteName == siteName))
    {
        items.Add(entry);
    }
}
/// <summary>
/// Builds a <see cref="Book"/> entry from one table row (chapter cell, website cell and
/// website-index cell) and adds it to <c>items</c>, unless its website name matches one of
/// the filters or an entry with the same website name has already been collected.
/// </summary>
/// <param name="tr">Table row node to read the entry from.</param>
private void ParseChapterRow(HtmlNode tr)
{
    var chapterCell = HtmlParseHelper.GetSingleDirectChildByType(tr, "td");
    var chapterLink = HtmlParseHelper.GetSingleDirectChildByType(chapterCell, "a");
    var siteCell = HtmlParseHelper.GetSingleDirectChildByTypeAndIndex(tr, "td", 1);
    var siteLink = HtmlParseHelper.GetSingleDirectChildByType(siteCell, "a");
    var siteIndexCell = HtmlParseHelper.GetSingleDirectChildByTypeAndIndex(tr, "td", 2);
    var siteIndexLink = HtmlParseHelper.GetSingleDirectChildByType(siteIndexCell, "a");

    var entry = new Book
    {
        // The row carries no timestamp that is read here, so the current time is recorded.
        LastUpdateTime = DateTime.Now,
        Name = metaData.Name.Trim(),
        // The href is appended to the fixed site root to form an absolute address.
        IndexPage = new Uri("http://www.xiaoelang.com" + siteIndexLink.Attributes["href"].Value, UriKind.Absolute),
        LastestUpdateChapterName = chapterLink.InnerText.Trim(),
        WebSite = new WebSite
        {
            WebSiteName = siteLink.InnerText.Trim()
        }
    };

    var siteName = entry.WebSite.WebSiteName;
    if (siteName.Contains(websiteFilter1) || siteName.Contains(websiteFilter2))
    {
        return;
    }

    // Only the first entry seen for a given website name is kept.
    if (!items.Any(existing => existing.WebSite.WebSiteName == siteName))
    {
        items.Add(entry);
    }
}
/// <summary>
/// Returns the chapters in <c>_db.Chapters</c> whose book has the same name as <paramref name="book"/>.
/// </summary>
/// <param name="book">Book whose name is used to select the chapters.</param>
/// <returns>The matching chapters as an array.</returns>
public Chapter[] GetChaptersByBook(Book book)
{
    return _db.Chapters
        .Where(chapter => chapter.Book.Name == book.Name)
        .ToArray();
}
/// <summary>
/// Creates the view model, starting downloads at index 1 and taking the batch size from the
/// "DefaultDownloadBatchSize" setting (10 when the setting cannot be read).
/// </summary>
/// <param name="targetBook">Book to display; <c>Book</c> is left untouched when this is null.</param>
public BookIndexViewModel(Book targetBook)
{
    if (targetBook != null)
    {
        Book = targetBook;
    }

    DownloadStartIndex = 1;

    int configuredBatchSize;
    if (AppSetting.TryGetSetting("DefaultDownloadBatchSize", out configuredBatchSize))
    {
        ChapterToBeDownloadedCount = configuredBatchSize;
    }
    else
    {
        ChapterToBeDownloadedCount = 10;
    }
}
/// <summary>
/// Builds a sample <see cref="Book"/> with an attached <see cref="WebSite"/>.
/// The object is not stored anywhere: the persistence calls below are commented out.
/// </summary>
private void AddOneRecord()
{
    var indexPage = new Uri("http://www.xiaoshuo999.org/files/article/html/0/421/1077782.html", UriKind.Absolute);

    var book = new Book
    {
        Author = "Charlie",
        IndexPage = indexPage,
        LastReadChapterId = 0,
        Name = "宰执天下",
        LastUpdateTime = DateTime.Today,
        WebSite = new WebSite
        {
            LandingPage = indexPage.ToString(),
            SearchEntry = null,
            WebSiteName = "起点"
        }
    };

    //db.WebSites.InsertOnSubmit(book.WebSite);
    //db.Books.InsertOnSubmit(book);
    //db.SubmitChanges();
}
/// <summary>
/// Creates a sample <see cref="Book"/> with fixed field values, linked to the given website.
/// The book is only returned; the save calls are commented out.
/// </summary>
/// <param name="webSite">Website assigned to the book's <c>WebSite</c> property.</param>
/// <returns>The newly created book.</returns>
private Book GetFakeBook(WebSite webSite)
{
    var fake = new Book
    {
        Author = "Charlie",
        IndexPage = new Uri("http://www.xiaoshuo999.org/files/article/html/0/421/1077782.html", UriKind.Absolute),
        LastReadChapterId = 0,
        Name = "宰执天下",
        LastUpdateTime = DateTime.Today,
        WebSite = webSite,
        RootUrl = "http://www.wanshuba.com"
    };

    //Storage.SaveBook(book);
    //db.Books.InsertOnSubmit(book);
    return fake;
}
/// <summary>
/// Loads the HTML page from the stream and runs <c>ParseChapterRow</c> over a fixed slice of
/// the rows of the page's table at index 4, collecting book/website pairs.
/// </summary>
/// <param name="inputStream">Stream containing the HTML page.</param>
/// <param name="state">Stored in <c>metaData</c> after an <c>as Book</c> cast.</param>
/// <returns>The <c>items</c> collection.</returns>
public object Parse(Stream inputStream, object state)
{
    metaData = state as Book;

    var document = new HtmlDocument();
    document.Load(inputStream);

    var bodyNode = hh.GetSingleChildByTypeChain(document.DocumentNode, new[] { "html", "body" });
    var resultTable = hh.GetSingleDirectChildByTypeAndIndex(bodyNode, "table", 4);
    var allRows = hh.GetDirectChildrenByType(resultTable, "tr").ToArray();

    // Only the rows selected by SubArray(allRows, 2, 9) are treated as book rows.
    var bookRows = hh.SubArray(allRows, 2, 9).ToArray();
    foreach (var row in bookRows)
    {
        ParseChapterRow(row);
    }

    return items;
}
/// <summary>
/// Copies the fields of <paramref name="_book"/> onto the stored book with the same name
/// (if there is one) and submits the pending changes.
/// </summary>
/// <param name="_book">Book carrying the new values; matched against stored books by name.</param>
/// <exception cref="ArgumentNullException"><paramref name="_book"/> is null.</exception>
public void UpdateBook(Book _book)
{
    if (_book == null)
    {
        throw new ArgumentNullException("_book");
    }

    // A single FirstOrDefault replaces Count() > 0 followed by First(), which ran the query twice.
    var stored = _db.Books
        .Where(book => book.Name == _book.Name)
        .FirstOrDefault();

    if (stored != null)
    {
        stored.Name = _book.Name;
        stored.IndexPage = _book.IndexPage;
        stored.LastReadChapterId = _book.LastReadChapterId;
        stored.LastUpdateTime = _book.LastUpdateTime;
        stored.WebSite = _book.WebSite;
        stored.Author = _book.Author;
    }

    _db.SubmitChanges();
}
/// <summary>
/// Click handler for a book button: resolves the clicked book against stored books and
/// starts the lookup of its index page link.
/// </summary>
/// <param name="sender">The clicked <c>Button</c>; its <c>DataContext</c> is read as a <see cref="Book"/>.</param>
/// <param name="e">Event data (unused).</param>
private void BookSelected(object sender, RoutedEventArgs e)
{
    var book = ((Button)sender).DataContext as Book;
    if (book == null)
    {
        // The button is not bound to a Book, so there is nothing to look up.
        return;
    }

    ProgressIndicatorHelper.StartProgressIndicator(true, "解析书籍目录链接");
    targetBook = Model.CheckBookExists(book);

    // Remove any previous subscription first so repeated clicks do not stack up handlers
    // and fire the completion callback several times.
    Model.GetBookIndexPageCompleted -= GetBookIndexPageCompleted;
    Model.GetBookIndexPageCompleted += GetBookIndexPageCompleted;

    if (targetBook == null)
    {
        // Not stored yet: work with the clicked book itself.
        targetBook = book;
    }
    else if (targetBook.Chapters == null)
    {
        targetBook.Chapters = PhoneStorage.GetPhoneStorageInstance().GetChaptersByBook(targetBook);
    }

    Model.GetBookSiteBookIndexPageLink(targetBook);
}
/// <summary>
/// Creates the "isostore:/SmartReader.sdf" database when it does not exist yet, then builds
/// the fake website, book and two chapters and saves them through <c>Storage</c>.
/// </summary>
public void PrepareTempDB()
{
    using (var context = new SmartReaderDataContext("isostore:/SmartReader.sdf"))
    {
        if (!context.DatabaseExists())
        {
            context.CreateDatabase();
        }
    }

    Storage = PhoneStorage.GetPhoneStorageInstance();

    // Build the test fixtures; each one is created from the previous one.
    TestWebSite = GetFakeWebSite();
    TestBook = GetFakeBook(TestWebSite);
    TestTextChapter = GetFakeTextChapter(TestBook);
    TestImageChapter = GetFakeImageChapter(TestBook);
    TestBook.Chapters = new[] { TestTextChapter, TestImageChapter };

    // Persist in this order: website, book, chapters.
    Storage.SaveWebSite(TestWebSite);
    Storage.SaveBook(TestBook);
    Storage.SaveChapters(TestBook.Chapters);
    //CreateFakeArticleImage(chapter);
}
/// <summary>
/// Creates the view model for the given book.
/// </summary>
/// <param name="book">Book assigned to <c>CurrentBook</c>.</param>
public ChapterViewModel(Book book)
{
    CurrentBook = book;
}
/// <summary>
/// Deletes the book through <c>_storage</c> and then refreshes the book list.
/// </summary>
/// <param name="deleteBook">Book to delete.</param>
public void DeleteBook(Book deleteBook)
{
    _storage.DeleteBook(deleteBook);

    RefreshBookList();
}
/// <summary>
/// Looks through all stored books for one with the same name and website name as
/// <paramref name="book"/>.
/// </summary>
/// <param name="book">Book to look for.</param>
/// <returns>The first matching stored book, or null when none matches.</returns>
public Book CheckBookExists(Book book)
{
    foreach (var candidate in PhoneStorage.GetPhoneStorageInstance().GetAllBooks())
    {
        if (candidate.Name != book.Name)
        {
            continue;
        }

        if (candidate.WebSite.WebSiteName != book.WebSite.WebSiteName)
        {
            continue;
        }

        return candidate;
    }

    return null;
}
/// <summary>
/// Downloads <c>book.IndexPage</c> and, in the download callback, parses the response with
/// <c>WebsiteBookIndexPageParser</c>, publishes the book as <c>SelectedBook</c> and raises
/// <c>GetBookIndexPageCompleted</c>.
/// </summary>
/// <param name="book">Book whose index page is downloaded; its <c>RootUrl</c> is updated from the response URI.</param>
/// <exception cref="TimeoutException">
/// Thrown from the download callback when the request ends with
/// <see cref="WebExceptionStatus.RequestCanceled"/>.
/// </exception>
public void DownloadAndParseWebSiteBookIndexPage(Book book)
{
    var downloader = new HttpContentDownloader();
    downloader.Download(book.IndexPage, ar =>
    {
        try
        {
            //At this step, we can get the index page in the search engine
            var state = (RequestState)ar.AsyncState;

            // Dispose the response once parsing is done so the connection is released.
            using (var response = (HttpWebResponse)state.Request.EndGetResponse(ar))
            {
                book.RootUrl = UrlHelper.GetRootUrlString(response.ResponseUri);
                var parser = new WebsiteBookIndexPageParser();
                try
                {
                    ProgressIndicatorHelper.CrossThreadStartProgressIndicator(true, "下载完成,解析目录中");
                    parser.Parse(response.GetResponseStream(), book);
                    SelectedBook = book;
                    ModelManager.GetBookIndexModel().Book = SelectedBook;

                    if (GetBookIndexPageCompleted != null)
                    {
                        GetBookIndexPageCompleted(this, null);
                    }
                }
                catch (Exception e)
                {
                    ExceptionHandler.HandleException(e);
                }
            }
        }
        catch (WebException e)
        {
            if (e.Status == WebExceptionStatus.RequestCanceled)
            {
                // Keep the original exception as the cause so the failure can be diagnosed.
                throw new TimeoutException(String.Format("连接{0}目录页超时", book.WebSite.WebSiteName), e);
            }

            // Other network failures used to be dropped silently; report them instead.
            ExceptionHandler.HandleException(e);
        }
    });
}
/// <summary>
/// Downloads <c>book.IndexPage</c>, parses the response with <c>WebSiteBookContentPageParser</c>
/// and then continues with <c>DownloadAndParseWebSiteBookIndexPage</c>.
/// </summary>
/// <param name="book">Book being resolved; its <c>RootUrl</c> is updated from the response URI.</param>
/// <exception cref="TimeoutException">
/// Thrown from the download callback when the request ends with
/// <see cref="WebExceptionStatus.RequestCanceled"/>.
/// </exception>
public void GetBookSiteBookIndexPageLink(Book book)
{
    var downloader = new HttpContentDownloader();
    try
    {
        downloader.Download(book.IndexPage, ar =>
        {
            try
            {
                //At this step, we can get the index page in the search engine
                var state = (RequestState)ar.AsyncState;

                // Dispose the response once parsing is done so the connection is released
                // before the next download starts.
                using (var response = (HttpWebResponse)state.Request.EndGetResponse(ar))
                {
                    book.RootUrl = UrlHelper.GetRootUrlString(response.ResponseUri);
                    var parser = new WebSiteBookContentPageParser();
                    parser.Parse(response.GetResponseStream(), book);
                }

                DownloadAndParseWebSiteBookIndexPage(book);
            }
            catch (WebException e)
            {
                if (e.Status == WebExceptionStatus.RequestCanceled)
                {
                    // Keep the original exception as the cause so the failure can be diagnosed.
                    throw new TimeoutException(String.Format("连接{0}超时", book.WebSite.WebSiteName), e);
                }

                // Other network failures used to be dropped silently; report them instead.
                ExceptionHandler.HandleException(e);
            }
        });
    }
    catch (WebException we)
    {
        ExceptionHandler.HandleException(we);
    }
}
/// <summary>
/// Marks the stored book with the same id as <paramref name="_book"/> for deletion (if one exists),
/// deletes the book's chapters and submits the changes.
/// </summary>
/// <param name="_book">Book to delete; matched against stored books by <c>Id</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="_book"/> is null.</exception>
public void DeleteBook(Book _book)
{
    // Same argument check as UpdateBook, instead of failing later inside the query.
    if (_book == null)
    {
        throw new ArgumentNullException("_book");
    }

    // A single FirstOrDefault replaces Count() > 0 followed by First(), which ran the query twice.
    var stored = _db.Books
        .Where(book => book.Id == _book.Id)
        .FirstOrDefault();

    if (stored != null)
    {
        _db.Books.DeleteOnSubmit(stored);
    }

    DeleteChaptersOfBook(_book);
    _db.SubmitChanges();
}
/// <summary>
/// Get Url of the website book index page: scans the node's HTML line by line for one of the
/// "back to index" texts, extracts the anchor that carries it and stores the anchor's target
/// in <c>item.IndexPage</c>. <c>item.IndexPage</c> is left unchanged when no such anchor is found.
/// </summary>
/// <param name="node">expect body node here</param>
/// <param name="item">Book whose <c>IndexPage</c> is set; <c>RootUrl</c> is used for links that do not contain "http".</param>
public void GetIndexPageLink(HtmlNode node, Book item)
{
    const string indexText1 = "回目录";
    const string indexText2 = "回书目";
    const string indexText3 = "目 录";
    const string anchorOpen = "<a";
    const string anchorClose = "</a>";

    var reader = new StringReader(node.InnerHtml);
    var sb = new StringBuilder();
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        sb.Append(line);
        if (!(line.Contains(indexText1) || line.Contains(indexText2) || line.Contains(indexText3)))
        {
            continue;
        }

        // Walk backwards through the anchors read so far until one contains an index text.
        // The previous condition only accepted anchors containing the first text (and neither
        // of the others), and threw from Substring once no earlier anchor was left.
        var searchArea = sb.ToString();
        string htmlLink = null;
        while (htmlLink == null)
        {
            var startIndex = searchArea.LastIndexOf(anchorOpen, StringComparison.Ordinal);
            if (startIndex < 0)
            {
                break;
            }

            var endIndex = searchArea.IndexOf(anchorClose, startIndex, StringComparison.Ordinal);
            if (endIndex > -1)
            {
                var candidate = searchArea.Substring(startIndex, endIndex - startIndex + anchorClose.Length);
                if (candidate.Contains(indexText1) || candidate.Contains(indexText2) || candidate.Contains(indexText3))
                {
                    htmlLink = candidate;
                }
            }

            if (htmlLink == null)
            {
                // Drop the rejected anchor and everything after it, then look further back.
                searchArea = searchArea.Substring(0, startIndex);
            }
        }

        if (htmlLink == null)
        {
            // The text was not inside a complete anchor; keep reading.
            continue;
        }

        var linkNode = new HtmlDocument();
        linkNode.LoadHtml(htmlLink);
        var aNode = hh.GetSingleDirectChildByType(linkNode.DocumentNode, "a");
        var link = aNode.Attributes["href"].Value;

        if (link.Contains("http"))
        {
            item.IndexPage = new Uri(link, UriKind.Absolute);
        }
        else if (item.RootUrl.EndsWith(link))
        {
            // The link repeats the tail of the root URL, so the root itself is the index page.
            item.IndexPage = new Uri(item.RootUrl, UriKind.Absolute);
        }
        else
        {
            item.IndexPage = new Uri(item.RootUrl + link, UriKind.Absolute);
        }

        return;
    }
}
/// <summary>
/// Re-downloads the index page of <c>Book</c>, parses it into a temporary book and appends the
/// chapters whose names are not yet present in <c>Book.Chapters</c>. New chapters are saved
/// through <c>_storage</c>; when there are none a message box is shown.
/// </summary>
public void Refresh()
{
    ProgressIndicatorHelper.StartProgressIndicator(true , "更新本书目录");
    var downloader = new HttpContentDownloader();
    DownloaderList.Add(downloader);
    downloader.Download(Book.IndexPage, ar =>
    {
        try
        {
            //At this step, we can get the index page in the search engine
            var state = (RequestState)ar.AsyncState;
            state.stopTimer = true;

            var temp = new Book();

            // Dispose the response once parsing is done so the connection is released.
            using (var response = (HttpWebResponse)state.Request.EndGetResponse(ar))
            {
                Book.RootUrl = UrlHelper.GetRootUrlString(response.ResponseUri);
                temp.RootUrl = Book.RootUrl;
                var parser = new WebsiteBookIndexPageParser();
                parser.Parse(response.GetResponseStream(), temp);
            }

            // Index the known chapter names once instead of rescanning Book.Chapters per parsed chapter.
            var knownNames = new HashSet<string>(Book.Chapters.Select(c => c.ChapterName));
            var newChapters = temp.Chapters
                .Where(chapter => !knownNames.Contains(chapter.ChapterName))
                .ToList();

            if (newChapters.Count > 0)
            {
                var totalChapters = new List<Chapter>();
                totalChapters.AddRange(Book.Chapters);
                totalChapters.AddRange(newChapters);
                _storage.SaveChapters(newChapters);
                Book.Chapters = totalChapters.ToArray();
                LastPage();
            }
            else
            {
                CrossThreadHelper.CrossThreadMethodCall(() => MessageBox.Show("本书尚未有新的章节"));
            }
        }
        finally
        {
            // Stop the indicator even when the download or parsing fails, so it does not spin forever.
            ProgressIndicatorHelper.StopProgressIndicator();
        }
    });
}
/// <summary>
/// Inserts the book when <c>IsBookExist</c> reports it as missing, otherwise updates it via
/// <c>UpdateBook</c>; the changes are submitted afterwards in both cases.
/// </summary>
/// <param name="book">Book to insert or update.</param>
public void SaveBook(Book book)
{
    var alreadyStored = IsBookExist(book);

    if (!alreadyStored)
    {
        _db.Books.InsertOnSubmit(book);
    }
    else
    {
        UpdateBook(book);
    }

    _db.SubmitChanges();
}
/// <summary>
/// Tells whether <c>_db.Books</c> contains a book with the same <c>Id</c> as <paramref name="book"/>.
/// </summary>
/// <param name="book">Book whose id is looked up.</param>
/// <returns>True when at least one stored book has that id; otherwise false.</returns>
public bool IsBookExist(Book book)
{
    return _db.Books
        .Where(stored => stored.Id == book.Id)
        .Any();
}
/// <summary>
/// Marks every chapter whose book has the same id as <paramref name="_book"/> for deletion and
/// passes the same chapter query on to <c>DeleteArticleImagesOfBook</c>.
/// Changes are not submitted here.
/// </summary>
/// <param name="_book">Book whose chapters are deleted.</param>
private static void DeleteChaptersOfBook(Book _book)
{
    var bookChapters = from chapter in _db.Chapters
                       where chapter.Book.Id == _book.Id
                       select chapter;

    // Any() only needs to know whether a row exists; Count() > 0 counted all of them.
    if (bookChapters.Any())
    {
        _db.Chapters.DeleteAllOnSubmit(bookChapters);
    }

    DeleteArticleImagesOfBook(bookChapters);
}
/// <summary>
/// Saves the book's website when it is not stored yet, then saves the book together with its
/// chapters unless the book is already stored.
/// </summary>
/// <param name="book">Book to persist.</param>
public void SaveCurrentBook(Book book)
{
    var phoneStorage = PhoneStorage.GetPhoneStorageInstance();

    if (!phoneStorage.IsWebSiteExist(book.WebSite))
    {
        phoneStorage.SaveWebSite(book.WebSite);
    }

    if (phoneStorage.IsBookExist(book))
    {
        // Already stored: nothing else to do.
        return;
    }

    phoneStorage.SaveBook(book);
    phoneStorage.SaveChapters(book.Chapters);
}
/// <summary>
/// Decodes the index page from the stream, collects its chapter hyperlinks via
/// <c>ParseIndexContent</c> and stores the resulting chapters on the book. When the book already
/// has chapters, only the entries of <c>chapterList</c> beyond the existing count are appended.
/// </summary>
/// <param name="inputStream">Stream with the page content; it is closed by this method.</param>
/// <param name="state">Stored in <c>book</c> after an <c>as Book</c> cast.</param>
/// <returns>The <c>book</c> that was updated.</returns>
public object Parse(Stream inputStream, object state)
{
    var stopWatch = new Stopwatch();
    stopWatch.Start();
    book = state as Book;

    string content1;
    try
    {
        content1 = EncodingHelper.FromGBKToUnicode(inputStream);
    }
    finally
    {
        // Close the stream even when decoding fails.
        if (inputStream != null)
        {
            inputStream.Close();
        }
    }
    var decodeTime = stopWatch.ElapsedMilliseconds;
    var measured = decodeTime;

    var doc = new HtmlDocument();
    doc.LoadHtml(content1);
    // Fall back to the document root when there is no html/body chain.
    var body = hh.GetSingleChildByTypeChain(doc.DocumentNode, new string[] { "html", "body" }) ?? doc.DocumentNode;
    CleanHtmlTree(body);
    var cleanTreeTime = stopWatch.ElapsedMilliseconds - measured;
    measured += cleanTreeTime;

    var indexNode = GetIndexContentNode(body);
    var hyperLinkNodes = new List<HtmlNode>();
    hh.GetAllHyperlinkElementWithFilter(indexNode, hyperLinkNodes);
    var getAllHyperLinkTime = stopWatch.ElapsedMilliseconds - measured;
    measured += getAllHyperLinkTime;

    cleanHyperLinkNode(hyperLinkNodes);
    var cleanHyperLinkTime = stopWatch.ElapsedMilliseconds - measured;
    measured += cleanHyperLinkTime;

    Debug.Assert(hyperLinkNodes.Count > 0);
    foreach (var link in hyperLinkNodes)
    {
        ParseIndexContent(link);
    }
    var parseIndexContentTime = stopWatch.ElapsedMilliseconds - measured;

    if (book.Chapters == null)
    {
        book.Chapters = chapterList.ToArray();
    }
    else
    {
        // Keep the existing chapters and append only the parsed entries past the existing count.
        var merged = book.Chapters.ToList();
        for (var i = merged.Count; i < chapterList.Count; i++)
        {
            merged.Add(chapterList[i]);
        }
        book.Chapters = merged.ToArray();
    }

    var totalTime = stopWatch.ElapsedMilliseconds;
    stopWatch.Stop();

    // The timing report used to be formatted and then thrown away; write it to the debug output.
    Debug.WriteLine(String.Format(
        "cleanTreeTime {0}\n getAllHyperLinkTime {1}\n decodeTime {2}\n cleanHyperLinkTime {3}\n parseIndexContentTime {4}\n Totaltime {5}\n",
        cleanTreeTime, getAllHyperLinkTime, decodeTime, cleanHyperLinkTime, parseIndexContentTime, totalTime));

    return book;
}
/// <summary>
/// Creates a sample chapter for the given book with fixed field values, saves it through
/// <c>Storage.SaveChapters</c> and returns it.
/// </summary>
/// <param name="book">Book the chapter is attached to.</param>
/// <returns>The saved chapter.</returns>
private Chapter CreateFakeChapter(Book book)
{
    var fake = new Chapter
    {
        Book = book,
        SaveContent1 = "String Content",
        ChapterUri = new Uri("http://www.tszw.net/files/article/html/79/79194/2417331.html", UriKind.Absolute),
        ChapterName = "敌手的面目",
        LastUpdateTime = DateTime.Now,
        IsImageContent = true
    };

    //db.Chapters.InsertOnSubmit(chapter);
    Storage.SaveChapters(new[] { fake });
    return fake;
}
/// <summary>
/// Finds the first hyperlink under <paramref name="node"/> whose text contains one of the
/// "back to index" texts and stores its target in <c>item.IndexPage</c>. When no hyperlink
/// matches, the url stays empty and is resolved by the same rules below.
/// </summary>
/// <param name="node">Node whose hyperlinks are collected via <c>hh.GetAllHyperlinkElementWithFilter</c>.</param>
/// <param name="item">Book whose <c>IndexPage</c> is set; <c>RootUrl</c> is used for links that do not contain "http".</param>
public void GetIndexPageLinkNew(HtmlNode node, Book item)
{
    const string indexText1 = "回目录";
    const string indexText2 = "回书目";
    const string indexText3 = "目 录";

    var anchors = new List<HtmlNode>();
    hh.GetAllHyperlinkElementWithFilter(node, anchors);

    string url = string.Empty;
    foreach (var anchor in anchors)
    {
        var text = anchor.InnerText;
        var isIndexLink = text.Contains(indexText1) || text.Contains(indexText2) || text.Contains(indexText3);
        if (!isIndexLink)
        {
            continue;
        }

        url = anchor.Attributes["href"].Value;
        break;
    }

    Uri indexPage;
    if (url.Contains("http"))
    {
        indexPage = new Uri(url, UriKind.Absolute);
    }
    else if (item.RootUrl.EndsWith(url))
    {
        // The link repeats the tail of the root URL, so the root itself is used.
        indexPage = new Uri(item.RootUrl, UriKind.Absolute);
    }
    else
    {
        indexPage = new Uri(item.RootUrl + url, UriKind.Absolute);
    }

    item.IndexPage = indexPage;
}