/// <summary>
        /// Builds a <c>Book</c> entry from one result row and appends it to <c>items</c>
        /// unless the site name matches a filter or the site is already listed.
        /// </summary>
        /// <param name="tr">Table row to read; its first td holds the chapter link and a nested td.</param>
        private void ParseChapterRow(HtmlNode tr)
        {
            var outerCell = hh.GetSingleDirectChildByType(tr, "td");
            var chapterLink = hh.GetSingleDirectChildByType(outerCell, "a");
            var detailCell = hh.GetSingleDirectChildByType(outerCell, "td");

            var candidate = new Book
            {
                Name = metaData.Name.Trim(),
                IndexPage = new Uri(chapterLink.Attributes["href"].Value, UriKind.Absolute),
                LastestUpdateChapterName = chapterLink.InnerText.Trim(),
                WebSite = new WebSite
                {
                    WebSiteName = hh.GetSingleDirectChildByType(detailCell, "font").InnerText.Trim()
                },
                // NOTE(review): parsed with the current culture; confirm the page's date format.
                LastUpdateTime = DateTime.Parse(hh.GetSingleDirectChildByType(detailCell, "td").InnerText)
            };

            var siteName = candidate.WebSite.WebSiteName;
            if (siteName.Contains(WebsiteFilter1) || siteName.Contains(WebsiteFilter2))
            {
                return;
            }

            // Keep only the first row seen for each web site name.
            var siteAlreadyListed = items.Any(existing => existing.WebSite.WebSiteName == siteName);
            if (!siteAlreadyListed)
            {
                items.Add(candidate);
            }
        }
        /// <summary>
        /// Builds a <c>Book</c> entry from one result row (chapter link in the first td,
        /// site link in td index 1, index-page link in td index 2) and appends it to
        /// <c>items</c> unless the site name matches a filter or the site is already listed.
        /// </summary>
        /// <param name="tr">Table row to read.</param>
        private void ParseChapterRow(HtmlNode tr)
        {
            var chapterCell = HtmlParseHelper.GetSingleDirectChildByType(tr, "td");
            var chapterLink = HtmlParseHelper.GetSingleDirectChildByType(chapterCell, "a");
            var siteCell = HtmlParseHelper.GetSingleDirectChildByTypeAndIndex(tr, "td", 1);
            var siteLink = HtmlParseHelper.GetSingleDirectChildByType(siteCell, "a");
            var indexCell = HtmlParseHelper.GetSingleDirectChildByTypeAndIndex(tr, "td", 2);
            var indexLink = HtmlParseHelper.GetSingleDirectChildByType(indexCell, "a");

            var candidate = new Book
            {
                // The row's own date is not read here; the entry is stamped with the parse time.
                LastUpdateTime = DateTime.Now,
                Name = metaData.Name.Trim(),
                // The href is appended to the fixed site root to form an absolute address.
                IndexPage = new Uri("http://www.xiaoelang.com" + indexLink.Attributes["href"].Value, UriKind.Absolute),
                LastestUpdateChapterName = chapterLink.InnerText.Trim(),
                WebSite = new WebSite { WebSiteName = siteLink.InnerText.Trim() }
            };

            var siteName = candidate.WebSite.WebSiteName;
            if (siteName.Contains(websiteFilter1) || siteName.Contains(websiteFilter2))
            {
                return;
            }

            // Keep only the first row seen for each web site name.
            var siteAlreadyListed = items.Any(existing => existing.WebSite.WebSiteName == siteName);
            if (!siteAlreadyListed)
            {
                items.Add(candidate);
            }
        }
Example #3
0
        /// <summary>
        /// Returns the chapters in <c>_db.Chapters</c> whose book has the same name as <paramref name="book"/>.
        /// </summary>
        /// <param name="book">Book whose <c>Name</c> is used as the filter.</param>
        /// <returns>The matching chapters as an array.</returns>
        public Chapter[] GetChaptersByBook(Book book)
        {
            return _db.Chapters
                      .Where(chapter => chapter.Book.Name == book.Name)
                      .ToArray();
        }
        /// <summary>
        /// Creates the view model, starting the download index at 1 and taking the batch size
        /// from the "DefaultDownloadBatchSize" setting, or 10 when that setting cannot be read.
        /// </summary>
        /// <param name="targetBook">Book to show; ignored when null.</param>
        public BookIndexViewModel (Book targetBook)
        {
            if (targetBook != null)
            {
                Book = targetBook;
            }

            DownloadStartIndex = 1;

            int configuredBatchSize;
            if (AppSetting.TryGetSetting("DefaultDownloadBatchSize", out configuredBatchSize))
            {
                ChapterToBeDownloadedCount = configuredBatchSize;
            }
            else
            {
                ChapterToBeDownloadedCount = 10;
            }
        }
Example #5
0
 /// <summary>
 /// Builds a hard-coded sample book with its web site. The inserts are commented out,
 /// so nothing is stored.
 /// </summary>
 private void AddOneRecord()
 {
     var book = new Book
     {
         Author = "Charlie",
         IndexPage = new Uri("http://www.xiaoshuo999.org/files/article/html/0/421/1077782.html", UriKind.Absolute),
         LastReadChapterId = 0,
         Name = "宰执天下",
         LastUpdateTime = DateTime.Today
     };

     // The site's landing page reuses the book's index page address.
     book.WebSite = new WebSite
     {
         LandingPage = book.IndexPage.ToString(),
         SearchEntry = null,
         WebSiteName = "起点"
     };

     //db.WebSites.InsertOnSubmit(book.WebSite);
     //db.Books.InsertOnSubmit(book);
     //db.SubmitChanges();
 }
Example #6
0
 /// <summary>
 /// Creates a hard-coded sample book attached to the given web site. Nothing is stored here;
 /// the save calls are commented out.
 /// </summary>
 /// <param name="webSite">Web site assigned to the book.</param>
 /// <returns>The newly built book.</returns>
 private Book GetFakeBook(WebSite webSite)
 {
     var book = new Book
     {
         Author = "Charlie",
         IndexPage = new Uri("http://www.xiaoshuo999.org/files/article/html/0/421/1077782.html", UriKind.Absolute),
         LastReadChapterId = 0,
         Name = "宰执天下",
         LastUpdateTime = DateTime.Today,
         WebSite = webSite,
         RootUrl = "http://www.wanshuba.com"
     };

     //Storage.SaveBook(book);
     //db.Books.InsertOnSubmit(book);
     return book;
 }
        /// <summary>
        /// Get the book and website pair list information
        /// </summary>
        /// <param name="inputStream">HTML stream that is loaded into an <c>HtmlDocument</c>.</param>
        /// <param name="state">Cast to <c>Book</c> with <c>as</c> and kept in <c>metaData</c> for the row parser.</param>
        /// <returns>The <c>items</c> collection after the selected rows have been processed.</returns>
        public object Parse(Stream inputStream, object state)
        {
            metaData = state as Book;

            var document = new HtmlDocument();
            document.Load(inputStream);

            var bodyNode = hh.GetSingleChildByTypeChain(document.DocumentNode, new[] { "html", "body" });
            var resultTable = hh.GetSingleDirectChildByTypeAndIndex(bodyNode, "table", 4);

            // NOTE(review): table index 4 and the (2, 9) slice are tied to the page layout;
            // presumably they select the result rows. Confirm against the target page.
            var allRows = hh.GetDirectChildrenByType(resultTable, "tr").ToArray();
            foreach (var row in hh.SubArray(allRows, 2, 9).ToArray())
            {
                ParseChapterRow(row);
            }

            return items;
        }
Example #8
0
        /// <summary>
        /// Copies the fields of <paramref name="_book"/> onto the stored book with the same name
        /// (if there is one) and submits the pending changes.
        /// </summary>
        /// <param name="_book">Book carrying the new values; matched against stored books by <c>Name</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="_book"/> is null.</exception>
        public void UpdateBook(Book _book)
        {
            if (_book == null) throw new ArgumentNullException("_book");

            // One FirstOrDefault query replaces Count() followed by First(),
            // which ran the same query twice.
            var stored = _db.Books
                            .Where(book => book.Name == _book.Name)
                            .FirstOrDefault();

            if (stored != null)
            {
                stored.Name = _book.Name;
                stored.IndexPage = _book.IndexPage;
                stored.LastReadChapterId = _book.LastReadChapterId;
                stored.LastUpdateTime = _book.LastUpdateTime;
                stored.WebSite = _book.WebSite;
                stored.Author = _book.Author;
            }

            // Submitted even when nothing matched, as before.
            _db.SubmitChanges();
        }
        /// <summary>
        /// Handles a click on a book button: starts the progress indicator, resolves the book
        /// bound to the button against the stored books, and asks the model for its index page link.
        /// </summary>
        /// <param name="sender">The clicked <c>Button</c>; its <c>DataContext</c> is read as a <c>Book</c>.</param>
        /// <param name="e">Event data (unused).</param>
        private void BookSelected(object sender, RoutedEventArgs e)
        {
            ProgressIndicatorHelper.StartProgressIndicator(true, "解析书籍目录链接");

            var book = ((Button) sender).DataContext as Book;

            targetBook = Model.CheckBookExists(book);

            // Detach before attaching: a plain += on every selection would stack the same
            // handler, so the completion callback would run once per earlier selection.
            Model.GetBookIndexPageCompleted -= GetBookIndexPageCompleted;
            Model.GetBookIndexPageCompleted += GetBookIndexPageCompleted;

            if (targetBook == null)
            {
                // Not stored yet: continue with the book bound to the button.
                targetBook = book;
            }
            else if (targetBook.Chapters == null)
            {
                targetBook.Chapters = PhoneStorage.GetPhoneStorageInstance().GetChaptersByBook(targetBook);
            }

            Model.GetBookSiteBookIndexPageLink(targetBook);
        }
Example #10
0
        /// <summary>
        /// Creates the SmartReader database if it does not exist yet, then builds the fake
        /// web site, book and two chapters and saves them through <c>Storage</c>.
        /// </summary>
        public void PrepareTempDB()
        {
            using (var context = new SmartReaderDataContext("isostore:/SmartReader.sdf"))
            {
                if (!context.DatabaseExists())
                {
                    context.CreateDatabase();
                }
            }

            Storage = PhoneStorage.GetPhoneStorageInstance();

            // Build the test objects; the book needs the site, the chapters need the book.
            TestWebSite = GetFakeWebSite();
            TestBook = GetFakeBook(TestWebSite);
            TestTextChapter = GetFakeTextChapter(TestBook);
            TestImageChapter = GetFakeImageChapter(TestBook);
            TestBook.Chapters = new[] { TestTextChapter, TestImageChapter };

            // Save in the same order: site, book, chapters.
            Storage.SaveWebSite(TestWebSite);
            Storage.SaveBook(TestBook);
            Storage.SaveChapters(TestBook.Chapters);
            //CreateFakeArticleImage(chapter);
        }
Example #11
0
 /// <summary>
 /// Creates the view model and stores the given book in <c>CurrentBook</c>.
 /// </summary>
 /// <param name="book">Book assigned to <c>CurrentBook</c>.</param>
 public ChapterViewModel(Book book)
 {
     CurrentBook = book;
 }
Example #12
0
 /// <summary>
 /// Deletes the given book through <c>_storage</c> and then refreshes the book list.
 /// </summary>
 /// <param name="deleteBook">Book passed to <c>_storage.DeleteBook</c>.</param>
 public void DeleteBook(Book deleteBook)
 {
     _storage.DeleteBook(deleteBook);
     RefreshBookList();
 }
Example #13
0
        /// <summary>
        /// Looks through all stored books for one with the same name and the same web site name
        /// as <paramref name="book"/>.
        /// </summary>
        /// <param name="book">Book to look for.</param>
        /// <returns>The first stored book that matches, or null when none does.</returns>
        public Book CheckBookExists(Book book)
        {
            var storedBooks = PhoneStorage.GetPhoneStorageInstance().GetAllBooks();

            foreach (var stored in storedBooks)
            {
                if (stored.Name != book.Name)
                {
                    continue;
                }

                if (stored.WebSite.WebSiteName != book.WebSite.WebSiteName)
                {
                    continue;
                }

                return stored;
            }

            return null;
        }
Example #14
0
        /// <summary>
        /// Downloads the book's index page, parses it with <c>WebsiteBookIndexPageParser</c>,
        /// publishes the book as the selected one and raises <c>GetBookIndexPageCompleted</c>.
        /// </summary>
        /// <param name="book">Book whose <c>IndexPage</c> is downloaded; its <c>RootUrl</c> is updated from the response.</param>
        /// <exception cref="TimeoutException">
        /// Thrown from the download callback when the request was cancelled.
        /// </exception>
        public void DownloadAndParseWebSiteBookIndexPage(Book book)
        {
            var downloader = new HttpContentDownloader();
            downloader.Download(book.IndexPage,
                ar =>
                {
                    try
                    {
                        //At this step, we can get the index page in the search engine 
                        var state = (RequestState)ar.AsyncState;

                        // Dispose the response once parsing is done; it was never released before.
                        using (var response = (HttpWebResponse)state.Request.EndGetResponse(ar))
                        {
                            book.RootUrl = UrlHelper.GetRootUrlString(response.ResponseUri);
                            var parser = new WebsiteBookIndexPageParser();
                            try
                            {
                                ProgressIndicatorHelper.CrossThreadStartProgressIndicator(true, "下载完成,解析目录中");
                                parser.Parse(response.GetResponseStream(), book);
                                SelectedBook = book;
                                ModelManager.GetBookIndexModel().Book = SelectedBook;

                                if (GetBookIndexPageCompleted != null)
                                {
                                    GetBookIndexPageCompleted(this, null);
                                }
                            }
                            catch (Exception e)
                            {
                                ExceptionHandler.HandleException(e);
                            }
                        }
                    }
                    catch (WebException e)
                    {
                        if (e.Status == WebExceptionStatus.RequestCanceled)
                        {
                            // Keep the original failure as the inner exception for diagnostics.
                            throw new TimeoutException(String.Format("连接{0}目录页超时", book.WebSite.WebSiteName), e);
                        }

                        // Other network failures used to be dropped silently; report them
                        // through the same handler used for parse failures above.
                        ExceptionHandler.HandleException(e);
                    }
              
                });
        }
Example #15
0
        /// <summary>
        /// Downloads the page at the book's <c>IndexPage</c>, parses it with
        /// <c>WebSiteBookContentPageParser</c>, then starts
        /// <c>DownloadAndParseWebSiteBookIndexPage</c> for the same book.
        /// </summary>
        /// <param name="book">Book to resolve; its <c>RootUrl</c> is updated from the response.</param>
        /// <exception cref="TimeoutException">
        /// Thrown from the download callback when the request was cancelled.
        /// </exception>
        public void GetBookSiteBookIndexPageLink(Book book)
        {
            var downloader = new HttpContentDownloader();
            try
            {
                downloader.Download(book.IndexPage,
                    ar =>
                    {
                        try
                        {
                            //At this step, we can get the index page in the search engine 
                            var state = (RequestState)ar.AsyncState;

                            // Release the response before the next download starts;
                            // it was never disposed before.
                            using (var response = (HttpWebResponse)state.Request.EndGetResponse(ar))
                            {
                                book.RootUrl = UrlHelper.GetRootUrlString(response.ResponseUri);
                                var parser = new WebSiteBookContentPageParser();
                                parser.Parse(response.GetResponseStream(), book);
                            }

                            DownloadAndParseWebSiteBookIndexPage(book);

                        }
                        catch (WebException e)
                        {
                            if (e.Status == WebExceptionStatus.RequestCanceled)
                            {
                                // Keep the original failure as the inner exception for diagnostics.
                                throw new TimeoutException(String.Format("连接{0}超时", book.WebSite.WebSiteName), e);
                            }

                            // Other network failures used to be dropped silently; report them
                            // the same way as the outer catch below.
                            ExceptionHandler.HandleException(e);
                        }

                    });
            }
            catch (WebException we)
            {
                ExceptionHandler.HandleException(we);
            }
        }
Example #16
0
        /// <summary>
        /// Marks the stored book with the same <c>Id</c> for deletion (if there is one),
        /// deletes the chapters of the book, and submits the changes.
        /// </summary>
        /// <param name="_book">Book to delete; matched against stored books by <c>Id</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="_book"/> is null.</exception>
        public void DeleteBook(Book _book)
        {
            if (_book == null) throw new ArgumentNullException("_book");

            // One FirstOrDefault query replaces Count() followed by First(),
            // which ran the same query twice.
            var stored = _db.Books
                            .Where(book => book.Id == _book.Id)
                            .FirstOrDefault();

            if (stored != null)
            {
                _db.Books.DeleteOnSubmit(stored);
            }

            DeleteChaptersOfBook(_book);
            _db.SubmitChanges();
        }
        /// <summary>
        /// Get Url of the website book index page
        /// </summary>
        /// <remarks>
        /// Reads the node's inner HTML line by line. On the first line that mentions one of the
        /// "back to index" captions, it walks backwards through the anchors read so far until it
        /// finds one whose markup contains a caption, then stores that anchor's target in
        /// <c>item.IndexPage</c>. If no anchor qualifies, reading continues with the next line;
        /// if no line qualifies, <c>item.IndexPage</c> is left unchanged.
        /// </remarks>
        /// <param name="node">expect body node here</param>
        /// <param name="item">Book whose <c>IndexPage</c> is set; its <c>RootUrl</c> resolves relative links.</param>
        public void GetIndexPageLink (HtmlNode node, Book item)
        {
            const string indexText1 = "回目录";
            const string indexText2 = "回书目";
            const string indexText3 = "目 录";

            var sb = new StringBuilder();
            using (var reader = new StringReader(node.InnerHtml))
            {
                string line;
                // ReadLine returns null at the end of the text; the old Peek() > 0 test
                // also stopped early at a NUL character.
                while ((line = reader.ReadLine()) != null)
                {
                    sb.Append(line);
                    if (!(line.Contains(indexText1) || line.Contains(indexText2) || line.Contains(indexText3)))
                    {
                        continue;
                    }

                    // Walk backwards over the anchors seen so far. The previous loop only stopped
                    // on indexText1 and ran past the start of the text for the other two captions.
                    var html = sb.ToString();
                    string htmlLink = null;
                    var searchEnd = html.Length;
                    while (htmlLink == null)
                    {
                        var scope = html.Substring(0, searchEnd);
                        var startIndex = scope.LastIndexOf("<a", StringComparison.Ordinal);
                        if (startIndex < 0)
                        {
                            break; // no anchor left to inspect
                        }

                        var endIndex = scope.LastIndexOf("</a>", StringComparison.Ordinal);
                        searchEnd = startIndex;
                        if (endIndex < startIndex)
                        {
                            continue; // anchor is not closed inside the scanned text
                        }

                        var candidate = scope.Substring(startIndex, endIndex - startIndex + 4);
                        if (candidate.Contains(indexText1) || candidate.Contains(indexText2) || candidate.Contains(indexText3))
                        {
                            htmlLink = candidate;
                        }
                    }

                    if (htmlLink == null)
                    {
                        // The caption was not inside any anchor read so far; keep reading.
                        continue;
                    }

                    var linkNode = new HtmlDocument();
                    linkNode.LoadHtml(htmlLink);
                    var aNode = hh.GetSingleDirectChildByType(linkNode.DocumentNode, "a");
                    var link = aNode.Attributes["href"].Value;
                    if (link.Contains("http"))
                    {
                        item.IndexPage = new Uri(link, UriKind.Absolute);
                        //item.IndexPageUri = new Uri(link, UriKind.Absolute);
                        return;
                    }
                    if (item.RootUrl.EndsWith(link))
                    {
                        // The link repeats the tail of the root URL, so the root is the index page.
                        item.IndexPage = new Uri(item.RootUrl, UriKind.Absolute);
                        return;
                    }

                    item.IndexPage = new Uri(item.RootUrl + link, UriKind.Absolute);
                    //item.IndexPageUri = new Uri(item.PageRootUri + link, UriKind.Absolute);
                    return;
                }
            }
        }
        /// <summary>
        /// Downloads the book's index page again, saves any chapters whose name is not yet in
        /// <c>Book.Chapters</c>, and either moves to the last page or tells the user there is
        /// nothing new.
        /// </summary>
        public void Refresh()
        {
            ProgressIndicatorHelper.StartProgressIndicator(true , "更新本书目录");
            var downloader = new HttpContentDownloader();
            DownloaderList.Add(downloader);
            downloader.Download(Book.IndexPage,
                ar =>
                {
                    try
                    {
                        //At this step, we can get the index page in the search engine 
                        var state = (RequestState)ar.AsyncState;
                        state.stopTimer = true;

                        // Parse into a scratch book so the current chapter list stays untouched.
                        Book temp;

                        // Dispose the response once parsing is done; it was never released before.
                        using (var response = (HttpWebResponse)state.Request.EndGetResponse(ar))
                        {
                            Book.RootUrl = UrlHelper.GetRootUrlString(response.ResponseUri);
                            var parser = new WebsiteBookIndexPageParser();

                            temp = new Book {RootUrl = Book.RootUrl};
                            parser.Parse(response.GetResponseStream(), temp );
                        }

                        var newChapters = temp.Chapters
                            .Where(chapter => !Book.Chapters.Any(c => c.ChapterName == chapter.ChapterName))
                            .ToList();

                        if (newChapters.Count > 0)
                        {
                            var totalChapters = new List<Chapter>();
                            totalChapters.AddRange(Book.Chapters);
                            totalChapters.AddRange(newChapters);
                            _storage.SaveChapters(newChapters);
                            Book.Chapters = totalChapters.ToArray();
                            LastPage();
                        }
                        else
                        {
                            CrossThreadHelper.CrossThreadMethodCall(() => MessageBox.Show("本书尚未有新的章节"));
                        }
                    }
                    finally
                    {
                        // Stop the indicator on failure too; the exception still propagates as before.
                        ProgressIndicatorHelper.StopProgressIndicator();
                    }
                });
        }
Example #19
0
 /// <summary>
 /// Inserts the book when <c>IsBookExist</c> reports it missing, otherwise updates it,
 /// then submits the pending changes.
 /// </summary>
 /// <param name="book">Book to insert or update.</param>
 public void SaveBook(Book book)
 {
     if (!IsBookExist(book))
     {
         _db.Books.InsertOnSubmit(book);
     }
     else
     {
         // NOTE(review): if UpdateBook already submits, the submit below is redundant on this path. Confirm.
         UpdateBook(book);
     }

     _db.SubmitChanges();
 }
Example #20
0
        /// <summary>
        /// Tells whether <c>_db.Books</c> contains a book with the same <c>Id</c> as <paramref name="book"/>.
        /// </summary>
        /// <param name="book">Book whose <c>Id</c> is looked up.</param>
        /// <returns>True when at least one stored book has that id; otherwise false.</returns>
        public bool IsBookExist (Book book)
        {
            return _db.Books
                      .Where(stored => stored.Id == book.Id)
                      .Any();
        }
Example #21
0
        /// <summary>
        /// Marks every chapter belonging to the given book for deletion and passes the same
        /// chapter query to <c>DeleteArticleImagesOfBook</c>. Changes are not submitted here.
        /// </summary>
        /// <param name="_book">Book whose chapters are selected by <c>Id</c>.</param>
        private static void DeleteChaptersOfBook(Book _book)
        {
            var bookChapters = from chapter in _db.Chapters
                               where chapter.Book.Id == _book.Id
                               select chapter;

            // Any() only asks whether a row exists; Count() > 0 counted every matching row.
            if (bookChapters.Any())
            {
                _db.Chapters.DeleteAllOnSubmit(bookChapters);
            }

            DeleteArticleImagesOfBook(bookChapters);
        }
Example #22
0
        /// <summary>
        /// Saves the book's web site if it is not stored yet, then saves the book and its
        /// chapters if the book is not stored yet.
        /// </summary>
        /// <param name="book">Book to save together with its <c>WebSite</c> and <c>Chapters</c>.</param>
        public void SaveCurrentBook(Book book)
        {
            var storage = PhoneStorage.GetPhoneStorageInstance();

            var siteKnown = storage.IsWebSiteExist(book.WebSite);
            if (!siteKnown)
            {
                storage.SaveWebSite(book.WebSite);
            }

            // An already stored book is left untouched, chapters included.
            if (storage.IsBookExist(book))
            {
                return;
            }

            storage.SaveBook(book);
            storage.SaveChapters(book.Chapters);
        }
        /// <summary>
        /// Decodes the stream from GBK, collects the hyperlinks of the index content node,
        /// parses each one, and stores the result in the book's <c>Chapters</c>.
        /// When the book already has chapters, only entries of <c>chapterList</c> beyond the
        /// existing count are appended.
        /// </summary>
        /// <param name="inputStream">Page content; closed by this method after decoding.</param>
        /// <param name="state">Cast to <c>Book</c> with <c>as</c> and kept in <c>book</c>.</param>
        /// <returns>The book kept in <c>book</c>.</returns>
        public object Parse(Stream inputStream, object state)
        {
            var stopWatch = new Stopwatch();
            stopWatch.Start();
            book = state as Book;

            string content1;
            try
            {
                content1 = EncodingHelper.FromGBKToUnicode(inputStream);
            }
            finally
            {
                // Close the stream even when decoding fails.
                inputStream.Close();
            }
            var afterDecode = stopWatch.ElapsedMilliseconds;

            var doc = new HtmlDocument();
            doc.LoadHtml(content1);
            var body = hh.GetSingleChildByTypeChain(doc.DocumentNode, new string[] { "html", "body" });

            // Fall back to the document root when no html/body chain was found.
            if (body == null) body = doc.DocumentNode;
            CleanHtmlTree(body);
            var afterCleanTree = stopWatch.ElapsedMilliseconds;

            var indexNode = GetIndexContentNode(body);
            var hyperLinkNodes = new List<HtmlNode>();
            hh.GetAllHyperlinkElementWithFilter(indexNode, hyperLinkNodes);
            var afterCollectLinks = stopWatch.ElapsedMilliseconds;

            cleanHyperLinkNode(hyperLinkNodes);
            var afterCleanLinks = stopWatch.ElapsedMilliseconds;

            Debug.Assert(hyperLinkNodes.Count > 0);

            foreach (var link in hyperLinkNodes)
            {
                ParseIndexContent(link);
            }
            var afterParseLinks = stopWatch.ElapsedMilliseconds;

            if (book.Chapters == null )
            {
                book.Chapters = chapterList.ToArray();
            }
            else
            {
                // Keep the existing chapters and append only the entries past the old count.
                var merged = book.Chapters.ToList();
                for (var i = merged.Count; i < chapterList.Count; i++)
                {
                    merged.Add(chapterList[i]);
                }

                book.Chapters = merged.ToArray();
            }

            var totalTime = stopWatch.ElapsedMilliseconds;
            stopWatch.Stop();

            // Each phase is the difference between neighbouring checkpoints.
            var time =
                String.Format(
                    "cleanTreeTime {0}\n getAllHyperLinkTime {1}\n decodeTime {2}\n cleanHyperLinkTime {3}\n parseIndexContentTime {4}\n Totaltime {5}\n",
                    afterCleanTree - afterDecode,
                    afterCollectLinks - afterCleanTree,
                    afterDecode,
                    afterCleanLinks - afterCollectLinks,
                    afterParseLinks - afterCleanLinks,
                    totalTime);

            // The report used to be built and thrown away; write it to the debug output.
            Debug.WriteLine(time);

            return book;
        }
Example #24
0
 /// <summary>
 /// Builds a hard-coded sample chapter for the given book, saves it through <c>Storage</c>
 /// and returns it.
 /// </summary>
 /// <param name="book">Book the chapter is attached to.</param>
 /// <returns>The saved chapter.</returns>
 private Chapter CreateFakeChapter(Book book)
 {
     var chapter = new Chapter
     {
         Book = book,
         SaveContent1 = "String Content",
         ChapterUri = new Uri("http://www.tszw.net/files/article/html/79/79194/2417331.html", UriKind.Absolute),
         ChapterName = "敌手的面目",
         LastUpdateTime = DateTime.Now,
         IsImageContent = true
     };

     //db.Chapters.InsertOnSubmit(chapter);
     Storage.SaveChapters(new[] { chapter });
     return chapter;
 }
        /// <summary>
        /// Sets <c>item.IndexPage</c> from the first hyperlink under <paramref name="node"/>
        /// whose text contains one of the "back to index" captions.
        /// </summary>
        /// <remarks>
        /// When no hyperlink matches, the url stays empty. <c>EndsWith</c> is true for an
        /// empty string, so the index page then becomes the root URL itself.
        /// </remarks>
        /// <param name="node">Node whose hyperlinks are collected.</param>
        /// <param name="item">Book whose <c>IndexPage</c> is set; its <c>RootUrl</c> resolves relative links.</param>
        public void GetIndexPageLinkNew(HtmlNode node, Book item)
        {
            const string indexText1 = "回目录";
            const string indexText2 = "回书目";
            const string indexText3 = "目 录";

            var anchors = new List<HtmlNode>();
            hh.GetAllHyperlinkElementWithFilter(node, anchors);

            var url = string.Empty;
            foreach (var anchor in anchors)
            {
                var caption = anchor.InnerText;
                var pointsToIndex = caption.Contains(indexText1)
                                    || caption.Contains(indexText2)
                                    || caption.Contains(indexText3);
                if (!pointsToIndex)
                {
                    continue;
                }

                url = anchor.Attributes["href"].Value;
                break;
            }

            string target;
            if (url.Contains("http"))
            {
                // Used as is.
                target = url;
            }
            else if (item.RootUrl.EndsWith(url))
            {
                // The link repeats the tail of the root URL, so the root is the index page.
                target = item.RootUrl;
            }
            else
            {
                target = item.RootUrl + url;
            }

            item.IndexPage = new Uri(target, UriKind.Absolute);
        }