/// <summary>
/// Restore a page from roaming storage, then rehydrate its cached content.
/// </summary>
/// <param name="pageId">The page identifier.</param>
/// <returns>The restored <see cref="SkrapedPage"/>.</returns>
private static async Task<SkrapedPage> RestorePage(int pageId)
{
    var restoredPage = new SkrapedPage { Id = pageId };

    // Loaded status is tracked per-machine in local settings so remote
    // machines re-load the page on first use. TryGetValue avoids the
    // ContainsKey + indexer double lookup, and the type test guards the
    // cast against a non-bool value sneaking into settings.
    var loadedKey = "Page" + pageId;
    object loadedValue;
    var loaded = LocalSettings.Values.TryGetValue(loadedKey, out loadedValue)
                 && loadedValue is bool
                 && (bool)loadedValue;

    var container = Roaming.CreateContainer(
        PageKey, ApplicationDataCreateDisposition.Always);
    var compositeValue =
        container.Values[pageId.ToString()] as ApplicationDataCompositeValue;

    if (compositeValue != null)
    {
        restoredPage.Url = new Uri(compositeValue[UrlProperty].ToString());
        restoredPage.Title = compositeValue[TitleProperty].ToString();

        // Only trust the stored thumbnail once the page has been loaded
        // on this machine; otherwise fall back to the placeholder icon.
        restoredPage.ThumbnailPath = loaded
            ? new Uri(compositeValue[ThumbnailProperty].ToString())
            : new Uri("ms-appx:///Assets/ie.png");
        restoredPage.Loaded = loaded;

        var imageCount = (int)compositeValue[ImageCountProperty];
        for (var idx = 0; idx < imageCount; idx++)
        {
            restoredPage.Images.Add(
                new Uri(compositeValue[ImageIndex + idx].ToString()));
        }
    }

    // Pull the cached html/text archive (no-op when none exists yet).
    await RestorePageData(restoredPage);
    return restoredPage;
}
/// <summary>
/// Save a page's metadata to roaming storage and its content to local storage.
/// </summary>
/// <param name="page">
/// The page to save.
/// </param>
/// <returns>
/// The <see cref="Task"/> for asynchronous saving.
/// </returns>
public async Task SavePage(SkrapedPage page)
{
    // Save loaded status locally, so remote machines will load on first use.
    LocalSettings.Values["Page" + page.Id] = page.Loaded;

    // Snapshot the count once: the original loop re-evaluated the LINQ
    // Count() extension on every iteration.
    var imageCount = page.Images.Count();

    var compositeValue = new ApplicationDataCompositeValue
    {
        { IdProperty, page.Id },
        { TitleProperty, page.Title },
        { ThumbnailProperty, page.ThumbnailPath.ToString() },
        { UrlProperty, page.Url.ToString() },
        { ImageCountProperty, imageCount }
    };

    // Images are stored as individually indexed keys because composite
    // values hold only simple property types, not collections.
    for (var idx = 0; idx < imageCount; idx++)
    {
        compositeValue.Add(ImageIndex + idx, page.Images[idx].ToString());
    }

    var container = Roaming.CreateContainer(
        PageKey, ApplicationDataCreateDisposition.Always);
    container.Values[page.Id.ToString()] = compositeValue;

    await SavePageData(page);
}
/// <summary>
/// Find the group that contains the given page.
/// </summary>
/// <param name="pageToCheck">
/// The page to locate.
/// </param>
/// <returns>
/// The owning <see cref="SkrapeGroup"/>, or null when no group holds the page.
/// </returns>
public SkrapeGroup GetGroupForPage(SkrapedPage pageToCheck)
{
    // A page's owner is the first group whose page list carries its id.
    return groups.FirstOrDefault(
        candidate => candidate.Pages.Any(page => page.Id == pageToCheck.Id));
}
/// <summary>
/// Remove a page's composite value from roaming storage.
/// </summary>
/// <param name="page">
/// The page to delete.
/// </param>
public void DeletePage(SkrapedPage page)
{
    var container = Roaming.CreateContainer(
        PageKey, ApplicationDataCreateDisposition.Always);

    // IDictionary.Remove is a safe no-op when the key is absent, so the
    // ContainsKey pre-check (a double lookup) is unnecessary; the key
    // string is also built once instead of three times.
    var key = page.Id.ToString();
    container.Values.Remove(key);
}
/// <summary>
/// Populate the view model with deterministic sample groups and pages.
/// </summary>
private void GenerateTestData()
{
    // Pages generated per sample group (pages cycle through the name list).
    const int PagesPerGroup = 50;

    var baseuri = new Uri("http://csharperimage.jeremylikness.com/", UriKind.Absolute);
    var groupNames = new[] { "Alpha", "Beta", "Omega " };

    // These arrays are loop-invariant; build them once instead of once
    // per group as the original did. Uri is immutable, so sharing the
    // same instances across pages and groups is safe.
    var pages = new[]
    {
        "/2013/02/review-of-lenovo-ideapad-yoga-13-for.html",
        "/2010/04/model-view-viewmodel-mvvm-explained.html",
        "/2010/10/so-whats-fuss-about-silverlight.html",
        "/2013/01/traveling-with-microsoft-and-asus.html"
    };

    var images = new[]
    {
        new Uri(
            "http://1.bp.blogspot.com/-R7LqKNm9BBk/Tl4oD5P9JdI/AAAAAAAAAZ8/pZ8J8gz_e5E/s1600/BlogHeader4.png"),
        new Uri(
            "http://lh6.ggpht.com/-YPUxFruIDFA/USFp0raqHAI/AAAAAAAAA6E/bHy3vg05Ed4/picture005_thumb2.jpg?imgmax=800"),
        new Uri(
            "http://lh6.ggpht.com/-WYkLA5K30mw/UObJc7Ls6bI/AAAAAAAAA3A/2gng-TfN6aE/WP_000232_thumb%25255B1%25255D.jpg?imgmax=800")
    };

    foreach (var groupName in groupNames)
    {
        var group = new SkrapeGroup
        {
            Id = groupIdProvider.GetId(),
            Title = groupName,
            Description = string.Format(
                "The {0} group for holding a set of test pages for the sample data of the application.",
                groupName)
        };

        for (var x = 0; x < PagesPerGroup; x++)
        {
            var pageName = pages[x % pages.Length];
            var page = new SkrapedPage
            {
                Id = pageIdProvider.GetId(),
                Url = new Uri(baseuri, pageName),
                Title = pageName.Replace("/", " "),
                ThumbnailPath = new Uri("ms-appx:///Assets/Logo.png")
            };

            foreach (var image in images)
            {
                page.Images.Add(image);
            }

            group.Pages.Add(page);
        }

        groups.Add(group);
    }
}
/// <summary>
/// Delete a page, removing its parent group as well when the group
/// becomes empty.
/// </summary>
/// <param name="page">
/// The page to delete.
/// </param>
public void DeletePage(SkrapedPage page)
{
    var parent = GetGroupForPage(page);

    page.Deleted = true;

    // Guard: GetGroupForPage is a FirstOrDefault and returns null when the
    // page is not attached to any group — the original dereferenced it
    // unconditionally. Still purge the page from storage in that case.
    if (parent == null)
    {
        Manager.DeletePage(page);
        return;
    }

    parent.Pages.Remove(page);
    Manager.DeletePage(page);

    if (parent.Pages.Any())
    {
        return;
    }

    // Last page gone: retire the now-empty group too.
    groups.Remove(parent);
    Manager.DeleteGroup(parent);
}
/// <summary>
/// Add a url as a new page, filing it under a group named for its domain
/// (creating the group when necessary).
/// </summary>
/// <param name="url">
/// The url.
/// </param>
/// <returns>
/// The <see cref="Task"/> to run asynchronously.
/// </returns>
public async Task AddUrl(Uri url)
{
    // Ignore the request when a live (non-deleted) page already tracks this url.
    var existingPage = groups.SelectMany(g => g.Pages)
        .Any(p => !p.Deleted && p.Url == url);

    if (existingPage)
    {
        return;
    }

    var page = new SkrapedPage
    {
        Id = pageIdProvider.GetId(),
        Url = url,
        Title = "New Skrape - Tap to Load",
        ThumbnailPath = new Uri("ms-appx:///Assets/ie.png")
    };

    // Invariant lower-casing: ToLower() is culture-sensitive (e.g. the
    // Turkish 'I'), which would break domain-to-group-title matching.
    var domain = url.Host.ToLowerInvariant();
    var group = groups.FirstOrDefault(g => g.Title == domain);

    if (group != null)
    {
        group.Pages.Add(page);
        await Manager.SaveGroup(group);
    }
    else
    {
        var newGroup = new SkrapeGroup
        {
            Id = groupIdProvider.GetId(),
            Title = domain,
            Description = string.Format("Collection of Skrapes for the {0} domain.", domain)
        };
        newGroup.Pages.Add(page);
        Groups.Add(newGroup);
        await Manager.SaveGroup(newGroup);
    }

    await Manager.SavePage(page);
    Manager.AddUri(page.Url);
}
/// <summary>
/// Restore a page's cached html and text content from its zip archive in
/// local storage. Leaves the page untouched when no archive exists yet.
/// </summary>
/// <param name="page">
/// The page whose content is restored.
/// </param>
/// <returns>
/// The <see cref="Task"/>.
/// </returns>
private static async Task RestorePageData(SkrapedPage page)
{
    var folder = await Local.CreateFolderAsync(PageFolder, CreationCollisionOption.OpenIfExists);

    try
    {
        using (var fileStream = await folder.OpenStreamForReadAsync(string.Format(ZipTemplate, page.Id)))
        using (var zipArchive = new ZipArchive(fileStream, ZipArchiveMode.Read))
        {
            // GetEntry returns null when the entry is missing (e.g. a
            // partially written archive) — the original dereferenced the
            // result unconditionally and would have thrown.
            var htmlEntry = zipArchive.GetEntry(HtmlEntry);
            if (htmlEntry != null)
            {
                using (var htmlStream = new StreamReader(htmlEntry.Open()))
                {
                    page.Html = await htmlStream.ReadToEndAsync();
                }
            }

            var textEntry = zipArchive.GetEntry(TextEntry);
            if (textEntry != null)
            {
                using (var textStream = new StreamReader(textEntry.Open()))
                {
                    page.Text = await textStream.ReadToEndAsync();
                }
            }
        }
    }
    catch (FileNotFoundException)
    {
        // No archive has been saved for this page yet; that is a normal
        // first-run condition, not an error.
    }
}
/// <summary>
/// Download the html for a page, then derive its title, text content, and
/// image list from the markup.
/// </summary>
/// <param name="page">
/// The page to populate.
/// </param>
/// <returns>
/// The <see cref="Task"/> to run asynchronously.
/// </returns>
public async Task GetHtmlForWebPage(SkrapedPage page)
{
    var client = new HttpClient();

    try
    {
        page.Html = await client.GetStringAsync(page.Url);

        // Use the first title match that produced any groups.
        var titleMatch = TitlePattern.Matches(page.Html)
            .Cast<Match>()
            .FirstOrDefault(candidate => candidate.Groups.Count > 0);
        if (titleMatch != null)
        {
            page.Title = titleMatch.Groups[1].Value;
        }

        page.Text = await ParseHtmlToText(page.Html);

        // Only replace the existing image list when the scrape found something.
        var scrapedImages = (await ExtractImagesFromPage(page.Html)).ToArray();
        if (scrapedImages.Length > 0)
        {
            page.Images.Clear();
            foreach (var image in scrapedImages)
            {
                page.Images.Add(image);
            }
        }
    }
    catch (Exception ex)
    {
        // Surface the failure to the user as the page content.
        page.Html = page.Text = ex.Message;
    }
}
/// <summary>
/// Persist a page's html and text content to a zip archive in local storage,
/// replacing any previous archive for the page.
/// </summary>
/// <param name="page">
/// The page to save.
/// </param>
/// <returns>
/// The <see cref="Task"/> for asynchronous save operations.
/// </returns>
private static async Task SavePageData(SkrapedPage page)
{
    // Nothing to persist until the page has been scraped.
    if (string.IsNullOrWhiteSpace(page.Html))
    {
        return;
    }

    var folder = await Local.CreateFolderAsync(
        PageFolder, CreationCollisionOption.OpenIfExists);
    var file = await folder.CreateFileAsync(
        string.Format(ZipTemplate, page.Id), CreationCollisionOption.ReplaceExisting);

    // ZipArchive disposes the underlying write stream when it is disposed.
    using (var zip = new ZipArchive(await file.OpenStreamForWriteAsync(), ZipArchiveMode.Create))
    {
        var htmlEntry = zip.CreateEntry(HtmlEntry);
        using (var htmlStream = new StreamWriter(htmlEntry.Open()))
        {
            await htmlStream.WriteAsync(page.Html);
        }

        var textEntry = zip.CreateEntry(TextEntry);
        using (var textStream = new StreamWriter(textEntry.Open()))
        {
            await textStream.WriteAsync(page.Text);
        }
    }
}