/// <summary>
/// Fetch geocache based on ocd._Code and parse the provided HTML page to list images to grab.
/// Images will be stored in ocd._ImageFilesSpoilers.
/// </summary>
/// <param name="ocd">OCD object to complete</param>
/// <param name="bUseKeyWords">If true, spoilers will be downloaded based on keywords provided in keywordsspoiler</param>
/// <param name="htmlCode">Cache html page</param>
public void GetImageFromParsingImpl(OfflineCacheData ocd, bool bUseKeyWords, string htmlCode)
{
    // Patch: restrict parsing to the gallery <ul> section of the cache page
    String chunk = MyTools.GetSnippetFromText("<ul class=\"CachePageImages NoPrint\">", "</ul>", htmlCode);
    chunk = "<html><body>" + chunk + "</body></html>";

    // Parse HTML to retrieve links
    // Load the Html into the agility pack
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(chunk);

    // Now, using LINQ to get all anchors carrying an href
    var linkNodes = doc.DocumentNode.SelectNodes("//a[@href]");
    if (linkNodes != null)
    {
        foreach (HtmlAgilityPack.HtmlNode node in linkNodes)
        {
            if (!node.Attributes.Contains("href"))
            {
                continue;
            }

            String name = HtmlAgilityPack.HtmlEntity.DeEntitize(node.InnerText);
            bool bKeep = true;
            if (bUseKeyWords)
            {
                bKeep = false;
                // Check if one keyword is contained in the name
                String n = name.ToLower();
                foreach (String s in keywordsspoiler)
                {
                    if (n.Contains(s))
                    {
                        bKeep = true;
                        break;
                    }
                }
            }

            if (bKeep)
            {
                String url = node.Attributes["href"].Value;
                // BUGFIX: the same image URL can legitimately appear more than once
                // in the gallery markup; Dictionary.Add would then throw an
                // ArgumentException. Only register the first occurrence.
                if (!ocd._ImageFilesSpoilers.ContainsKey(url))
                {
                    OfflineImageWeb oiw = new OfflineImageWeb();
                    oiw._url = url;
                    oiw._localfile = ocd._Code + "_P_" + Guid.NewGuid();
                    oiw._name = name;
                    ocd._ImageFilesSpoilers.Add(oiw._url, oiw);
                }
            }
        }
    }
}
/// <summary>
/// This method will be called when the thread is started.
/// Perform download of all offline data (description images and spoiler images)
/// for every cache in ocds, updating the progress dialog as it goes.
/// </summary>
public void DoWork()
{
    if ((ocds != null) && (ocds.Count != 0))
    {
        _daddy.bOfflineDownloadInProgress = true;
        String offdatapath = _daddy.GetUserDataPath() + Path.DirectorySeparatorChar + "Offline";
        // Images are still downloaded anonymously, no need for the cookie here
        WebClient client = new WebClient();
        AssignProxy(client);
        // Compute how many files to download
        int iFiles = ocds.Count;
        // 3* : because 3 steps : parse html, download description, download galerie
        threadprogress.progressBar1.Maximum = 3 * iFiles;
        threadprogress.lblWait.Text = "";
        bool bNice = true;
        // Download each file
        foreach (OfflineCacheData ocd2 in ocds)
        {
            // Only now we delete local files of this single cache only
            // Do it BEFORE changing the file list ;-)
            ocd2.PurgeFiles(offdatapath);
            // Get geocache
            Geocache geo = null;
            if (_daddy._caches.ContainsKey(ocd2._Code))
            {
                geo = _daddy._caches[ocd2._Code];
            }
            else
            {
                // ????? No matching geocache in memory, nothing we can do
                continue; // skip this one
            }
            // This is a real geocache
            // ***********************
            if (!bGetFromGallery)
            {
                // The old, somewhat hard-to-follow method that works
                OldSchoolGrabbing(ocd2);
            }
            else
            {
                // Retrieve the images from the gallery
                bool status = false;
                String url = geo._Url;
                if (url == "")
                {
                    url = "https://coord.info/" + geo._Code; // but this may fail at name resolution
                }
                // Retrieve the image data
                List <Tuple <String, String, String> > imgdata = GetAllImageUrlsFromCacheGallery(url, ref status);
                if (!status)
                {
                    // It failed, fall back to the basic method
                    OldSchoolGrabbing(ocd2);
                }
                else
                {
                    // Free memory
                    ocd2._descHTML = "";
                    // It worked. Cool
                    // Tuple : link, name, locafile
                    // Now, deal with these spoilers
                    bool bUseKeyWords = false;
                    if ((keywordsspoiler != null) && (keywordsspoiler.Count != 0))
                    {
                        bUseKeyWords = true;
                    }
                    foreach (Tuple <String, String, String> tpl in imgdata)
                    {
                        bool bKeep = true;
                        if (bUseKeyWords)
                        {
                            bKeep = false;
                            // Check if one keyword is contained in the name
                            String n = tpl.Item2.ToLower();
                            foreach (String s in keywordsspoiler)
                            {
                                if (n.Contains(s))
                                {
                                    bKeep = true;
                                    break;
                                }
                            }
                        }
                        if (bKeep)
                        {
                            OfflineImageWeb oiw = new OfflineImageWeb();
                            oiw._url = tpl.Item1;
                            oiw._localfile = ocd2._Code + "_P_" + tpl.Item3;
                            oiw._name = tpl.Item2;
                            // NOTE(review): Dictionary.Add throws if the same image URL
                            // appears twice in the gallery data — confirm duplicates
                            // cannot occur, or guard with ContainsKey
                            ocd2._ImageFilesSpoilers.Add(oiw._url, oiw);
                        }
                    }
                }
            }
            threadprogress.Step();
            // Grab each picture
            if (threadprogress._bAbort || _shouldStop)
            {
                ocd2._bAborted = true;
            }
            else
            {
                // Images from description
                foreach (KeyValuePair <String, String> paire in ocd2._ImageFilesFromDescription)
                {
                    if (threadprogress._bAbort || _shouldStop)
                    {
                        ocd2._bAborted = true;
                        break; // Stop for the current files
                    }
                    String url = paire.Key;
                    string fileName = paire.Value;
                    String localfile = offdatapath + Path.DirectorySeparatorChar + String.Format("{0}", fileName);
                    try
                    {
                        client.DownloadFile(new Uri(url), localfile);
                    }
                    catch (Exception)
                    {
                        // Keep what we downloaded anyway
                        // (best effort: the url may actually be inline base64 data)
                        TryBase64Conversion(url, localfile);
                    }
                }
                threadprogress.Step();
                // Images from parsing
                foreach (KeyValuePair <String, OfflineImageWeb> paire2 in ocd2._ImageFilesSpoilers)
                {
                    if (threadprogress._bAbort || _shouldStop)
                    {
                        ocd2._bAborted = true;
                        break; // Stop for the current files
                    }
                    String url = paire2.Key;
                    string fileName = paire2.Value._localfile;
                    String localfile = offdatapath + Path.DirectorySeparatorChar + String.Format("{0}", fileName);
                    try
                    {
                        client.DownloadFile(new Uri(url), localfile);
                    }
                    catch (Exception)
                    {
                        // Keep what we downloaded anyway
                        TryBase64Conversion(url, localfile);
                    }
                }
                threadprogress.Step();
                // IMPORTANT
                // If we reached this far, we assume the cache has been parsed
                // properly (except if we aborted in between of course)
                // So we set the attribute ocd1._bJustCreated to false
                // Because in post processing, the "justcreated" caches will be removed
                ocd2._NotDownloaded = false;
                // Security from spider web : delay ?
                if (delay != 0)
                {
                    if (threadprogress._bAbort || _shouldStop)
                    {
                        // Do nothing
                    }
                    else
                    {
                        // Alternate the "nice" prefix so the user can see the wait label refresh
                        threadprogress.lblWait.Text = (bNice ? @"- " : @"_ ") + String.Format(_daddy.GetTranslator().GetString("LblWaiting"), delay);
                        System.Threading.Thread.Sleep(delay * 1000);
                        threadprogress.lblWait.Text = "";
                        bNice = !bNice;
                    }
                }
            }
            if (threadprogress._bAbort || _shouldStop)
            {
                // Stop the loop
                break;
            }
        }
        threadprogress.Hide();
        threadprogress.Dispose();
    }
    _daddy.bOfflineDownloadInProgress = false;
    _daddy.NotifyEndOfThread(this);
}
/// <summary>
/// Extract spoiler images embedded as "Spoiler4Gpx" HTML comments in the cache
/// description (cf. Surfoo) and register them in ocd._ImageFilesSpoilers.
/// Expected comment format: &lt;!-- Spoiler4Gpx [name](http...url) --&gt;
/// </summary>
/// <param name="ocd">OCD object to complete; its _descHTML is parsed</param>
/// <returns>true if at least one spoiler comment was found and processed,
/// false otherwise (also false on any parsing exception)</returns>
private bool GetSpoilersFromDescription(OfflineCacheData ocd) // cf. Surfoo
{
    List <KeyValuePair <String, String> > spoilers = new List <KeyValuePair <string, string> >();
    try
    {
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(ocd._descHTML);
        var commentNodes = doc.DocumentNode.SelectNodes("//comment()");
        if (commentNodes != null)
        {
            foreach (HtmlAgilityPack.HtmlNode node in commentNodes)
            {
                String cmt = node.InnerHtml;
                if (cmt.StartsWith("<!-- Spoiler4Gpx ["))
                {
                    String sName = "", sLink = "";
                    // Name lies between "[" and "](http"
                    int iOpen = cmt.IndexOf("[");
                    int iMid = cmt.IndexOf("](http"); // computed once, reused below
                    if ((iOpen != -1) && (iMid != -1) && (iOpen < iMid))
                    {
                        sName = cmt.Substring(iOpen + 1, iMid - iOpen - 1);
                    }
                    // Link starts after "](" and ends before ") -->"
                    int iClose = cmt.IndexOf(") -->");
                    if ((iMid != -1) && (iClose != -1) && (iMid < iClose))
                    {
                        sLink = cmt.Substring(iMid + 2, iClose - iMid - 2);
                    }
                    if ((sName != "") && (sLink != ""))
                    {
                        spoilers.Add(new KeyValuePair <string, string>(sName, sLink));
                    }
                }
            }
        }
        if (spoilers.Count == 0)
        {
            return(false);
        }
        else
        {
            // Now, deal with these spoilers
            bool bUseKeyWords = false;
            if ((keywordsspoiler != null) && (keywordsspoiler.Count != 0))
            {
                bUseKeyWords = true;
            }
            foreach (KeyValuePair <String, String> paire in spoilers)
            {
                String name = paire.Key;
                String url = paire.Value;
                bool bKeep = true;
                if (bUseKeyWords)
                {
                    bKeep = false;
                    // Check if one keyword is contained in the name
                    String n = name.ToLower();
                    foreach (String s in keywordsspoiler)
                    {
                        if (n.Contains(s))
                        {
                            bKeep = true;
                            break;
                        }
                    }
                }
                if (bKeep)
                {
                    // BUGFIX: Dictionary.Add throws ArgumentException if the same
                    // spoiler URL appears in two comments; keep the first occurrence.
                    if (!ocd._ImageFilesSpoilers.ContainsKey(url))
                    {
                        OfflineImageWeb oiw = new OfflineImageWeb();
                        oiw._url = url;
                        oiw._localfile = ocd._Code + "_P_" + Guid.NewGuid();
                        oiw._name = name;
                        ocd._ImageFilesSpoilers.Add(oiw._url, oiw);
                    }
                }
            }
        }
        return(true);
    }
    catch (Exception)
    {
        // Best effort: any parsing failure simply means "no spoilers found here"
        return(false);
    }
}