/// <summary>
/// Stores the calendar and clock components of <paramref name="dateTime"/> in the
/// date fields of <paramref name="props"/> as zero-padded decimal strings.
/// </summary>
/// <param name="dateTime">Moment whose year, month, day, hour and minute are copied.</param>
/// <param name="props">
/// Receives PDYEAR (padded to 4 digits) and PDMON, PDDAY, PDHOUR, PDMIN (padded to 2 digits each).
/// </param>
public static void FillPropertiesFromDateTime(DateTime dateTime, SubmissionProps props)
{
    const string fourDigits = "D4";
    const string twoDigits = "D2";

    // date part
    string year = dateTime.Year.ToString(fourDigits);
    string month = dateTime.Month.ToString(twoDigits);
    string day = dateTime.Day.ToString(twoDigits);

    // time part
    string hour = dateTime.Hour.ToString(twoDigits);
    string minute = dateTime.Minute.ToString(twoDigits);

    props.PDYEAR = year;
    props.PDMON = month;
    props.PDDAY = day;
    props.PDHOUR = hour;
    props.PDMIN = minute;
}
/// <summary>
/// Processes every submission ID in <paramref name="subs"/> (walking the list from the last
/// entry to the first): fetches the submission page, extracts the download URL, title, date
/// and description from it, builds the target file names from the configured templates,
/// optionally saves the description, and downloads the file.
/// </summary>
/// <param name="subs">Submission IDs; null or empty entries are ignored.</param>
/// <param name="needDescription">
/// When true, the extracted description HTML is written to the path built from the
/// description filename template.
/// </param>
/// <returns>
/// The processing results: the count of fully processed submissions and the lists of IDs
/// whose page or file could not be retrieved.
/// </returns>
/// <remarks>
/// After the loop the two failure lists are written to "get_sub_page_failed.log" and
/// "download_failed.log" (only if at least one of them is non-empty), the submissions DB is
/// saved and the taskbar progress indicator is reset.
/// </remarks>
public async Task<ProcessingResults> ProcessSubmissionsList(List<string> subs, bool needDescription)
{
    const int maxAttempts = 3;      // tries per page request / per file download
    const int retryDelayMs = 2000;  // pause between two tries

    Console.WriteLine("Processing submissions list...");
    ProcessingResults res = new ProcessingResults();

    // iterate over all the submissions in list
    for (int i = subs.Count - 1; i >= 0; i--)
    {
        string subId = subs[i];
        if (string.IsNullOrEmpty(subId))
        {
            continue; // don't care about empty strings
        }
        Console.WriteLine("> Processing submission #" + subId);

        // check if in DB already
        try
        {
            if (SubmissionsDB.DB.Exists(uint.Parse(subId)) && GlobalSettings.Settings.downloadOnlyOnce)
            {
                Console.WriteLine("Skipped (present in DB)");
                continue;
            }
        }
        catch
        {
            Console.WriteLine("Unexpected error (DB presence check failed)!");
            continue;
        }

        string subUrl = "" + subId;

        // get submission page; the delay is awaited outside of the catch block so that
        // the thread is not blocked while waiting for the next try
        string cpage = null;
        for (int attemptsLeft = maxAttempts; cpage == null && attemptsLeft > 0; attemptsLeft--)
        {
            bool requestFailed = false;
            try
            {
                Console.WriteLine("Getting page: " + subUrl);
                cpage = await http.GetStringAsync(subUrl);
            }
            catch (Exception E)
            {
                Console.WriteLine("GET request error (" + subUrl + "): " + E.Message);
                requestFailed = true;
            }
            if (requestFailed && attemptsLeft > 1)
            {
                await Task.Delay(retryDelayMs);
            }
        }
        if (cpage == null)
        {
            Console.WriteLine("Giving up on #" + subId);
            res.failedToGetPage.Add(subId);
            continue;
        }

        // process submission page: locate the download link
        string downbtnkey = "<a href=\"//";
        SubmissionProps sp = new SubmissionProps();
        sp.SUBMID = subId;
        int keypos = cpage.IndexOf(downbtnkey, StringComparison.Ordinal);
        if (keypos >= 0)
        {
            cpage = cpage.Substring(keypos);
            cpage = cpage.Substring(cpage.IndexOf("/", StringComparison.Ordinal));
        }
        // the URL runs up to the closing quote of the href attribute
        int urlEnd = keypos >= 0 ? cpage.IndexOf("\"", StringComparison.Ordinal) : -1;
        if (urlEnd < 0)
        {
            Console.WriteLine("[Warning] got page, but it doesn't contain any download links.");
            res.failedToGetPage.Add(subId);
            continue;
        }
        sp.URL = "https:" + cpage.Substring(0, urlEnd);

        // processing submission description; also extracts submission date and title
        Utils.FillPropertiesFromDateTime(DateTime.Now, sp); // set Now as a fallback date
        sp.TITLE = "Unknown"; // fallback title

        const string key_title = @"<div class=""submission-title"">";
        const string key_enddiv = "</div>";

        // title: a missing title block must not abort the whole batch, the fallback is kept instead
        bool titleFound = false;
        int titlePos = cpage.IndexOf(key_title, StringComparison.Ordinal);
        if (titlePos >= 0)
        {
            cpage = cpage.Substring(titlePos);
            // the title block is cut after its second closing </div>
            int firstEnd = cpage.IndexOf(key_enddiv, StringComparison.Ordinal);
            int secondEnd = cpage.IndexOf(key_enddiv, firstEnd + 1, StringComparison.Ordinal);
            string sub_title_div = cpage.Substring(0, secondEnd + key_enddiv.Length);
            var titleMatch = Regex.Match(sub_title_div, "<h2><p>(.+?)</p></h2>", RegexOptions.CultureInvariant);
            if (titleMatch.Success)
            {
                sp.TITLE = Utils.StripIllegalFilenameChars(titleMatch.Groups[1].Value);
                titleFound = true;
            }
        }
        if (titleFound)
        {
            Console.WriteLine("Title: " + sp.TITLE);
        }
        else
        {
            Console.WriteLine("Warning :: no submission title found!");
        }

        // replace relative date with the absolute one
        string sub_date_strong = "";
        var dateMatch = Regex.Match(cpage, "<strong.+?title=\"(.+?)\" class=\"popup_date\">(.+?)<.+?</strong>", RegexOptions.CultureInvariant);
        if (dateMatch.Success)
        {
            string dateMatchVal = dateMatch.Value;
            string dateTimeStr = dateMatch.Groups[1].Value;      // fixed format date
            string dateTimeStrFuzzy = dateMatch.Groups[2].Value;

            // depending on user settings, fuzzy and fixed times may be swapped
            if (dateTimeStrFuzzy.Contains(" PM") || dateTimeStrFuzzy.Contains(" AM"))
            {
                var temporary = dateTimeStr;
                dateTimeStr = dateTimeStrFuzzy;
                dateTimeStrFuzzy = temporary;
            }

            // replace relative date with a fixed format one
            sub_date_strong = dateMatchVal.Replace(dateTimeStrFuzzy, dateTimeStr);

            // parse date
            dateTimeStr = dateTimeStr.Replace(",", "");
            const string dateFormat = "MMM d yyyy hh:mm tt";
            try
            {
                DateTime dateTime = DateTime.ParseExact(dateTimeStr, dateFormat, CultureInfo.InvariantCulture);
                Utils.FillPropertiesFromDateTime(dateTime, sp);
            }
            catch (Exception e)
            {
                Console.WriteLine("Warning :: cannot parse date :: " + e.Message);
                Console.WriteLine("Info :: date string :: " + dateTimeStr);
            }
        }
        else
        {
            Console.WriteLine("Warning :: unable to extact submission date");
        }

        // extract description; when the block is missing only the generated header is kept
        const string key_desc = @"<div class=""submission-description user-submitted-links"">";
        string description = "";
        int descPos = cpage.IndexOf(key_desc, StringComparison.Ordinal);
        if (descPos >= 0)
        {
            description = cpage.Substring(descPos);
            // the description block is cut after its second closing </div>
            int firstEnd = description.IndexOf(key_enddiv, StringComparison.Ordinal);
            int secondEnd = description.IndexOf(key_enddiv, firstEnd + 1, StringComparison.Ordinal);
            description = description.Substring(0, secondEnd + key_enddiv.Length);
            description = description.Replace("href=\"/", "href=\"");
            description = description.Replace("src=\"//", "src=\"https://");
        }
        else
        {
            Console.WriteLine("Warning :: no submission description found!");
        }
        string descriptionHeader = @"<div class=""submission-description-container link-override""> <div class=""submission-title""> <h2 class=""submission-title-header"">{{{title}}}</h2> Posted {{{date}}} </div><hr>"
            .Replace("{{{title}}}", sp.TITLE)
            .Replace("{{{date}}}", sub_date_strong);
        cpage = descriptionHeader + description;

        // split the download URL into artist / file name parts;
        // URLs that do not have the expected shape are reported instead of crashing the batch
        int artPos = sp.URL.LastIndexOf("/art/", StringComparison.Ordinal);
        string afterArt = artPos >= 0 ? sp.URL.Substring(artPos + 5) : "";
        int artistEnd = afterArt.IndexOf('/');
        string fileFull = Utils.StripIllegalFilenameChars(sp.URL.Substring(sp.URL.LastIndexOf('/') + 1));
        int fileIdEnd = fileFull.IndexOf('.');
        if (artistEnd < 0 || fileIdEnd < 0)
        {
            Console.WriteLine("Error: could not make sense of the URL for submission " + subId);
            res.failedToDownload.Add(subId);
            continue;
        }
        sp.ARTIST = afterArt.Substring(0, artistEnd);
        sp.FILEFULL = fileFull;
        sp.FILEID = fileFull.Substring(0, fileIdEnd);

        int underscorePos = fileFull.IndexOf('_');
        if (underscorePos >= 0) // valid filename (some names on FA are corrupted and contain nothing but '.' after ID)
        {
            sp.FILEPART = fileFull.Substring(underscorePos + 1);
            // a dot is guaranteed to exist here (checked above); an empty extension falls back to jpg
            string extension = fileFull.Substring(fileFull.LastIndexOf('.') + 1).TrimEnd();
            sp.EXT = extension.Length > 0 ? extension : @"jpg";
        }
        else
        {
            sp.FILEPART = @"unknown.jpg";
            sp.EXT = @"jpg";
        }

        // apply template(s): every public string field of the props is available as %FIELDNAME%
        string fname = GlobalSettings.Settings.filenameTemplate;
        string dfname = GlobalSettings.Settings.descrFilenameTemplate;
        foreach (FieldInfo fi in sp.GetType().GetFields(BindingFlags.Instance | BindingFlags.Public))
        {
            if (fi.FieldType == typeof(string))
            {
                string placeholder = "%" + fi.Name + "%";
                string fieldValue = (string)fi.GetValue(sp);
                fname = fname.Replace(placeholder, fieldValue);
                dfname = dfname.Replace(placeholder, fieldValue);
            }
        }

        // make sure directories exist
        string fnamefull = Path.Combine(GlobalSettings.Settings.downloadPath, fname);
        string dfnamefull = Path.Combine(GlobalSettings.Settings.downloadPath, dfname);
        try
        {
            Directory.CreateDirectory(Path.GetDirectoryName(fnamefull));
            Directory.CreateDirectory(Path.GetDirectoryName(dfnamefull));
        }
        catch
        {
            Console.WriteLine("Failed to make sure target directories do exist.");
            break;
        }

        // save description
        if (needDescription)
        {
            try
            {
                File.WriteAllText(dfnamefull, cpage);
                Console.WriteLine("description saved to filename:" + dfname);
            }
            catch (Exception E)
            {
                Console.WriteLine("Error saving description:" + E.Message);
            }
        }

        // download file
        Console.WriteLine("target filename: " + fname);
        if (File.Exists(fnamefull))
        {
            SubmissionsDB.DB.AddSubmission(uint.Parse(subId));
            Console.WriteLine("Already exists, continuing~");
            continue;
        }

        bool downloaded = false;
        for (int attemptsLeft = maxAttempts; !downloaded && attemptsLeft > 0; attemptsLeft--)
        {
            Exception failure = null;
            try
            {
                Console.WriteLine("Downloading: " + sp.URL);
                using (HttpResponseMessage response = await http.GetAsync(sp.URL, HttpCompletionOption.ResponseHeadersRead))
                {
                    // without this check an HTTP error page would be stored as the submission file
                    if (!response.IsSuccessStatusCode)
                    {
                        throw new HttpRequestException(string.Format("HTTP error: {0}", response.StatusCode));
                    }
                    using (Stream contentStream = await response.Content.ReadAsStreamAsync(),
                                  stream = new FileStream(fnamefull, FileMode.Create, FileAccess.Write, FileShare.None, 1024 * 1024 /*Mb*/, true))
                    {
                        // a plain contentStream.CopyToAsync(stream) works too, but may hang forever in case of network errors
                        await ReadNetworkStream(contentStream, stream, 5000);
                        SubmissionsDB.DB.AddSubmission(uint.Parse(subId));
                    }
                }
                downloaded = true;
            }
            catch (Exception E)
            {
                failure = E;
            }

            if (failure != null)
            {
                // write error message
                if (failure is ObjectDisposedException)
                {
                    Console.WriteLine("Network error (data receive timeout)");
                }
                else
                {
                    Console.WriteLine("GET request error (file " + sp.FILEID + "): " + failure.Message);
                }
                // remove incomplete download
                if (File.Exists(fnamefull))
                {
                    File.Delete(fnamefull);
                }
                // wait before trying again
                if (attemptsLeft > 1)
                {
                    await Task.Delay(retryDelayMs);
                }
            }
        }
        if (!downloaded)
        {
            Console.WriteLine("Giving up on downloading {0}", subId);
            res.failedToDownload.Add(subId);
            continue;
        }

        Console.WriteLine("Done: #" + subId);
        TaskbarProgress.SetValue(currentConsoleHandle, subs.Count - i, subs.Count);
        res.processedPerfectly++;
    }

    // writing results: each log file receives its own list
    try
    {
        if (res.failedToGetPage.Count > 0 || res.failedToDownload.Count > 0)
        {
            File.WriteAllLines(Path.Combine(GlobalSettings.Settings.systemPath, "get_sub_page_failed.log"), res.failedToGetPage);
            File.WriteAllLines(Path.Combine(GlobalSettings.Settings.systemPath, "download_failed.log"), res.failedToDownload);
        }
    }
    catch (Exception E)
    {
        Console.WriteLine("Failed to save list of subs with issues: " + E.Message);
    }

    // save DB
    SubmissionsDB.Save();

    // stop progress indicating
    TaskbarProgress.SetState(currentConsoleHandle, TaskbarProgress.TaskbarStates.NoProgress);

    return res;
}
/// <summary>
/// Processes every submission ID in <paramref name="subs"/> (walking the list from the last
/// entry to the first): fetches the submission page, extracts the download URL and, if
/// requested, the description block, builds the target file names from the configured
/// templates and downloads the file.
/// </summary>
/// <param name="subs">Submission IDs; null or empty entries are ignored.</param>
/// <param name="needDescription">
/// When true, the description block is extracted from the page and written to the path built
/// from the description filename template.
/// </param>
/// <returns>
/// The processing results: the count of fully processed submissions and the lists of IDs
/// whose page or file could not be retrieved.
/// </returns>
/// <remarks>
/// After the loop the two failure lists are written to "get_sub_page_failed.log" and
/// "download_failed.log" (only if at least one of them is non-empty), the submissions DB is
/// saved and the taskbar progress indicator is reset.
/// </remarks>
public async Task<ProcessingResults> ProcessSubmissionsList(List<string> subs, bool needDescription)
{
    const int maxAttempts = 3;      // tries per page request / per file download
    const int retryDelayMs = 2000;  // pause between two tries

    Console.WriteLine("Processing submissions list...");
    ProcessingResults res = new ProcessingResults();

    // iterate over all the submissions in list
    for (int i = subs.Count - 1; i >= 0; i--)
    {
        string subId = subs[i];
        if (string.IsNullOrEmpty(subId))
        {
            continue; // don't care about empty strings
        }
        Console.WriteLine("> Processing submission #" + subId);

        // check if in DB already
        try
        {
            if (SubmissionsDB.DB.Exists(uint.Parse(subId)) && GlobalSettings.Settings.downloadOnlyOnce)
            {
                Console.WriteLine("Skipped (present in DB)");
                continue;
            }
        }
        catch
        {
            Console.WriteLine("Unexpected error (DB presence check failed)!");
            continue;
        }

        string subUrl = "" + subId;

        // get submission page; the delay is awaited outside of the catch block so that
        // the thread is not blocked while waiting for the next try
        string cpage = null;
        for (int attemptsLeft = maxAttempts; cpage == null && attemptsLeft > 0; attemptsLeft--)
        {
            bool requestFailed = false;
            try
            {
                Console.WriteLine("Getting page: " + subUrl);
                cpage = await http.GetStringAsync(subUrl);
            }
            catch (Exception E)
            {
                Console.WriteLine("GET request error (" + subUrl + "): " + E.Message);
                requestFailed = true;
            }
            if (requestFailed && attemptsLeft > 1)
            {
                await Task.Delay(retryDelayMs);
            }
        }
        if (cpage == null)
        {
            Console.WriteLine("Giving up on #" + subId);
            res.failedToGetPage.Add(subId);
            continue;
        }

        // process submission page: locate the download link
        string downbtnkey = "<a href=\"//";
        string desckey = "<div class=\"submission-description-container";
        SubmissionProps sp = new SubmissionProps();
        sp.SUBMID = subId;
        int keypos = cpage.IndexOf(downbtnkey, StringComparison.Ordinal);
        if (keypos >= 0)
        {
            cpage = cpage.Substring(keypos);
            cpage = cpage.Substring(cpage.IndexOf("/", StringComparison.Ordinal));
        }
        // the URL runs up to the closing quote of the href attribute
        int urlEnd = keypos >= 0 ? cpage.IndexOf("\"", StringComparison.Ordinal) : -1;
        if (urlEnd < 0)
        {
            Console.WriteLine("[Warning] got page, but it doesn't contain any download links.");
            res.failedToGetPage.Add(subId);
            continue;
        }
        sp.URL = "https:" + cpage.Substring(0, urlEnd);

        // extract description; stays null when the page has no description block,
        // in which case nothing is saved (instead of crashing the whole batch)
        string description = null;
        if (needDescription)
        {
            int descPos = cpage.IndexOf(desckey, StringComparison.Ordinal);
            if (descPos >= 0)
            {
                string desckeyend = "</div>";
                description = cpage.Substring(descPos);
                // the description block is cut after its second closing </div>
                int firstEnd = description.IndexOf(desckeyend, StringComparison.Ordinal);
                int secondEnd = description.IndexOf(desckeyend, firstEnd + 1, StringComparison.Ordinal);
                description = description.Substring(0, secondEnd + desckeyend.Length);
                description = description.Replace("href=\"/", "href=\"");
                description = description.Replace("src=\"//", "src=\"https://");
            }
            else
            {
                Console.WriteLine("[Warning] got page, but it doesn't contain a description.");
            }
        }

        // split the download URL into artist / file name parts;
        // URLs that do not have the expected shape are reported instead of crashing the batch
        int artPos = sp.URL.LastIndexOf("/art/", StringComparison.Ordinal);
        string afterArt = artPos >= 0 ? sp.URL.Substring(artPos + 5) : "";
        int artistEnd = afterArt.IndexOf('/');
        string fileFull = sp.URL.Substring(sp.URL.LastIndexOf('/') + 1);
        // drop characters that are not allowed in file names
        fileFull = string.Concat(fileFull.Split(Path.GetInvalidFileNameChars(), StringSplitOptions.RemoveEmptyEntries));
        int fileIdEnd = fileFull.IndexOf('.');
        if (artistEnd < 0 || fileIdEnd < 0)
        {
            Console.WriteLine("Error: could not make sense of the URL for submission " + subId);
            res.failedToDownload.Add(subId);
            continue;
        }
        sp.ARTIST = afterArt.Substring(0, artistEnd);
        sp.FILEFULL = fileFull;
        sp.FILEID = fileFull.Substring(0, fileIdEnd);

        int underscorePos = fileFull.IndexOf('_');
        if (underscorePos >= 0) // valid filename (some names on FA are corrupted and contain nothing but '.' after ID)
        {
            sp.FILEPART = fileFull.Substring(underscorePos + 1);
            // a dot is guaranteed to exist here (checked above); an empty extension falls back to jpg
            string extension = fileFull.Substring(fileFull.LastIndexOf('.') + 1).TrimEnd();
            sp.EXT = extension.Length > 0 ? extension : @"jpg";
        }
        else
        {
            sp.FILEPART = @"unknown.jpg";
            sp.EXT = @"jpg";
        }

        // apply template(s): every public string field of the props is available as %FIELDNAME%
        string fname = GlobalSettings.Settings.filenameTemplate;
        string dfname = GlobalSettings.Settings.descrFilenameTemplate;
        foreach (FieldInfo fi in sp.GetType().GetFields(BindingFlags.Instance | BindingFlags.Public))
        {
            if (fi.FieldType == typeof(string))
            {
                string placeholder = "%" + fi.Name + "%";
                string fieldValue = (string)fi.GetValue(sp);
                fname = fname.Replace(placeholder, fieldValue);
                dfname = dfname.Replace(placeholder, fieldValue);
            }
        }

        // make sure directories exist
        string fnamefull = Path.Combine(GlobalSettings.Settings.downloadPath, fname);
        string dfnamefull = Path.Combine(GlobalSettings.Settings.downloadPath, dfname);
        try
        {
            Directory.CreateDirectory(Path.GetDirectoryName(fnamefull));
            Directory.CreateDirectory(Path.GetDirectoryName(dfnamefull));
        }
        catch
        {
            Console.WriteLine("Failed to make sure target directories do exist.");
            break;
        }

        // save description
        if (needDescription && description != null)
        {
            try
            {
                File.WriteAllText(dfnamefull, description);
                Console.WriteLine("description saved to filename:" + dfname);
            }
            catch (Exception E)
            {
                Console.WriteLine("Error saving description:" + E.Message);
            }
        }

        // download file
        Console.WriteLine("target filename: " + fname);
        if (File.Exists(fnamefull))
        {
            SubmissionsDB.DB.AddSubmission(uint.Parse(subId));
            Console.WriteLine("Already exists, continuing~");
            continue;
        }

        bool downloaded = false;
        for (int attemptsLeft = maxAttempts; !downloaded && attemptsLeft > 0; attemptsLeft--)
        {
            Exception failure = null;
            try
            {
                Console.WriteLine("Downloading: " + sp.URL);
                using (HttpResponseMessage response = await http.GetAsync(sp.URL, HttpCompletionOption.ResponseHeadersRead))
                {
                    // without this check an HTTP error page would be stored as the submission file
                    if (!response.IsSuccessStatusCode)
                    {
                        throw new HttpRequestException(string.Format("HTTP error: {0}", response.StatusCode));
                    }
                    using (Stream contentStream = await response.Content.ReadAsStreamAsync(),
                                  stream = new FileStream(fnamefull, FileMode.Create, FileAccess.Write, FileShare.None, 1024 * 1024 /*Mb*/, true))
                    {
                        // a plain contentStream.CopyToAsync(stream) works too, but may hang forever in case of network errors
                        await ReadNetworkStream(contentStream, stream, 5000);
                        SubmissionsDB.DB.AddSubmission(uint.Parse(subId));
                    }
                }
                downloaded = true;
            }
            catch (Exception E)
            {
                failure = E;
            }

            if (failure != null)
            {
                // write error message
                if (failure is ObjectDisposedException)
                {
                    Console.WriteLine("Network error (data receive timeout)");
                }
                else
                {
                    Console.WriteLine("GET request error (file " + sp.FILEID + "): " + failure.Message);
                }
                // remove incomplete download
                if (File.Exists(fnamefull))
                {
                    File.Delete(fnamefull);
                }
                // wait before trying again
                if (attemptsLeft > 1)
                {
                    await Task.Delay(retryDelayMs);
                }
            }
        }
        if (!downloaded)
        {
            Console.WriteLine("Giving up on downloading {0}", subId);
            res.failedToDownload.Add(subId);
            continue;
        }

        Console.WriteLine("Done: #" + subId);
        TaskbarProgress.SetValue(currentConsoleHandle, subs.Count - i, subs.Count);
        res.processedPerfectly++;
    }

    // writing results: each log file receives its own list
    try
    {
        if (res.failedToGetPage.Count > 0 || res.failedToDownload.Count > 0)
        {
            File.WriteAllLines(Path.Combine(GlobalSettings.Settings.systemPath, "get_sub_page_failed.log"), res.failedToGetPage);
            File.WriteAllLines(Path.Combine(GlobalSettings.Settings.systemPath, "download_failed.log"), res.failedToDownload);
        }
    }
    catch (Exception E)
    {
        Console.WriteLine("Failed to save list of subs with issues: " + E.Message);
    }

    // save DB
    SubmissionsDB.Save();

    // stop progress indicating
    TaskbarProgress.SetState(currentConsoleHandle, TaskbarProgress.TaskbarStates.NoProgress);

    return res;
}
/// <summary>
/// Processes every entry of <paramref name="subs"/> (walking the list from the last entry to
/// the first): fetches the submission page, extracts the download URL, scraps flag, title,
/// date and description from it, builds the target file names from the configured templates,
/// optionally saves the description and downloads the file unless it can be skipped.
/// </summary>
/// <param name="subs">
/// Submission entries in the form <c>ID#FileID@attributes</c>, where everything except the ID
/// is optional. The attributes section is accepted but ignored. Null or empty entries are skipped.
/// </param>
/// <param name="needDescription">
/// When true, the extracted description HTML is written to the path built from the
/// description filename template.
/// </param>
/// <param name="updateMode">
/// When true, a submission already present in the DB is skipped only if the stored file ID
/// is non-zero and equals the current one; otherwise it is re-checked and, if the target file
/// already exists, the new download is stored under a "[v.FileID]" name.
/// </param>
/// <returns>
/// The processing results: the count of fully processed submissions and the lists of IDs
/// whose page or file could not be retrieved.
/// </returns>
/// <remarks>
/// After the loop the two failure lists are written to "get_sub_page_failed.log" and
/// "download_failed.log" (only if at least one of them is non-empty), the submissions DB is
/// saved and the taskbar progress indicator is reset.
/// </remarks>
public async Task<ProcessingResults> ProcessSubmissionsList(List<string> subs, bool needDescription, bool updateMode = false)
{
    const int maxAttempts = 3;      // tries per page request / per file download
    const int retryDelayMs = 2000;  // pause between two tries

    Console.WriteLine("Processing submissions list...");
    ProcessingResults res = new ProcessingResults();

    // iterate over all the submissions in list
    for (int i = subs.Count - 1; i >= 0; i--)
    {
        // expected format: ID#FileID@attributes
        // everything except for ID is optional
        string subStr = subs[i];
        if (string.IsNullOrEmpty(subStr))
        {
            continue; // don't care about empty strings
        }

        uint subIdInt = 0;
        uint subFid = 0;
        uint subInitFid = 0;
        // the attributes section was once used for scraps detection; it is still accepted
        // by the pattern, but its value is not read anymore
        const string subIdRegex = @"^(?<id>[0-9]+?)(#(?<fid>[0-9]+?)){0,1}(@(?<attr>.+?)){0,1}$";
        var subIdMatch = Regex.Match(subStr, subIdRegex);
        string subId = subIdMatch.Success ? subIdMatch.Groups["id"].Value : null;
        // an ID that does not fit into uint is rejected as well, otherwise it would be
        // silently treated as submission 0 in the DB
        if (subId == null || !uint.TryParse(subId, out subIdInt))
        {
            Console.WriteLine("Error :: Malformed submission ID: " + subStr);
            continue;
        }
        if (subIdMatch.Groups["fid"].Success)
        {
            uint.TryParse(subIdMatch.Groups["fid"].Value, out subFid);
        }

        Console.WriteLine(string.Format("> Processing submission {0} {1}", subId,
            subFid > 0 ? string.Format("(file id {0})", subFid) : ""));

        // Skip submissions that can be skipped without making any network requests.
        // The DB lookups live inside the try block so that a DB failure is reported
        // for this submission instead of aborting the whole batch.
        uint dbSubFid = 0;
        bool dbSubExists = false;
        try
        {
            dbSubFid = SubmissionsDB.DB.GetFileId(subIdInt);
            dbSubExists = SubmissionsDB.DB.Exists(subIdInt);
            if (dbSubExists && GlobalSettings.Settings.downloadOnlyOnce)
            {
                // can skip at lowest cost if either:
                // * not in update mode
                // * file ID is known and matches the one stored in the DB
                if (!updateMode || (dbSubFid == subFid && dbSubFid != 0))
                {
                    Console.WriteLine("Skipped (present in DB)");
                    continue;
                }
                Console.WriteLine("Submission is present in the DB, but may have been updated; re-checking~");
            }
        }
        catch
        {
            Console.WriteLine("Unexpected error (DB presence check failed)!");
            continue;
        }

        // get submission page; the delay is awaited outside of the catch block so that
        // the thread is not blocked while waiting for the next try
        string subUrl = "" + subId;
        string cpage = null;
        for (int attemptsLeft = maxAttempts; cpage == null && attemptsLeft > 0; attemptsLeft--)
        {
            bool requestFailed = false;
            try
            {
                Console.WriteLine("Getting page: " + subUrl);
                cpage = await http.GetStringAsync(subUrl);
            }
            catch (Exception E)
            {
                Console.WriteLine("GET request error (" + subUrl + "): " + E.Message);
                requestFailed = true;
            }
            if (requestFailed && attemptsLeft > 1)
            {
                await Task.Delay(retryDelayMs);
            }
        }
        if (cpage == null)
        {
            Console.WriteLine("Giving up on #" + subId);
            res.failedToGetPage.Add(subId);
            continue;
        }

        // process submission page: locate the download link using the first key that matches
        var downbtnkeys = new string[] { "<a href=\"//", "<a href=\"//", "<a href=\"//" };
        SubmissionProps sp = new SubmissionProps();
        sp.SUBMID = subId;
        int keypos = -1;
        foreach (var downbtnkey in downbtnkeys)
        {
            keypos = cpage.IndexOf(downbtnkey, StringComparison.Ordinal);
            if (keypos >= 0)
            {
                break;
            }
        }
        if (keypos >= 0)
        {
            cpage = cpage.Substring(keypos);
            cpage = cpage.Substring(cpage.IndexOf("/", StringComparison.Ordinal));
        }
        // the URL runs up to the closing quote of the href attribute
        int urlEnd = keypos >= 0 ? cpage.IndexOf("\"", StringComparison.Ordinal) : -1;
        if (urlEnd < 0)
        {
            Console.WriteLine("[Error] got page, but it doesn't contain any download links.");
            res.failedToGetPage.Add(subId);
            continue;
        }
        sp.URL = "https:" + cpage.Substring(0, urlEnd);

        #region download URL parsing
        const string urlComponentsRegex = @"\/art\/(?<artist>.+?)\/.*?(?<curfid>\d+?)\/(?<fid>.+?)\.(?<fname>.*)$";
        var urlCompMatch = Regex.Match(sp.URL, urlComponentsRegex);
        if (!urlCompMatch.Success)
        {
            Console.WriteLine("Error: could not make sense of the URL for submission " + subId);
            res.failedToDownload.Add(subId);
            continue;
        }

        sp.ARTIST = urlCompMatch.Groups["artist"].Value;
        sp.CURFILEID = urlCompMatch.Groups["curfid"].Value;
        uint.TryParse(sp.CURFILEID, out subFid);
        sp.FILEID = urlCompMatch.Groups["fid"].Value;
        uint.TryParse(sp.FILEID, out subInitFid);
        string filename = urlCompMatch.Groups["fname"].Value;

        // original filename usually follows this pattern:
        //   $file_id.$artist_originalFileName.ext
        //            [^ "fname" group value    ]
        // however, some old (~2006) submissions use this pattern instead:
        //   $file_id.$artist.originalFileName.ext
        // it is also quite common for the fname to be blank, i.e.
        //   $file_id.
        // in this case we have no choice but to come up with our own name
        var fnameCheckMatch = Regex.Match(filename, string.Format(@"^{0}[_.](.+)", Regex.Escape(sp.ARTIST)));
        if (fnameCheckMatch.Success)
        {
            var filepart = fnameCheckMatch.Groups[1].Value;
            bool endsWithDot = filepart.EndsWith(".", StringComparison.Ordinal);
            if (endsWithDot || !filepart.Contains(".")) // no extension or an empty one
            {
                Console.WriteLine("Info :: missing filename extension, assuming .jpg");
                string withoutTrailingDot = endsWithDot ? filepart.Substring(0, filepart.Length - 1) : filepart;
                filepart = withoutTrailingDot + ".jpg";
            }
            var filepartDotSplit = filepart.Split(new char[] { '.' });
            sp.FILEPART = filepart;
            sp.FILEPARTNE = string.Join(".", filepartDotSplit.Take(filepartDotSplit.Length - 1));
            sp.EXT = Utils.StripIllegalFilenameChars(filepartDotSplit.Last());
        }
        else // completely broken filenames get replaced with "unknown.jpg"
        {
            Console.WriteLine("Info :: broken filename detected, replacing with \"unknown.jpg\"");
            sp.FILEPART = "unknown.jpg";
            sp.FILEPARTNE = "unknown";
            sp.EXT = "jpg";
        }
        sp.FILEFULL = sp.FILEID + "." + sp.ARTIST + "_" + sp.FILEPART;
        sp.FILEFULL = Utils.StripIllegalFilenameChars(sp.FILEFULL);
        sp.FILEPART = Utils.StripIllegalFilenameChars(sp.FILEPART);
        #endregion

        #region scraps detection, submission date, title and description
        const string key_title = @"<div class=""submission-title"">";
        const string key_enddiv = "</div>";
        var submTitlePos = cpage.IndexOf(key_title, StringComparison.Ordinal);

        // scraps check: if there is a link to /$user/scraps before the submission title, it's in scraps
        var scrapsCheckMatch = Regex.Match(cpage, string.Format(@"href=""/scraps/{0}/""", Regex.Escape(sp.ARTIST)));
        bool aScraps = scrapsCheckMatch.Success && scrapsCheckMatch.Index < submTitlePos;
        Console.WriteLine(aScraps ? "Location: scraps" : "Location: main gallery");

        Utils.FillPropertiesFromDateTime(DateTime.Now, sp); // set Now as a fallback date
        sp.TITLE = "Unknown"; // fallback title

        // title: a missing title block must not abort the whole batch, the fallback is kept instead
        bool titleFound = false;
        if (submTitlePos >= 0)
        {
            cpage = cpage.Substring(submTitlePos);
            // the title block is cut after its second closing </div>
            int firstEnd = cpage.IndexOf(key_enddiv, StringComparison.Ordinal);
            int secondEnd = cpage.IndexOf(key_enddiv, firstEnd + 1, StringComparison.Ordinal);
            string sub_title_div = cpage.Substring(0, secondEnd + key_enddiv.Length);
            var titleMatch = Regex.Match(sub_title_div, "<h2><p>(.+?)</p></h2>", RegexOptions.CultureInvariant);
            if (titleMatch.Success)
            {
                sp.TITLE = Utils.StripIllegalFilenameChars(System.Net.WebUtility.HtmlDecode(titleMatch.Groups[1].Value));
                titleFound = true;
            }
        }
        if (titleFound)
        {
            Console.WriteLine("Title: " + sp.TITLE);
        }
        else
        {
            Console.WriteLine("Warning :: no submission title found!");
        }

        // replace relative date with the absolute one
        string sub_date_strong = "";
        var dateMatch = Regex.Match(cpage, "<strong.+?title=\"(.+?)\" class=\"popup_date\">(.+?)<.+?</strong>", RegexOptions.CultureInvariant);
        if (dateMatch.Success)
        {
            string dateMatchVal = dateMatch.Value;
            string dateTimeStr = dateMatch.Groups[1].Value;      // fixed format date
            string dateTimeStrFuzzy = dateMatch.Groups[2].Value;

            // depending on user settings, fuzzy and fixed times may be swapped
            if (dateTimeStrFuzzy.Contains(" PM") || dateTimeStrFuzzy.Contains(" AM"))
            {
                var temporary = dateTimeStr;
                dateTimeStr = dateTimeStrFuzzy;
                dateTimeStrFuzzy = temporary;
            }

            // replace relative date with a fixed format one
            sub_date_strong = dateMatchVal.Replace(dateTimeStrFuzzy, dateTimeStr);

            // parse date
            dateTimeStr = dateTimeStr.Replace(",", "");
            const string dateFormat = "MMM d yyyy hh:mm tt";
            try
            {
                DateTime dateTime = DateTime.ParseExact(dateTimeStr, dateFormat, CultureInfo.InvariantCulture);
                Utils.FillPropertiesFromDateTime(dateTime, sp);
            }
            catch (Exception e)
            {
                Console.WriteLine("Warning :: cannot parse date :: " + e.Message);
                Console.WriteLine("Info :: date string :: " + dateTimeStr);
            }
        }
        else
        {
            Console.WriteLine("Warning :: unable to extact submission date");
        }

        // extract description; when the block is missing only the generated header is kept
        const string key_desc = @"<div class=""submission-description user-submitted-links"">";
        string description = "";
        int descPos = cpage.IndexOf(key_desc, StringComparison.Ordinal);
        if (descPos >= 0)
        {
            description = cpage.Substring(descPos);
            // the description block is cut after its second closing </div>
            int firstEnd = description.IndexOf(key_enddiv, StringComparison.Ordinal);
            int secondEnd = description.IndexOf(key_enddiv, firstEnd + 1, StringComparison.Ordinal);
            description = description.Substring(0, secondEnd + key_enddiv.Length);
            description = description.Replace("href=\"/", "href=\"");
            description = description.Replace("src=\"//", "src=\"https://");
        }
        else
        {
            Console.WriteLine("Warning :: no submission description found!");
        }
        string descriptionHeader = @"<div class=""submission-description-container link-override""> <div class=""submission-title""> <h2 class=""submission-title-header"">{{{title}}}</h2> Posted {{{date}}} </div><hr>"
            .Replace("{{{title}}}", sp.TITLE)
            .Replace("{{{date}}}", sub_date_strong);
        cpage = descriptionHeader + description;
        #endregion

        // apply template(s): %SCRAPS% first, then every public string field of the props as %FIELDNAME%
        string fname = GlobalSettings.Settings.filenameTemplate;
        string dfname = GlobalSettings.Settings.descrFilenameTemplate;
        var scrapsTemplate = aScraps
            ? GlobalSettings.Settings.scrapsTemplateActive
            : GlobalSettings.Settings.scrapsTemplatePassive;
        fname = fname.Replace("%SCRAPS%", scrapsTemplate);
        dfname = dfname.Replace("%SCRAPS%", scrapsTemplate);
        foreach (FieldInfo fi in sp.GetType().GetFields(BindingFlags.Instance | BindingFlags.Public))
        {
            if (fi.FieldType == typeof(string))
            {
                string placeholder = "%" + fi.Name + "%";
                string fieldValue = (string)fi.GetValue(sp);
                fname = fname.Replace(placeholder, fieldValue);
                dfname = dfname.Replace(placeholder, fieldValue);
#if DEBUG_PRINT_ALL_TEMPLATE_VALS
                // debug only: output all template values:
                Console.WriteLine(string.Format("+++ {0} = {1}", "%" + fi.Name + "%", (string)fi.GetValue(sp)));
#endif
            }
        }

        // make sure directories exist
        string fnamefull = Path.Combine(GlobalSettings.Settings.downloadPath, fname);
        string dfnamefull = Path.Combine(GlobalSettings.Settings.downloadPath, dfname);
        try
        {
            Directory.CreateDirectory(Path.GetDirectoryName(fnamefull));
            Directory.CreateDirectory(Path.GetDirectoryName(dfnamefull));
        }
        catch
        {
            Console.WriteLine("Failed to make sure target directories do exist.");
            break;
        }

        // save description
        if (needDescription)
        {
            try
            {
                File.WriteAllText(dfnamefull, cpage);
                Console.WriteLine("description saved to filename:" + dfname);
            }
            catch (Exception E)
            {
                Console.WriteLine("Error saving description:" + E.Message);
            }
        }

        var fileExists = File.Exists(fnamefull);
        Console.WriteLine("target filename: " + fname + (fileExists ? " (exists)" : ""));

        // at this point we have the actual file ID, and can skip downloading based on that
        bool storedFileIdMatches = dbSubFid == subFid && dbSubFid != 0;
        if (GlobalSettings.Settings.downloadOnlyOnce)
        {
            if (!updateMode && fileExists) // checked earlier: && !dbSubExists
            {
                if (subInitFid != subFid)
                {
                    Console.WriteLine(string.Format(
                        "Note :: submission {0} exists locally, but could've been updated\n" +
                        "consider running this task in update mode", subId
                    ));
                    SubmissionsDB.DB.AddSubmission(subIdInt);
                }
                else
                {
                    Console.WriteLine("Already exists, continuing~");
                    SubmissionsDB.DB.AddSubmissionWithFileId(subIdInt, subFid);
                }
                continue;
            }
            // this exact check can also be found before, it is repeated here for cases
            // when subFid was not known before the submission page request
            if (updateMode && storedFileIdMatches)
            {
                Console.WriteLine("Already downloaded, continuing~");
                continue;
            }
        }
        else // not `download only once`
        {
            if (!updateMode && fileExists)
            {
                if (subInitFid != subFid)
                {
                    SubmissionsDB.DB.AddSubmission(subIdInt);
                }
                else
                {
                    SubmissionsDB.DB.AddSubmissionWithFileId(subIdInt, subFid);
                }
                Console.WriteLine("Already exists, continuing~");
                continue;
            }
            if (updateMode && fileExists && storedFileIdMatches)
            {
                Console.WriteLine("Already exists, continuing~");
                continue;
            }
        }

        // if we got here, there was no reason to skip the download
        bool mayBeUselessDownload = false;
        string oldFileHash = "";
        if (fileExists)
        {
            Console.WriteLine(string.Format("subfid {0} dbsf {1}", subFid, dbSubFid));
            oldFileHash = Utils.FileHash(fnamefull);
            // keep the existing file, the new version goes to a versioned name
            fnamefull = Path.Combine(GlobalSettings.Settings.downloadPath,
                string.Format("{1} [v.{0}].{2}", subFid, fname, sp.EXT));
            // the DB cannot tell the versions apart when it has no file ID or the same one
            if (subFid == dbSubFid || dbSubFid == 0)
            {
                Console.WriteLine("Info :: stored metadata is insufficient; downloading a remote file to compare aganst local");
                mayBeUselessDownload = true;
            }
        }

        // download file
        bool downloaded = false;
        for (int attemptsLeft = maxAttempts; !downloaded && attemptsLeft > 0; attemptsLeft--)
        {
            Exception failure = null;
            try
            {
                Console.WriteLine("Downloading: " + sp.URL);
                // "?" can only be in the URL if the user named their submission this way
                // it WILL be mistreated as an URL parameter, but this dirty hack with explicit replacement fixes it
                using (var response = await http.GetAsync(sp.URL.Replace("?", "%3F"), HttpCompletionOption.ResponseHeadersRead))
                {
                    if (!response.IsSuccessStatusCode)
                    {
                        throw new HttpRequestException(string.Format("HTTP error: {0}", response.StatusCode));
                    }
                    using (Stream contentStream = await response.Content.ReadAsStreamAsync(),
                                  stream = new FileStream(fnamefull, FileMode.Create, FileAccess.Write, FileShare.None, 1024 * 1024 /*Mb*/, true))
                    {
                        // a plain contentStream.CopyToAsync(stream) works too, but may hang forever in case of network errors
                        await ReadNetworkStream(contentStream, stream, 5000);
                    }
                }
                downloaded = true;
            }
            catch (Exception E)
            {
                failure = E;
            }

            if (failure != null)
            {
                // write error message
                if (failure is ObjectDisposedException)
                {
                    Console.WriteLine("Network error (data receive timeout)");
                }
                else
                {
                    Console.WriteLine("GET request error (file " + sp.FILEID + "): " + failure.Message);
                }
                // remove incomplete download
                if (File.Exists(fnamefull))
                {
                    File.Delete(fnamefull);
                }
                // wait before trying again
                if (attemptsLeft > 1)
                {
                    await Task.Delay(retryDelayMs);
                }
            }
        }
        if (!downloaded)
        {
            Console.WriteLine("Giving up on downloading {0}", subId);
            res.failedToDownload.Add(subId);
            continue;
        }

        SubmissionsDB.DB.AddSubmissionWithFileId(subIdInt, subFid);
        if (mayBeUselessDownload)
        {
            var newFileHash = Utils.FileHash(fnamefull);
            if (newFileHash == oldFileHash)
            {
                Console.WriteLine("Note :: existing version matches the one on the server, removing a duplicate");
                File.Delete(fnamefull);
            }
        }

        Console.WriteLine("Done: #" + subId);
        TaskbarProgress.SetValue(currentConsoleHandle, subs.Count - i, subs.Count);
        res.processedPerfectly++;
    }

    // writing results
    try
    {
        if (res.failedToGetPage.Count > 0 || res.failedToDownload.Count > 0)
        {
            File.WriteAllLines(Path.Combine(GlobalSettings.Settings.systemPath, "get_sub_page_failed.log"), res.failedToGetPage);
            File.WriteAllLines(Path.Combine(GlobalSettings.Settings.systemPath, "download_failed.log"), res.failedToDownload);
        }
    }
    catch (Exception E)
    {
        Console.WriteLine("Failed to save list of subs with issues: " + E.Message);
    }

    // save DB
    SubmissionsDB.Save();

    // stop progress indicating
    TaskbarProgress.SetState(currentConsoleHandle, TaskbarProgress.TaskbarStates.NoProgress);

    return res;
}