/// <summary>
/// Rebuilds the tag backup document from scratch: one <c>tag</c> element per supplied name
/// under a <c>tags</c> root, then saves the document to <c>TagsFileName</c>.
/// </summary>
/// <param name="tagNames">
/// Tag names to record. Each name is passed through <c>ReplaceDoubleQuotes</c> before being
/// stored in the <c>name</c> attribute, and every entry starts with status <c>UNMODIFIED</c>.
/// </param>
public void BackupTagNames(List<string> tagNames)
{
    CleanTagLinks();
    tagsDoc = InitializeDocument();

    XmlElement root = tagsDoc.CreateElement(string.Empty, "tags", string.Empty);
    tagsDoc.AppendChild(root);

    int position = 0;
    foreach (string tagName in tagNames)
    {
        // Progress counter is 1-based.
        position++;
        string progress = $"Backing up tag ({position}).";
        PrintUtils.PrintRow(progress, 0, 0);

        XmlElement entry = tagsDoc.CreateElement(string.Empty, "tag", string.Empty);
        entry.SetAttribute("name", ReplaceDoubleQuotes(tagName));
        entry.SetAttribute("status", ModificationStatus.UNMODIFIED.ToString());
        root.AppendChild(entry);
    }

    tagsDoc.Save(TagsFileName);
}
/// <summary>
/// Rebuilds the series backup document from scratch: one <c>series</c> element per supplied
/// name under an <c>all_series</c> root, then saves the document to <c>SeriesFileName</c>.
/// </summary>
/// <param name="seriesNames">
/// Series names to record. Each name is passed through <c>ReplaceDoubleQuotes</c> before being
/// stored in the <c>name</c> attribute.
/// </param>
public void BackupSeriesNames(List<string> seriesNames)
{
    CleanSeries();
    seriesDoc = InitializeDocument();

    XmlElement root = seriesDoc.CreateElement(string.Empty, "all_series", string.Empty);
    seriesDoc.AppendChild(root);

    int position = 0;
    foreach (string seriesName in seriesNames)
    {
        // Progress counter is 1-based.
        position++;
        string progress = $"Backing up series ({position}).";
        PrintUtils.PrintRow(progress, 0, 0);

        XmlElement entry = seriesDoc.CreateElement(string.Empty, "series", string.Empty);
        entry.SetAttribute("name", ReplaceDoubleQuotes(seriesName));
        root.AppendChild(entry);
    }

    seriesDoc.Save(SeriesFileName);
}
/// <summary>
/// Inserts every string in <paramref name="values"/> as a new row of
/// <paramref name="tableName"/>, writing it to <paramref name="columnName"/> using
/// multi-row INSERT statements.
/// </summary>
/// <param name="values">Values to insert, one row per value.</param>
/// <param name="tableName">Target table; spliced into the SQL text, so it must be a trusted identifier.</param>
/// <param name="columnName">Target column; spliced into the SQL text, so it must be a trusted identifier.</param>
/// <remarks>
/// Values are bound as command parameters rather than concatenated into the statement, so
/// quotes, backslashes and other special characters in a value cannot change the SQL.
/// Rows are sent in batches of at most <c>BatchQueryLimit</c>; each batch runs in its own
/// transaction, so batches committed before a failure stay in the table while the failing
/// batch is rolled back. Exceptions are passed to <c>PrintUtils.Report</c> and not rethrown.
/// </remarks>
private void PersistColumnBatch(ICollection<string> values, string tableName, string columnName)
{
    string insertPrefix = $"INSERT INTO {tableName}({columnName}) VALUES";
    MySqlTransaction transaction = null;

    try
    {
        conn.Open();

        using (MySqlCommand cmd = new MySqlCommand(string.Empty, conn))
        {
            StringBuilder insertValues = new StringBuilder(insertPrefix);
            int written = 0;
            int inBatch = 0;

            foreach (string s in values)
            {
                written++;
                string output = $"Writing value to column {tableName}.{columnName} ({written} / {values.Count}).";
                PrintUtils.PrintRow(output, 0, 0);

                // One placeholder per row; the value itself never becomes part of the SQL text.
                string parameterName = $"@v{inBatch}";
                insertValues.Append(inBatch == 0 ? "(" : ",(").Append(parameterName).Append(")");
                cmd.Parameters.AddWithValue(parameterName, s);
                inBatch++;

                // Flush when the batch is full or when the final value has been queued.
                if (inBatch >= BatchQueryLimit || written == values.Count)
                {
                    insertValues.Append(";");
                    cmd.CommandText = insertValues.ToString();

                    transaction = conn.BeginTransaction();
                    cmd.Transaction = transaction;
                    cmd.ExecuteNonQuery();
                    transaction.Commit();
                    transaction = null;

                    // Reset the statement and its parameters for the next batch.
                    cmd.Parameters.Clear();
                    insertValues.Length = insertPrefix.Length;
                    inBatch = 0;
                }
            }
        }
    }
    catch (Exception e)
    {
        PrintUtils.Report(e);

        // Only a batch that was started but not committed is still referenced here.
        if (transaction != null)
        {
            try
            {
                transaction.Rollback();
            }
            catch (Exception rollbackError)
            {
                PrintUtils.Report(rollbackError);
            }
        }
    }
    finally
    {
        conn.Close();
    }
}
/// <summary>
/// Empties <paramref name="tableName"/> in batches of <c>BatchQueryLimit</c> rows and then
/// resets the table's AUTO_INCREMENT counter to 1.
/// </summary>
/// <param name="tableName">Table to empty; spliced into the SQL text, so it must be a trusted identifier.</param>
/// <param name="idColumn">
/// Name of the table's id column. Kept for caller compatibility; deletion no longer probes
/// individual ids, so the value is not used.
/// </param>
/// <remarks>
/// Deletion repeats until a DELETE statement reports zero affected rows, so the table is
/// emptied even when its ids are not contiguous. The progress output reports the number of
/// rows actually removed. Exceptions are passed to <c>PrintUtils.Report</c> and not rethrown.
/// </remarks>
private void Clean(string tableName, string idColumn)
{
    try
    {
        Console.WriteLine("Cleaning database...");
        conn.Open();

        string deleteBatch = $"DELETE FROM {tableName} LIMIT {BatchQueryLimit}";
        string resetInc = $"ALTER TABLE {tableName} AUTO_INCREMENT = 1";
        string output;
        int deleted = 0;

        using (MySqlCommand deleteCmd = new MySqlCommand(deleteBatch, conn))
        {
            deleteCmd.CommandTimeout = TimeOut;

            // ExecuteNonQuery returns the number of rows the DELETE removed; zero means the table is empty.
            int affected;
            while ((affected = deleteCmd.ExecuteNonQuery()) > 0)
            {
                deleted += affected;
                output = $"Deleted {deleted} rows.";
                PrintUtils.PrintRow(output, 0, 0);
            }
        }

        using (MySqlCommand resetCmd = new MySqlCommand(resetInc, conn))
        {
            resetCmd.ExecuteNonQuery();
        }

        output = "Database cleaned.";
        PrintUtils.PrintRow(output, 0, 0);
    }
    catch (Exception e)
    {
        PrintUtils.Report(e);
    }
    finally
    {
        conn.Close();
    }
}
/// <summary>
/// Persists the series names of <paramref name="index"/> into the <c>series</c> table and then
/// writes one <c>series_tags</c> row for every (tag, series) pair in the index.
/// </summary>
/// <param name="index">Maps each series name to the set of tag names linked to it.</param>
/// <remarks>
/// Name-to-id dictionaries are loaded from the <c>tags</c> and <c>series</c> tables after the
/// series names have been inserted. Link rows are accumulated into a multi-row INSERT that is
/// flushed, in its own transaction, once at least <c>BatchQueryLimit</c> rows are pending or
/// the last series has been processed. Nothing is sent when no rows are pending, so a trailing
/// series without tags no longer produces an invalid statement. A series or tag name missing
/// from the id dictionaries raises an exception that aborts the remaining work. Exceptions are
/// passed to <c>PrintUtils.Report</c> and not rethrown.
/// </remarks>
public void PersistSeriesTags(IDictionary<string, HashSet<string>> index)
{
    const string insertPrefix = "INSERT INTO series_tags(tag_id, series_id) VALUES";
    MySqlTransaction transaction = null;
    string output;

    PersistColumnBatch(index.Keys.ToList(), "series", "name");

    output = "Preparing ID dictionaries.";
    PrintUtils.PrintRow(output, 0, 0);

    try
    {
        conn.Open();

        Dictionary<string, int> tagDict = ReadNameIdPairs("SELECT name, id FROM tags");
        Dictionary<string, int> seriesDict = ReadNameIdPairs("SELECT name, id FROM series");

        StringBuilder insertSeriesTags = new StringBuilder(insertPrefix);
        int seriesTotal = index.Keys.Count;
        int seriesDone = 0;
        int pendingRows = 0;

        foreach (KeyValuePair<string, HashSet<string>> entry in index)
        {
            int seriesId = seriesDict[entry.Key];
            seriesDone++;
            output = $"Writing series ({seriesDone} / {seriesTotal}).";
            PrintUtils.PrintRow(output, 0, 0);

            foreach (string tagName in entry.Value)
            {
                int tagId = tagDict[tagName];
                insertSeriesTags.Append($"({tagId}, {seriesId}),");
                pendingRows++;
            }

            bool isLastSeries = seriesDone == seriesTotal;

            // Never flush an empty batch: with no rows appended there is no trailing comma to strip.
            if (pendingRows > 0 && (pendingRows >= BatchQueryLimit || isLastSeries))
            {
                // Replace the trailing comma with the statement terminator.
                insertSeriesTags.Length--;
                insertSeriesTags.Append(";");

                using (MySqlCommand cmd = new MySqlCommand(insertSeriesTags.ToString(), conn))
                {
                    cmd.CommandTimeout = TimeOut;
                    transaction = conn.BeginTransaction();
                    cmd.Transaction = transaction;
                    cmd.ExecuteNonQuery();
                    transaction.Commit();
                    transaction = null;
                }

                insertSeriesTags.Length = insertPrefix.Length;
                pendingRows = 0;
            }
        }
    }
    catch (Exception e)
    {
        PrintUtils.Report(e);

        // Only a batch that was started but not committed is still referenced here.
        if (transaction != null)
        {
            try
            {
                transaction.Rollback();
            }
            catch (Exception rollbackError)
            {
                PrintUtils.Report(rollbackError);
            }
        }
    }
    finally
    {
        conn.Close();
    }
}

/// <summary>
/// Runs <paramref name="query"/> on the already-open connection and reads its first column as
/// a string key and its second column as an int value.
/// </summary>
private Dictionary<string, int> ReadNameIdPairs(string query)
{
    Dictionary<string, int> pairs = new Dictionary<string, int>();

    using (MySqlCommand cmd = new MySqlCommand(query, conn))
    using (MySqlDataReader rdr = cmd.ExecuteReader())
    {
        while (rdr.Read())
        {
            pairs.Add((string)rdr[0], (int)rdr[1]);
        }
    }

    return pairs;
}
/// <summary>
/// Recomputes <paramref name="countColumn"/> for every row of <paramref name="tableName"/>,
/// processing the table in pages of <c>BatchQueryLimit</c> ids.
/// </summary>
/// <param name="tableName">Table whose rows are updated; spliced into the SQL text.</param>
/// <param name="idColumn">Id column used for paging and for matching rows to update; spliced into the SQL text.</param>
/// <param name="countColumn">Column that receives the computed count; spliced into the SQL text.</param>
/// <param name="countQuery">
/// Query template in which every <c>%</c> is replaced by the comma-separated ids of the current
/// page. Its result is read as (int id, long count) rows.
/// </param>
/// <remarks>
/// Pages are read with an explicit ORDER BY on the id column so that consecutive LIMIT/OFFSET
/// pages neither skip nor repeat rows. Ids that the count query does not return are written
/// with a count of 0. Exceptions are passed to <c>PrintUtils.Report</c> and not rethrown.
/// </remarks>
private void CountPosts(string tableName, string idColumn, string countColumn, string countQuery)
{
    try
    {
        string output = "Computing post counts...";
        int tagsOffset = 0;
        PrintUtils.PrintRow(output, 0, 0);
        conn.Open();

        while (true)
        {
            // Without ORDER BY the row order of a LIMIT/OFFSET page is not guaranteed to be stable.
            string getTagIDs = $"SELECT {idColumn} FROM {tableName} ORDER BY {idColumn} LIMIT {BatchQueryLimit} OFFSET {tagsOffset}";
            List<int> ids = new List<int>();

            using (MySqlCommand idsCmd = new MySqlCommand(getTagIDs, conn))
            {
                idsCmd.CommandTimeout = TimeOut;
                using (MySqlDataReader rdr = idsCmd.ExecuteReader())
                {
                    while (rdr.Read())
                    {
                        ids.Add((int)rdr[0]);
                    }
                }
            }

            // An empty page means every row has been processed.
            if (ids.Count == 0)
            {
                break;
            }

            string countQueryUpdated = countQuery.Replace("%", string.Join(",", ids));
            var idCounts = new Dictionary<int, long>();

            using (MySqlCommand countCmd = new MySqlCommand(countQueryUpdated, conn))
            {
                countCmd.CommandTimeout = TimeOut;
                using (MySqlDataReader rdr = countCmd.ExecuteReader())
                {
                    while (rdr.Read())
                    {
                        idCounts.Add((int)rdr[0], (long)rdr[1]);
                    }
                }
            }

            // Ids absent from the count result get an explicit zero so stale counts are overwritten.
            foreach (int id in ids)
            {
                if (!idCounts.ContainsKey(id))
                {
                    idCounts.Add(id, 0);
                }
            }

            foreach (var pair in idCounts)
            {
                output = $"Updating {idColumn} {pair.Key} with {countColumn} = {pair.Value}.";
                PrintUtils.PrintRow(output, 0, 0);

                string updateCount = $"UPDATE {tableName} SET {countColumn} = {pair.Value} WHERE {idColumn} = {pair.Key}";
                using (MySqlCommand updateCmd = new MySqlCommand(updateCount, conn))
                {
                    updateCmd.ExecuteNonQuery();
                }
            }

            // Advance by the rows actually read so the progress figures stay accurate on the last page.
            tagsOffset += ids.Count;
            output = $"Processed {ids.Count} posts ({tagsOffset} done).";
            PrintUtils.PrintRow(output, 0, 0);
        }
    }
    catch (Exception e)
    {
        PrintUtils.Report(e);
    }
    finally
    {
        conn.Close();
    }
}
/// <summary>
/// Builds the complete index: fetches character and series tag names, writes the empty names
/// to the backup, fetches the link data, persists everything to the database, and logs the
/// total running time.
/// </summary>
/// <remarks>
/// The hour field of the logged duration is the total number of elapsed hours, so runs longer
/// than 24 hours are reported correctly.
/// </remarks>
public void BuildIndex()
{
    Console.Clear();
    Console.WriteLine("Building index...");
    Stopwatch watch = new Stopwatch();
    watch.Start();

    // Fetch character tags
    FetchTagNames(4, "Fetching character tags", name => tagLinks.TryAdd(name, new List<string>()));

    // Fetch series tags
    FetchTagNames(3, "Fetching series tags", name => seriesTags.TryAdd(name, new HashSet<string>()));

    // Write empty tag and series names to backup
    _backup.BackupTagNames(tagLinks.Keys.ToList());
    _backup.BackupSeriesNames(seriesTags.Keys.ToList());

    // Fetch data from external source and write to backup
    FetchData();

    // Persist to database
    Console.Clear();
    Console.WriteLine("Writing to database...");
    _persistence.CleanTagLinks();
    _persistence.PersistTagLinks(tagLinks);
    _persistence.CleanSeries();
    _persistence.PersistSeriesTags(seriesTags);
    _persistence.CountTagLinks();
    _persistence.CountSeriesLinks();
    SwitchDatabase();

    watch.Stop();
    TimeSpan timespan = watch.Elapsed;

    // TimeSpan.Hours wraps at 24; TotalHours keeps whole days in the hour field.
    string timeString = string.Format("{0:D2}h:{1:D2}m:{2:D2}s:{3:D3}ms", (int)timespan.TotalHours, timespan.Minutes, timespan.Seconds, timespan.Milliseconds);
    Resources.SystemLogger.Log($"Downloaded {tagLinks.Keys.Count} tags in {timeString} using {numThreads} thread(s).");
}

/// <summary>
/// Pages through <c>tags.xml</c> for one tag category and hands every tag name that does not
/// contain '#' to <paramref name="register"/>, printing a running counter as it goes.
/// </summary>
/// <param name="category">Value placed in the <c>search[category]</c> query parameter.</param>
/// <param name="progressLabel">Text printed in front of the running counter.</param>
/// <param name="register">Callback that stores an accepted tag name.</param>
private void FetchTagNames(int category, string progressLabel, Action<string> register)
{
    int lastId = 0;
    int fetched = 0;
    XmlNodeList tagNodes;

    do
    {
        // NOTE(review): GetLinks treats a null result from client.GetPosts as a failed request;
        // a null result here would throw on SelectNodes. Confirm whether a retry is wanted.
        XmlElement tagRoot = client.GetPosts(urlToUse + $"tags.xml?search[category]={category}&limit={TagsLimit}&page=a{lastId}&only=name,id").Result;
        tagNodes = tagRoot.SelectNodes("tag");

        for (int i = 0; i < tagNodes.Count; i++)
        {
            string tagName = tagNodes[i].SelectSingleNode("name").InnerText;
            if (!tagName.Contains("#"))
            {
                register(tagName);
                fetched++;
                string output = $"{progressLabel} ({fetched}).";
                PrintUtils.PrintRow(output, 0, 0);
            }

            // The id of the first node on a page becomes the cursor for the next request.
            if (i == 0)
            {
                lastId = int.Parse(tagNodes[i].SelectSingleNode("id").InnerText);
            }
        }
    }
    while (tagNodes.Count != 0);
}
/// <summary>
/// Downloads the posts for the tags at positions <paramref name="start"/> through
/// <paramref name="end"/> (inclusive) of <c>tagLinks.Keys</c>, collecting file URLs and counting
/// series-tag occurrences, and writes the result for each tag to the backup.
/// </summary>
/// <param name="start">Zero-based position of the first tag handled by this call.</param>
/// <param name="end">Zero-based position of the last tag handled by this call.</param>
/// <remarks>
/// The current thread's name is parsed as an integer and used both in progress messages and as
/// the row argument of <c>PrintUtils.PrintRow</c>. For each tag, pages are requested until an
/// empty page is returned. A request that yields null is retried with a doubling wait, starting
/// at 10 seconds, while the wait does not exceed <c>BackoffLimitSeconds</c>. If a page still
/// cannot be read, the failure is logged and the next page is tried.
/// The assigned slice of keys is materialised once up front instead of being looked up by
/// position for every tag.
/// </remarks>
private void GetLinks(int start, int end)
{
    int total = end - start + 1;

    // Enumerate the key collection once instead of calling ElementAt for every tag.
    List<string> assignedTags = tagLinks.Keys.Skip(start).Take(total).ToList();
    if (assignedTags.Count == 0)
    {
        return;
    }

    int threadRow = int.Parse(Thread.CurrentThread.Name);
    IDictionary<string, int> tagOccurrences = new Dictionary<string, int>();
    List<string> linksForTag = new List<string>();

    for (int t = 0; t < assignedTags.Count; t++)
    {
        string tagName = assignedTags[t];
        int ordinal = t + 1;
        int page = 1;

        // NOTE(review): the loop only ends on an empty page; if every request keeps failing it
        // will keep advancing pages indefinitely. Consider bounding consecutive failures.
        while (true)
        {
            try
            {
                int backoffSeconds = 10;
                string output = $"Thread {threadRow}: processing tag '{tagName}' ({ordinal} / {total}; page #{page}).";
                PrintUtils.PrintRow(output, 0, threadRow);

                string path = urlToUse + $"posts.xml?tags={tagName} rating:safe&limit={PostsLimit}&page={page}&only=file_url,tag_string_copyright";
                XmlElement postRoot = client.GetPosts(path).Result;

                // Keep trying to fetch a page of posts if the first request fails. Wait for a doubling backoff-period.
                while (postRoot == null && backoffSeconds <= BackoffLimitSeconds)
                {
                    output = $"Thread {threadRow} (Stalled; backoff: {backoffSeconds}), processing tag '{tagName}' ({ordinal} / {total}; page #{page}).";
                    PrintUtils.PrintRow(output, 0, threadRow);
                    Thread.Sleep(backoffSeconds * 1000);
                    postRoot = client.GetPosts(path).Result;
                    backoffSeconds *= 2;
                }

                // A still-null postRoot throws here and is handled by the catch below.
                XmlNodeList postNodes = postRoot.SelectNodes("post");

                // If an empty page is reached, move on to the next tag.
                if (postNodes.Count == 0)
                {
                    break;
                }

                for (int k = 0; k < postNodes.Count; k++)
                {
                    XmlNode fileNode = postNodes[k].SelectSingleNode("file-url");
                    XmlNode seriesNode = postNodes[k].SelectSingleNode("tag-string-copyright");

                    // If there is no file url, simply skip the post
                    if (fileNode != null)
                    {
                        linksForTag.Add(fileNode.InnerText);
                    }

                    if (seriesNode == null)
                    {
                        continue;
                    }

                    foreach (string seriesName in seriesNode.InnerText.Split(" "))
                    {
                        if (string.IsNullOrEmpty(seriesName) || !seriesTags.ContainsKey(seriesName))
                        {
                            continue;
                        }

                        // TryGetValue leaves the count at 0 for a series seen for the first time.
                        int seen;
                        tagOccurrences.TryGetValue(seriesName, out seen);
                        tagOccurrences[seriesName] = seen + 1;
                    }
                }
            }
            catch (NullReferenceException e)
            {
                // Skip the page and try fetching the next page
                Resources.SystemLogger.Log($"Failed to retrieve page {page} for tag {tagName}." + Environment.NewLine + e.StackTrace);
            }

            page++;
        }

        List<string> topSeries = IndexUtils.GetTopSeries(ref tagOccurrences, SeriesLimit);

        // Backup each tag
        foreach (string series in topSeries)
        {
            _backup.BackupSingleSeriesTags(series, tagName);
        }

        _backup.BackupSingleTagLinks(tagName, linksForTag);

        // Reset the per-tag accumulators before moving on.
        tagOccurrences.Clear();
        linksForTag.Clear();
        ClearBelow();
    }
}