コード例 #1
0
        /// <summary>
        /// Rebuilds the tag backup document: calls <c>CleanTagLinks</c>, replaces <c>tagsDoc</c> with the
        /// result of <c>InitializeDocument</c>, adds a <c>tags</c> root with one <c>tag</c> child per name,
        /// and saves the document to <c>TagsFileName</c>.
        /// </summary>
        /// <param name="tagNames">Tag names to write; each becomes the <c>name</c> attribute of a <c>tag</c> element.</param>
        public void BackupTagNames(List <string> tagNames)
        {
            CleanTagLinks();

            tagsDoc = InitializeDocument();

            XmlElement root = tagsDoc.CreateElement(string.Empty, "tags", string.Empty);

            tagsDoc.AppendChild(root);

            for (int index = 0; index < tagNames.Count; index++)
            {
                // Progress counter shown to the user is 1-based.
                PrintUtils.PrintRow($"Backing up tag ({index + 1}).", 0, 0);

                XmlElement entry = tagsDoc.CreateElement(string.Empty, "tag", string.Empty);

                entry.SetAttribute("name", ReplaceDoubleQuotes(tagNames[index]));
                // Every freshly backed-up tag starts out flagged as UNMODIFIED.
                entry.SetAttribute("status", ModificationStatus.UNMODIFIED.ToString());
                root.AppendChild(entry);
            }

            tagsDoc.Save(TagsFileName);
        }
コード例 #2
0
        /// <summary>
        /// Rebuilds the series backup document: calls <c>CleanSeries</c>, replaces <c>seriesDoc</c> with the
        /// result of <c>InitializeDocument</c>, adds an <c>all_series</c> root with one <c>series</c> child
        /// per name, and saves the document to <c>SeriesFileName</c>.
        /// </summary>
        /// <param name="seriesNames">Series names to write; each becomes the <c>name</c> attribute of a <c>series</c> element.</param>
        public void BackupSeriesNames(List <string> seriesNames)
        {
            CleanSeries();

            seriesDoc = InitializeDocument();

            XmlElement root = seriesDoc.CreateElement(string.Empty, "all_series", string.Empty);

            seriesDoc.AppendChild(root);

            for (int index = 0; index < seriesNames.Count; index++)
            {
                // Progress counter shown to the user is 1-based.
                PrintUtils.PrintRow($"Backing up series ({index + 1}).", 0, 0);

                XmlElement entry = seriesDoc.CreateElement(string.Empty, "series", string.Empty);

                entry.SetAttribute("name", ReplaceDoubleQuotes(seriesNames[index]));
                root.AppendChild(entry);
            }

            seriesDoc.Save(SeriesFileName);
        }
コード例 #3
0
        /// <summary>
        /// Inserts every string in <paramref name="values"/> into a single column of a table, using
        /// multi-row INSERT statements. Each statement runs in its own transaction.
        /// </summary>
        /// <remarks>
        /// Values are bound as command parameters instead of being spliced into the SQL text, so quotes,
        /// backslashes and other special characters in a value cannot alter the statement.
        /// Failures are reported through <c>PrintUtils.Report</c> and are not rethrown; batches committed
        /// before the failure stay committed.
        /// </remarks>
        /// <param name="values">Values to insert, one row per value.</param>
        /// <param name="tableName">Target table; interpolated into the SQL text, so it must be a trusted identifier.</param>
        /// <param name="columnName">Target column; interpolated into the SQL text, so it must be a trusted identifier.</param>
        private void PersistColumnBatch(ICollection <string> values, string tableName, string columnName)
        {
            string           insertPrefix = $"INSERT INTO {tableName}({columnName}) VALUES";
            string           output;
            StringBuilder    insertValues = new StringBuilder(insertPrefix);
            List <string>    batch        = new List <string>();
            MySqlTransaction transaction  = null;
            int              i            = 1;
            int              j            = 0;

            try
            {
                conn.Open();

                foreach (string s in values)
                {
                    output = $"Writing value to column {tableName}.{columnName} ({i++} / {values.Count}).";

                    PrintUtils.PrintRow(output, 0, 0);

                    // One placeholder per value; the value itself is bound when the batch is flushed.
                    insertValues.Append($"(@p{batch.Count})");
                    batch.Add(s);

                    // Same flush points as before: a lone value, every BatchQueryLimit-th index, and the last value.
                    if (values.Count == 1 || (j > 0 && (j % BatchQueryLimit == 0 || j == values.Count - 1)))
                    {
                        insertValues.Append(";");

                        // Open the transaction only when there is something to execute, so no empty
                        // transaction is left dangling after the final batch.
                        transaction = conn.BeginTransaction();

                        using (MySqlCommand cmd = new MySqlCommand(insertValues.ToString(), conn, transaction))
                        {
                            for (int p = 0; p < batch.Count; p++)
                            {
                                cmd.Parameters.AddWithValue($"@p{p}", batch[p]);
                            }

                            cmd.ExecuteNonQuery();
                        }

                        transaction.Commit();
                        transaction.Dispose();
                        transaction = null;

                        insertValues = new StringBuilder(insertPrefix);
                        batch.Clear();
                    }
                    else
                    {
                        insertValues.Append(",");
                    }

                    j++;
                }
            }
            catch (Exception e)
            {
                PrintUtils.Report(e);

                // Undo the batch that was in flight, if any. A failing rollback (e.g. a dead
                // connection) is reported as well rather than escaping from the method.
                if (transaction != null)
                {
                    try
                    {
                        transaction.Rollback();
                    }
                    catch (Exception rollbackError)
                    {
                        PrintUtils.Report(rollbackError);
                    }
                }
            }
            finally
            {
                conn.Close();
            }
        }
コード例 #4
0
        /// <summary>
        /// Empties a table in batches of <c>BatchQueryLimit</c> rows and then resets its AUTO_INCREMENT
        /// counter to 1.
        /// </summary>
        /// <remarks>
        /// The loop keeps issuing <c>DELETE ... LIMIT</c> until a statement affects no rows. This does not
        /// depend on the IDs being contiguous, and the progress output reflects the number of rows
        /// actually deleted. Failures are reported through <c>PrintUtils.Report</c> and are not rethrown.
        /// </remarks>
        /// <param name="tableName">Table to empty; interpolated into the SQL text, so it must be a trusted identifier.</param>
        /// <param name="idColumn">Name of the table's ID column. Kept for caller compatibility; the affected-row
        /// count now drives the loop, so the column is no longer queried.</param>
        private void Clean(string tableName, string idColumn)
        {
            try
            {
                Console.WriteLine("Cleaning database...");

                conn.Open();

                string deleteBatch = $"DELETE FROM {tableName} LIMIT {BatchQueryLimit}";
                string resetInc    = $"ALTER TABLE {tableName} AUTO_INCREMENT = 1";
                string output;
                int    deleted = 0;
                int    affected;

                using (MySqlCommand deleteCmd = new MySqlCommand(deleteBatch, conn))
                {
                    deleteCmd.CommandTimeout = TimeOut;

                    // ExecuteNonQuery returns the number of rows the DELETE removed; zero means the table is empty.
                    while ((affected = deleteCmd.ExecuteNonQuery()) > 0)
                    {
                        deleted += affected;
                        output   = $"Deleted {deleted} rows.";

                        PrintUtils.PrintRow(output, 0, 0);
                    }
                }

                using (MySqlCommand resetCmd = new MySqlCommand(resetInc, conn))
                {
                    resetCmd.ExecuteNonQuery();
                }

                output = $"Database cleaned.";

                PrintUtils.PrintRow(output, 0, 0);
            }
            catch (Exception e)
            {
                PrintUtils.Report(e);
            }
            finally
            {
                conn.Close();
            }
        }
コード例 #5
0
        /// <summary>
        /// Persists the series names in <paramref name="index"/> and then writes the series-to-tag
        /// associations into <c>series_tags</c> using batched multi-row INSERT statements.
        /// </summary>
        /// <remarks>
        /// Series names are inserted first via <c>PersistColumnBatch</c>. The name-to-id mappings of the
        /// <c>tags</c> and <c>series</c> tables are then loaded and used to translate names into ID pairs.
        /// A batch is flushed once at least <c>BatchQueryLimit</c> pairs are pending, or after the last
        /// series. Empty batches are skipped, so a trailing series without tags no longer produces a
        /// malformed statement. Failures are reported through <c>PrintUtils.Report</c> and are not
        /// rethrown; the batch in flight is rolled back.
        /// </remarks>
        /// <param name="index">Maps each series name to the set of tag names associated with it.</param>
        public void PersistSeriesTags(IDictionary <string, HashSet <string> > index)
        {
            string           insertPrefix = "INSERT INTO series_tags(tag_id, series_id) VALUES";
            string           output;
            MySqlTransaction transaction  = null;

            PersistColumnBatch(index.Keys.ToList(), "series", "name");

            Dictionary <string, int> seriesDict = new Dictionary <string, int>();
            Dictionary <string, int> tagDict    = new Dictionary <string, int>();
            string querySeries = "SELECT name, id FROM series";
            string queryTags   = "SELECT name, id FROM tags";

            output = $"Preparing ID dictionaries.";

            PrintUtils.PrintRow(output, 0, 0);

            try
            {
                conn.Open();

                using (MySqlCommand tagsCmd = new MySqlCommand(queryTags, conn))
                using (MySqlDataReader tagsRdr = tagsCmd.ExecuteReader())
                {
                    while (tagsRdr.Read())
                    {
                        tagDict.Add((string)tagsRdr[0], (int)tagsRdr[1]);
                    }
                }

                using (MySqlCommand seriesCmd = new MySqlCommand(querySeries, conn))
                using (MySqlDataReader seriesRdr = seriesCmd.ExecuteReader())
                {
                    while (seriesRdr.Read())
                    {
                        seriesDict.Add((string)seriesRdr[0], (int)seriesRdr[1]);
                    }
                }

                StringBuilder insertSeriesTags = new StringBuilder(insertPrefix);
                int           seriesCount      = index.Keys.Count;
                int           processed        = 0;
                int           pending          = 0;

                foreach (string seriesName in index.Keys)
                {
                    int seriesId = seriesDict[seriesName];

                    processed++;
                    output = $"Writing series ({processed} / {seriesCount}).";

                    PrintUtils.PrintRow(output, 0, 0);

                    foreach (string tagName in index[seriesName])
                    {
                        int tagId = tagDict[tagName];

                        insertSeriesTags.Append($"({tagId}, {seriesId}),");

                        pending++;
                    }

                    bool flushDue = pending >= BatchQueryLimit || processed == seriesCount;

                    // Never flush an empty batch: removing the "trailing comma" from a statement with no
                    // tuples would strip a character from the INSERT prefix and yield invalid SQL.
                    if (flushDue && pending > 0)
                    {
                        // Replace the trailing comma with the statement terminator.
                        insertSeriesTags.Remove(insertSeriesTags.Length - 1, 1);
                        insertSeriesTags.Append(";");

                        transaction = conn.BeginTransaction();

                        using (MySqlCommand insertCmd = new MySqlCommand(insertSeriesTags.ToString(), conn, transaction))
                        {
                            insertCmd.CommandTimeout = TimeOut;
                            insertCmd.ExecuteNonQuery();
                        }

                        transaction.Commit();
                        transaction.Dispose();
                        transaction = null;

                        insertSeriesTags = new StringBuilder(insertPrefix);
                        pending          = 0;
                    }
                }
            }
            catch (Exception e)
            {
                PrintUtils.Report(e);

                // Undo the batch that was in flight, if any; report (rather than throw) a failing rollback.
                if (transaction != null)
                {
                    try
                    {
                        transaction.Rollback();
                    }
                    catch (Exception rollbackError)
                    {
                        PrintUtils.Report(rollbackError);
                    }
                }
            }
            finally
            {
                conn.Close();
            }
        }
コード例 #6
0
        /// <summary>
        /// Recomputes a per-row count column for every row of a table, working through the table in
        /// pages of <c>BatchQueryLimit</c> IDs.
        /// </summary>
        /// <remarks>
        /// For each page the IDs are read, substituted (comma-separated) for the <c>%</c> placeholder in
        /// <paramref name="countQuery"/>, and the resulting (id, count) rows are written back with one
        /// UPDATE per ID. IDs that the count query does not return are set to 0. Pages are read with an
        /// explicit <c>ORDER BY</c> on the ID column so that LIMIT/OFFSET paging is stable while the
        /// table is being updated. Failures are reported through <c>PrintUtils.Report</c> and are not rethrown.
        /// </remarks>
        /// <param name="tableName">Table whose rows are updated; interpolated into the SQL text.</param>
        /// <param name="idColumn">ID column of <paramref name="tableName"/>; read as <c>int</c>.</param>
        /// <param name="countColumn">Column that receives the computed count.</param>
        /// <param name="countQuery">Query template containing a <c>%</c> placeholder for the ID list; its result
        /// rows are read as (<c>int</c> id, <c>long</c> count).</param>
        private void CountPosts(string tableName, string idColumn, string countColumn, string countQuery)
        {
            try
            {
                string output     = "Computing post counts...";
                int    tagsOffset = 0;

                PrintUtils.PrintRow(output, 0, 0);
                conn.Open();

                while (true)
                {
                    // Without ORDER BY the row order of LIMIT/OFFSET pages is unspecified, which can skip or repeat rows.
                    string     getTagIDs = $"SELECT {idColumn} FROM {tableName} ORDER BY {idColumn} LIMIT {BatchQueryLimit} OFFSET {tagsOffset}";
                    List <int> ids       = new List <int>();

                    using (MySqlCommand idsCmd = new MySqlCommand(getTagIDs, conn))
                    {
                        idsCmd.CommandTimeout = TimeOut;

                        using (MySqlDataReader idsRdr = idsCmd.ExecuteReader())
                        {
                            while (idsRdr.Read())
                            {
                                ids.Add((int)idsRdr[0]);
                            }
                        }
                    }

                    // An empty page means the whole table has been processed.
                    if (ids.Count == 0)
                    {
                        break;
                    }

                    string countQueryUpdated = countQuery.Replace("%", string.Join(",", ids));
                    var    idCounts          = new Dictionary <int, long>();

                    using (MySqlCommand countCmd = new MySqlCommand(countQueryUpdated, conn))
                    {
                        countCmd.CommandTimeout = TimeOut;

                        using (MySqlDataReader countRdr = countCmd.ExecuteReader())
                        {
                            while (countRdr.Read())
                            {
                                idCounts.Add((int)countRdr[0], (long)countRdr[1]);
                            }
                        }
                    }

                    // IDs absent from the count result get an explicit zero.
                    foreach (int id in ids)
                    {
                        if (!idCounts.ContainsKey(id))
                        {
                            idCounts.Add(id, 0);
                        }
                    }

                    foreach (var pair in idCounts)
                    {
                        output = $"Updating {idColumn} {pair.Key} with {countColumn} = {pair.Value}.";

                        PrintUtils.PrintRow(output, 0, 0);

                        string updateCount = $"UPDATE {tableName} SET {countColumn} = {pair.Value} WHERE {idColumn} = {pair.Key}";

                        using (MySqlCommand updateCmd = new MySqlCommand(updateCount, conn))
                        {
                            updateCmd.ExecuteNonQuery();
                        }
                    }

                    tagsOffset += BatchQueryLimit;

                    output = $"Processed {BatchQueryLimit} posts ({tagsOffset} done).";

                    PrintUtils.PrintRow(output, 0, 0);
                }
            }
            catch (Exception e)
            {
                PrintUtils.Report(e);
            }
            finally
            {
                conn.Close();
            }
        }
コード例 #7
0
        /// <summary>
        /// Runs the full indexing pipeline: fetches character-tag names (category 4) and series-tag names
        /// (category 3), writes the empty names to the backup, calls <c>FetchData</c>, persists tag links
        /// and series tags, recomputes the link counts, calls <c>SwitchDatabase</c>, and logs the elapsed time.
        /// </summary>
        public void BuildIndex()
        {
            Console.Clear();
            Console.WriteLine("Building index...");

            Stopwatch watch = new Stopwatch();

            watch.Start();

            // Fetch character tags
            FetchTagNamesByCategory(4, "character", name => tagLinks.TryAdd(name, new List <string>()));

            // Fetch series tags
            FetchTagNamesByCategory(3, "series", name => seriesTags.TryAdd(name, new HashSet <string>()));

            // Write empty tag and series names to backup
            _backup.BackupTagNames(tagLinks.Keys.ToList());
            _backup.BackupSeriesNames(seriesTags.Keys.ToList());

            // Fetch data from external source and write to backup
            FetchData();

            // Persist to database
            Console.Clear();
            Console.WriteLine("Writing to database...");

            _persistence.CleanTagLinks();
            _persistence.PersistTagLinks(tagLinks);

            _persistence.CleanSeries();
            _persistence.PersistSeriesTags(seriesTags);

            _persistence.CountTagLinks();
            _persistence.CountSeriesLinks();

            SwitchDatabase();

            watch.Stop();

            TimeSpan timespan = watch.Elapsed;

            // TimeSpan.Hours wraps at 24, so use the total number of whole hours for runs longer than a day.
            string timeString = string.Format("{0:D2}h:{1:D2}m:{2:D2}s:{3:D3}ms", (int)timespan.TotalHours, timespan.Minutes, timespan.Seconds, timespan.Milliseconds);

            Resources.SystemLogger.Log($"Downloaded {tagLinks.Keys.Count} tags in {timeString} using {numThreads} thread(s).");
        }

        /// <summary>
        /// Pages through <c>tags.xml</c> for one tag category and passes every tag name that does not
        /// contain <c>#</c> to <paramref name="register"/>, printing a running counter as it goes.
        /// </summary>
        /// <param name="category">Value sent as <c>search[category]</c> in the request.</param>
        /// <param name="label">Word inserted into the progress message ("Fetching {label} tags (n).").</param>
        /// <param name="register">Callback invoked once per accepted tag name.</param>
        private void FetchTagNamesByCategory(int category, string label, Action <string> register)
        {
            int         lastId  = 0;
            int         fetched = 1;
            XmlNodeList tagNodes;

            do
            {
                XmlElement tagRoot = client.GetPosts(urlToUse + $"tags.xml?search[category]={category}&limit={TagsLimit}&page=a{lastId}&only=name,id").Result;

                tagNodes = tagRoot.SelectNodes("tag");

                for (int i = 0; i < tagNodes.Count; i++)
                {
                    string tagName = tagNodes[i].SelectSingleNode("name").InnerText;

                    if (!tagName.Contains("#"))
                    {
                        register(tagName);

                        PrintUtils.PrintRow($"Fetching {label} tags ({fetched++}).", 0, 0);
                    }

                    // The ID of the first node on the page becomes the cursor ("a{id}") for the next request.
                    if (i == 0)
                    {
                        lastId = int.Parse(tagNodes[i].SelectSingleNode("id").InnerText);
                    }
                }
            }while (tagNodes.Count != 0);
        }
コード例 #8
0
        /// <summary>
        /// Processes the tags at positions <paramref name="start"/> through <paramref name="end"/> (inclusive)
        /// of <c>tagLinks.Keys</c>. For each tag it pages through <c>posts.xml</c>, collects file URLs, counts
        /// how often each known series name appears, and backs up the results.
        /// </summary>
        /// <remarks>
        /// The current thread's name is parsed as an integer and used as the console row for progress output.
        /// A page request that returns null is retried with a doubling sleep, starting at 10 seconds, while the
        /// backoff is at most <c>BackoffLimitSeconds</c>. A <see cref="NullReferenceException"/> while handling
        /// a page is logged and the next page is tried. An empty page ends the tag.
        /// </remarks>
        /// <param name="start">Index of the first tag to process.</param>
        /// <param name="end">Index of the last tag to process (inclusive).</param>
        private void GetLinks(int start, int end)
        {
            IDictionary <string, int> tagOccurrences = new Dictionary <string, int>();
            List <string>             linksForTag    = new List <string>();

            for (int i = start; i <= end; i++)
            {
                string tagName = tagLinks.Keys.ElementAt(i);
                int    page    = 1;

                while (true)
                {
                    try
                    {
                        int    backoffSeconds = 10;
                        int    row            = int.Parse(Thread.CurrentThread.Name);
                        int    position       = i - start + 1;
                        int    total          = end - start + 1;
                        string path           = urlToUse + $"posts.xml?tags={tagName} rating:safe&limit={PostsLimit}&page={page}&only=file_url,tag_string_copyright";

                        PrintUtils.PrintRow($"Thread {row}: processing tag '{tagName}' ({position} / {total}; page #{page}).", 0, row);

                        XmlElement postRoot = client.GetPosts(path).Result;

                        // Retry a failed request, sleeping for a doubling backoff period between attempts.
                        while (postRoot == null && backoffSeconds <= BackoffLimitSeconds)
                        {
                            PrintUtils.PrintRow($"Thread {row} (Stalled; backoff: {backoffSeconds}), processing tag '{tagName}' ({position} / {total}; page #{page}).", 0, row);
                            Thread.Sleep(backoffSeconds * 1000);

                            postRoot        = client.GetPosts(path).Result;
                            backoffSeconds *= 2;
                        }

                        // If every retry failed, postRoot is still null here and the catch below skips the page.
                        XmlNodeList postNodes = postRoot.SelectNodes("post");

                        // An empty page means there is nothing left for this tag.
                        if (postNodes.Count == 0)
                        {
                            break;
                        }

                        foreach (XmlNode post in postNodes)
                        {
                            XmlNode fileNode   = post.SelectSingleNode("file-url");
                            XmlNode seriesNode = post.SelectSingleNode("tag-string-copyright");

                            // Posts without a file URL contribute no link.
                            if (fileNode != null)
                            {
                                linksForTag.Add(fileNode.InnerText);
                            }

                            if (seriesNode == null)
                            {
                                continue;
                            }

                            foreach (string seriesName in seriesNode.InnerText.Split(" "))
                            {
                                // Only series names that are already known are counted.
                                if (string.IsNullOrEmpty(seriesName) || !seriesTags.ContainsKey(seriesName))
                                {
                                    continue;
                                }

                                int seen;

                                // seen stays 0 when the name has not been counted yet.
                                tagOccurrences.TryGetValue(seriesName, out seen);
                                tagOccurrences[seriesName] = seen + 1;
                            }
                        }
                    }
                    catch (NullReferenceException e) // Skip the page and try fetching the next page
                    {
                        Resources.SystemLogger.Log($"Failed to retrieve page {page} for tag {tagName}." + Environment.NewLine + e.StackTrace);
                    }

                    page++;
                }

                List <string> topSeries = IndexUtils.GetTopSeries(ref tagOccurrences, SeriesLimit);

                // Backup each tag
                foreach (string series in topSeries)
                {
                    _backup.BackupSingleSeriesTags(series, tagName);
                }

                _backup.BackupSingleTagLinks(tagName, linksForTag);

                // Reset the per-tag accumulators before moving on to the next tag.
                tagOccurrences.Clear();
                linksForTag.Clear();

                ClearBelow();
            }
        }