Esempio n. 1
0
        /// <summary>
        /// Deletes tweets that have become meaningless: non-retweets that no longer have any
        /// <c>tweet_media</c> row (their images were deleted, or the source tweet of a URL repost
        /// was removed).
        /// </summary>
        /// <remarks>
        /// The scan walks backwards in time. Twenty windows spaced one hour apart are queued first,
        /// then one further window is queued for every result consumed, stepping back
        /// <c>RangeSeconds</c> seconds each time. The loop has no exit condition, so the method keeps
        /// running until the process is stopped or a call throws.
        /// </remarks>
        /// <returns>Nothing in practice: control never reaches a return statement.</returns>
        public int RemoveOrphanTweet()
        {
            const int    RangeSeconds = 300;
            const string head         = @"DELETE FROM tweet WHERE tweet_id IN";

            // Selects the orphaned tweet IDs inside the snowflake window that starts at the posted ID.
            TransformBlock <long, DataTable> GetTweetBlock = new TransformBlock <long, DataTable>(async(long id) =>
            {
                using (MySqlCommand Cmd = new MySqlCommand(@"SELECT tweet_id
FROM tweet
WHERE retweet_id IS NULL
AND NOT EXISTS (SELECT * FROM tweet_media WHERE tweet_media.tweet_id = tweet.tweet_id)
AND tweet_id BETWEEN @begin AND @end
ORDER BY tweet_id DESC;"))
                {
                    Cmd.Parameters.AddWithValue("@begin", id);
                    // BETWEEN is inclusive, hence the -1 on the upper bound.
                    Cmd.Parameters.AddWithValue("@end", id + SnowFlake.msinSnowFlake * RangeSeconds * 1000 - 1);
                    return(await SelectTable(Cmd, IsolationLevel.RepeatableRead));
                }
            }, new ExecutionDataflowBlockOptions {
                MaxDegreeOfParallelism = Environment.ProcessorCount
            });

            DateTimeOffset date = DateTimeOffset.UtcNow.AddHours(-1);

            // Prime the pipeline so several queries are always in flight.
            for (int i = 0; i < 20; i++)
            {
                GetTweetBlock.Post(SnowFlake.SecondinSnowFlake(date, false));
                date = date.AddHours(-1);
            }
            while (true)
            {
                DataTable Table = GetTweetBlock.Receive();
                if (Table.Rows.Count > 0)
                {
                    // The IN (...) list is sized to this result; parameters are named @0, @1, ...
                    using (MySqlCommand delcmd = new MySqlCommand(BulkCmdStrIn(Table.Rows.Count, head)))
                    {
                        for (int n = 0; n < Table.Rows.Count; n++)
                        {
                            delcmd.Parameters.AddWithValue("@" + n.ToString(), Table.Rows[n].Field <long>(0));
                        }
                        // NOTE(review): 'date' is the next window to be queued, not the window this result came from.
                        Console.WriteLine("{0} {1} Tweets removed", date, ExecuteNonQuery(delcmd));
                    }
                }
                // Replace the consumed result with one more window further in the past.
                GetTweetBlock.Post(SnowFlake.SecondinSnowFlake(date, false));
                date = date.AddSeconds(-RangeSeconds);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Handles GET for the featured page: validates the request parameters and loads the
        /// featured similar-media tweets for the one-hour range ending at <c>Date</c>
        /// (or at the current UTC time when no date was supplied).
        /// Sets a 404 status when no tweets are found and records the elapsed time.
        /// </summary>
        public async Task OnGetAsync()
        {
            var stopwatch = Stopwatch.StartNew();

            // Kick off validation right away and await it only where its result is needed,
            // to save every bit of latency.
            Params = new FeaturedParameters();
            var validation = Params.InitValidate(HttpContext);

            DateTimeOffset rangeEnd;
            if (Date.HasValue)
            {
                rangeEnd = DateTimeOffset.FromUnixTimeSeconds(Date.Value);
            }
            else
            {
                rangeEnd = DateTimeOffset.UtcNow;
            }

            await validation.ConfigureAwait(false);

            var firstSnowflake = SnowFlake.SecondinSnowFlake(rangeEnd - TimeSpan.FromHours(1), false);
            var lastSnowflake  = SnowFlake.SecondinSnowFlake(rangeEnd, true);

            Tweets = await DB.SimilarMediaFeatured(3, firstSnowflake, lastSnowflake, Params.Featured_Order.Value).ConfigureAwait(false);

            if (Tweets.Length == 0)
            {
                HttpContext.Response.StatusCode = StatusCodes.Status404NotFound;
            }
            QueryElapsedMilliseconds = stopwatch.ElapsedMilliseconds;
        }
Esempio n. 3
0
        /// <summary>
        /// Handles GET for a user's page: loads the target user, the similar-media tweets around the
        /// requested position and, when the request's <c>Params.ID</c> equals <c>UserId</c>, the crawl info.
        /// Sets a 404 status when no tweets are found and records the elapsed time.
        /// </summary>
        public async Task OnGetAsync()
        {
            var stopwatch = Stopwatch.StartNew();

            // Start the independent work first and await it only where needed, to save latency.
            var userLookup = DB.SelectUser(UserId);

            Params = new TLUserParameters();
            var validation = Params.InitValidate(HttpContext);

            // An explicit date overrides the "before" cursor.
            if (Date.HasValue)
            {
                Before = SnowFlake.SecondinSnowFlake(DateTimeOffset.FromUnixTimeSeconds(Date.Value), true);
            }
            bool pageBackwards = Before.HasValue || !After.HasValue;
            long anchorTweet   = Before ?? After ?? SnowFlake.Now(true);

            await validation.ConfigureAwait(false);

            // Crawl info is fetched only for "one's own tweets" (Params.ID matches UserId).
            var crawlInfoLookup = Params.ID == UserId ? DB.SelectCrawlInfo(UserId) : null;

            var tweetLookup = DB.SimilarMediaUser(UserId, Params.ID, anchorTweet, Params.TLUser_Count, 3, Params.TLUser_RT, Params.TLUser_Show0, pageBackwards);

            await Task.WhenAll(userLookup, tweetLookup).ConfigureAwait(false);

            if (crawlInfoLookup != null)
            {
                Crawlinfo = await crawlInfoLookup.ConfigureAwait(false);
            }

            // Both tasks have already completed successfully at this point.
            TargetUser = await userLookup.ConfigureAwait(false);
            Tweets     = await tweetLookup.ConfigureAwait(false);

            if (Tweets.Length == 0)
            {
                HttpContext.Response.StatusCode = StatusCodes.Status404NotFound;
            }
            QueryElapsedMilliseconds = stopwatch.ElapsedMilliseconds;
        }
Esempio n. 4
0
        /// <summary>
        /// Handles GET for the timeline page: redirects to "/" when the request carries no ID,
        /// otherwise loads the crawl info, the user and the similar-media timeline for that ID.
        /// Sets a 404 status when no tweets are found and records the elapsed time.
        /// </summary>
        /// <returns>A local redirect to "/" when <c>Params.ID</c> has no value; otherwise the page.</returns>
        public async Task <ActionResult> OnGetAsync()
        {
            var stopwatch = Stopwatch.StartNew();

            // Start validation first and await it only where its result is needed, to save latency.
            Params = new TLUserParameters();
            var validation = Params.InitValidate(HttpContext);

            // An explicit date overrides the "before" cursor.
            if (Date.HasValue)
            {
                Before = SnowFlake.SecondinSnowFlake(DateTimeOffset.FromUnixTimeSeconds(Date.Value), true);
            }
            bool pageBackwards = Before.HasValue || !After.HasValue;
            long anchorTweet   = Before ?? After ?? SnowFlake.Now(true);

            await validation.ConfigureAwait(false);

            if (!Params.ID.HasValue)
            {
                return LocalRedirect("/");
            }

            var accountId = Params.ID.Value;

            // Run all three queries concurrently.
            var crawlInfoLookup = DB.SelectCrawlInfo(accountId);
            var userLookup      = DB.SelectUser(accountId);
            var tweetLookup     = DB.SimilarMediaTimeline(accountId, Params.ID, anchorTweet, Params.TLUser_Count, 3, Params.TLUser_RT, Params.TLUser_Show0, pageBackwards);

            await Task.WhenAll(userLookup, tweetLookup, crawlInfoLookup).ConfigureAwait(false);

            // All three tasks have already completed successfully at this point.
            TargetUser = await userLookup.ConfigureAwait(false);
            Tweets     = await tweetLookup.ConfigureAwait(false);
            Crawlinfo  = await crawlInfoLookup.ConfigureAwait(false);

            if (Tweets.Length == 0)
            {
                HttpContext.Response.StatusCode = StatusCodes.Status404NotFound;
            }
            QueryElapsedMilliseconds = stopwatch.ElapsedMilliseconds;
            return Page();
        }
Esempio n. 5
0
        /// <summary>
        /// Blanks out stored BlurHash values that look broken.
        /// </summary>
        /// <remarks>
        /// Scans <c>media_text</c> window by window, starting at a fixed snowflake and ending at the
        /// current time. Every non-empty <c>blurhash</c> in which the same two characters occur five
        /// or more times in a row is replaced with the empty string; other hashes are left untouched.
        /// Progress ("updated / scanned" plus the current snowflake) is written to the console
        /// roughly once a minute and once more at the end.
        /// </remarks>
        /// <returns>A task that completes when every window has been processed.</returns>
        public async Task DeleteAllBlurHash()
        {
            const string updateCmdStr = @"UPDATE media_text SET blurhash = '' WHERE media_id = @a";

            long MediaCount  = 0;
            long UpdateCount = 0;

            // A two-character group followed by at least four repetitions of itself.
            var brokenHashRegex = new Regex(@"(..)\1{4,}", RegexOptions.Compiled);

            var doblock = new ActionBlock <long>(async(snowflake) =>
            {
                var medialist = new List <(long media_id, string blurhash)>();
                using (var getcmd = new MySqlCommand(@"SELECT media_id, blurhash FROM media_text USE INDEX(PRIMARY) 
WHERE media_id BETWEEN @begin AND @end
AND blurhash != '';"))
                {
                    getcmd.Parameters.Add("@begin", MySqlDbType.Int64).Value = snowflake;
                    // BETWEEN is inclusive, hence the -1; the window is presumably one minute long.
                    getcmd.Parameters.Add("@end", MySqlDbType.Int64).Value   = snowflake + SnowFlake.msinSnowFlake * 1000 * 60 - 1;

                    // Retry until the read succeeds. Rows gathered by a failed attempt are dropped,
                    // otherwise a retry would add the same media twice.
                    while (!await ExecuteReader(getcmd, (r) => medialist.Add((r.GetInt64(0), r.GetString(1)))).ConfigureAwait(false))
                    {
                        medialist.Clear();
                    }
                }
                Interlocked.Add(ref MediaCount, medialist.Count);

                int localUpdateCount = 0;
                using (var updateCmd = new MySqlCommand(updateCmdStr))
                {
                    // One command is reused; only the parameter value changes per row.
                    var p = updateCmd.Parameters.Add("@a", MySqlDbType.Int64);
                    foreach (var m in medialist)
                    {
                        if (!brokenHashRegex.IsMatch(m.blurhash))
                        {
                            continue;
                        }
                        p.Value           = m.media_id;
                        localUpdateCount += await ExecuteNonQuery(updateCmd).ConfigureAwait(false);
                    }
                }
                Interlocked.Add(ref UpdateCount, localUpdateCount);
            }, new ExecutionDataflowBlockOptions()
            {
                // The bounded capacity makes SendAsync below wait instead of queueing every window up front.
                MaxDegreeOfParallelism = Environment.ProcessorCount, BoundedCapacity = Environment.ProcessorCount + 1
            });

            var sw = Stopwatch.StartNew();

            long endsnowflake = SnowFlake.SecondinSnowFlake(DateTimeOffset.UtcNow, false);
            long snowflakecount;

            for (snowflakecount = 1125015780388372481; snowflakecount < endsnowflake; snowflakecount += SnowFlake.msinSnowFlake * 1000 * 60)
            {
                await doblock.SendAsync(snowflakecount).ConfigureAwait(false);

                if (sw.ElapsedMilliseconds >= 60000)
                {
                    Console.WriteLine("{0}\t{1} / {2}\t{3}", DateTime.Now, UpdateCount, MediaCount, snowflakecount);
                    sw.Restart();
                }
            }
            doblock.Complete();
            await doblock.Completion.ConfigureAwait(false);

            Console.WriteLine("{0}\t{1} / {2}\t{3}", DateTime.Now, UpdateCount, MediaCount, snowflakecount);
        }
Esempio n. 6
0
        /// <summary>
        /// Deletes tweets whose images have disappeared from the media host, but only when every
        /// image of the tweet also has an older copy (same <c>dcthash</c>) in a tweet by a
        /// non-protected user.
        /// </summary>
        /// <remarks>
        /// Three stages run as dataflow blocks:
        /// <list type="number">
        /// <item>CheckTweetBlock keeps only tweets in which every listed image has an older public duplicate.</item>
        /// <item>TryDownloadBlock sends a HEAD request per image and continues only when all of them answer 404, 403 or 410.</item>
        /// <item>RemoveTweetBlock deletes the tweet's rows from <c>tweet</c> and <c>tweet_text</c>.</item>
        /// </list>
        /// NOTE(review): the source query pages with LIMIT 1000 and resumes at "last tweet_id + 1", so a
        /// tweet whose media rows straddle a page boundary is checked with only part of its media IDs.
        /// </remarks>
        /// <param name="Begin">Time from which the scan starts (converted to a snowflake lower bound).</param>
        /// <param name="Exclude">Time at which the scan stops; tweets with an ID above its snowflake are not processed.</param>
        /// <returns>A task that completes when all three stages have drained.</returns>
        public async Task DeleteRemovedTweet(DateTimeOffset Begin, DateTimeOffset Exclude)
        {
            // Stage 3: actually deletes the tweet.
            var RemoveTweetBlock = new ActionBlock <long>(async(tweet_id) =>
            {
                // The existence of an older public tweet has already been confirmed upstream.
                Counter.TweetToDelete.Increment();
                using (var cmd = new MySqlCommand(@"DELETE FROM tweet WHERE tweet_id = @tweet_id;"))
                    using (var cmd2 = new MySqlCommand(@"DELETE FROM tweet_text WHERE tweet_id = @tweet_id;"))
                    {
                        cmd.Parameters.Add("@tweet_id", MySqlDbType.Int64).Value  = tweet_id;
                        cmd2.Parameters.Add("@tweet_id", MySqlDbType.Int64).Value = tweet_id;
                        if (await ExecuteNonQuery(new[] { cmd, cmd2 }).ConfigureAwait(false) > 0)
                        {
                            Counter.TweetDeleted.Increment();
                        }
                    }
            }, new ExecutionDataflowBlockOptions()
            {
                MaxDegreeOfParallelism = Environment.ProcessorCount
            });

            // Stage 2: checks whether the images still exist.
            var TryDownloadBlock = new ActionBlock <IEnumerable <MediaInfo> >(async(media) =>
            {
                if (!media.Any())
                {
                    return;
                }
                foreach (var m in media)
                {
                    Counter.MediaTotal.Increment();
                    using (var req = new HttpRequestMessage(HttpMethod.Head, m.media_url))
                    {
                        req.Headers.Referrer = new Uri(m.tweet_url);
                        using (var res = await Http.SendAsync(req).ConfigureAwait(false))
                        {
                            if (res.StatusCode == HttpStatusCode.NotFound ||
                                res.StatusCode == HttpStatusCode.Forbidden ||
                                res.StatusCode == HttpStatusCode.Gone)
                            {
                                Counter.MediaGone.Increment();
                            }
                            else
                            {
                                // Any other status counts as "still there": keep the tweet.
                                return;
                            }
                        }
                    }
                }
                // Reaching this point means every image is gone.
                RemoveTweetBlock.Post(media.First().source_tweet_id);
            }, new ExecutionDataflowBlockOptions()
            {
                BoundedCapacity        = DownloadConcurrency << 2,
                MaxDegreeOfParallelism = DownloadConcurrency
            });

            // Stage 1: keeps only the tweets suspected of being image reposts.
            var CheckTweetBlock = new ActionBlock <(long tweet_id, long[] media_id)>(async(t) =>
            {
                // Counted here (not by the producer) so that a stall of this block itself shows up.
                Counter.TweetToCheck.Increment();

                // Disabled alternative conditions that would also match hashes paired through
                // dcthashpairslim; kept for reference:
                /* OR EXISTS(
                 * SELECT * FROM media m
                 * JOIN tweet_media USING (media_id)
                 * JOIN tweet t USING (tweet_id)
                 * JOIN user u USING (user_id)
                 * JOIN dcthashpairslim h ON h.hash_large = m.dcthash
                 * WHERE h.hash_small = (SELECT dcthash FROM media WHERE media_id = @media_id)
                 * AND t.tweet_id < @tweet_id
                 * AND u.isprotected IS FALSE)
                 * OR EXISTS(
                 * SELECT * FROM media m
                 * JOIN tweet_media USING (media_id)
                 * JOIN tweet t USING (tweet_id)
                 * JOIN user u USING (user_id)
                 * JOIN dcthashpairslim h ON h.hash_small = m.dcthash
                 * WHERE h.hash_large = (SELECT dcthash FROM media WHERE media_id = @media_id)
                 * AND t.tweet_id < @tweet_id
                 * AND u.isprotected IS FALSE);
                 */

                // Is there an older tweet by a non-protected user with an image of the same hash?
                using (var mediacmd = new MySqlCommand(@"SELECT EXISTS(
SELECT * FROM media m
JOIN tweet_media USING (media_id)
JOIN tweet t USING (tweet_id)
JOIN user u USING (user_id)
WHERE dcthash = (SELECT dcthash FROM media WHERE media_id = @media_id)
AND t.tweet_id < @tweet_id
AND u.isprotected IS FALSE);"))
                {
                    mediacmd.Parameters.Add("@tweet_id", MySqlDbType.Int64).Value = t.tweet_id;
                    var mediaparam = mediacmd.Parameters.Add("@media_id", MySqlDbType.Int64);
                    foreach (long mid in t.media_id)
                    {
                        mediaparam.Value = mid;
                        // Move on to the next image only while every image so far is a repost.
                        while (true)
                        {
                            long val = await SelectCount(mediacmd, IsolationLevel.ReadUncommitted).ConfigureAwait(false);
                            if (val == 0)
                            {
                                // No older copy of this image: not a pure repost, keep the tweet.
                                return;
                            }
                            else if (val > 0)
                            {
                                break;
                            }
                            // Presumably a negative value signals a failed query; wait and retry.
                            await Task.Delay(1000).ConfigureAwait(false);
                        }
                    }
                }
                Counter.TweetCheckHit.Increment();

                // Fetch the data of the images whose existence has to be verified (not pretty).
                using (var cmd = new MySqlCommand(@"SELECT
m.media_id, mt.media_url, u.screen_name
FROM media m
LEFT JOIN media_downloaded_at md ON m.media_id = md.media_id
JOIN media_text mt ON m.media_id = mt.media_id
JOIN tweet t ON m.source_tweet_Id = t.tweet_id
JOIN user u USING (user_id)
WHERE m.source_tweet_id = @tweet_id;"))
                {
                    cmd.Parameters.Add("@tweet_id", MySqlDbType.Int64).Value = t.tweet_id;

                    var medialist = new List <MediaInfo>();
                    // Retry until the read succeeds, dropping rows from a failed attempt.
                    while (!await ExecuteReader(cmd, (r) =>
                    {
                        medialist.Add(new MediaInfo()
                        {
                            media_id = r.GetInt64(0),
                            media_url = r.GetString(1),
                            screen_name = r.GetString(2),
                            source_tweet_id = t.tweet_id,
                        });
                    }).ConfigureAwait(false))
                    {
                        medialist.Clear();
                    }
                    await TryDownloadBlock.SendAsync(medialist).ConfigureAwait(false);
                }
            }, new ExecutionDataflowBlockOptions()
            {
                BoundedCapacity = Environment.ProcessorCount << 4, MaxDegreeOfParallelism = Environment.ProcessorCount
            });


            using (var cmd = new MySqlCommand(@"SELECT o.tweet_id, t.media_id
FROM tweet o USE INDEX (PRIMARY)
JOIN tweet_media t USING (tweet_id)
WHERE o.retweet_id IS NULL
AND @tweet_id <= o.tweet_id
ORDER BY o.tweet_id
LIMIT 1000;"))
            {
                var tweet_param = cmd.Parameters.Add("@tweet_id", MySqlDbType.Int64);

                // The scan starts here.
                long last_tweet_id    = SnowFlake.SecondinSnowFlake(Begin, false);
                long exclude_tweet_id = SnowFlake.SecondinSnowFlake(Exclude, false);
                tweet_param.Value = last_tweet_id;

                // Media IDs collected so far for the tweet identified by last_tweet_id.
                var MediaIdList = new List <long>();
                var Table       = new List <(long tweet_id, long media_id)>();

                // Set once a tweet beyond the limit is seen, so that paging stops right away
                // instead of reading the rest of the table only to discard it.
                bool reachedExclude = false;
                do
                {
                    // Retry the page until the read succeeds, dropping rows from a failed attempt.
                    do
                    {
                        Table.Clear();
                    }while (!await ExecuteReader(cmd, (r) => Table.Add((r.GetInt64(0), r.GetInt64(1))), IsolationLevel.ReadUncommitted).ConfigureAwait(false));
                    foreach (var t in Table)
                    {
                        long tweet_id = t.tweet_id;
                        if (last_tweet_id != tweet_id)
                        {
                            if (exclude_tweet_id < tweet_id)
                            {
                                reachedExclude = true;
                                break;
                            }
                            // A new tweet starts: hand over the previous tweet's media.
                            if (MediaIdList.Count > 0)
                            {
                                await CheckTweetBlock.SendAsync((last_tweet_id, MediaIdList.ToArray())).ConfigureAwait(false);

                                MediaIdList.Clear();
                            }
                            last_tweet_id       = tweet_id;
                            Counter.LastTweetID = tweet_id;
                        }
                        MediaIdList.Add(t.media_id);
                    }
                    if (Table.Count > 0)
                    {
                        tweet_param.Value = Table.Last().tweet_id + 1;
                    }
                } while (Table.Count > 0 && !reachedExclude && last_tweet_id < exclude_tweet_id);

                // The last tweet in range has no successor row to trigger its hand-over; send it now.
                if (MediaIdList.Count > 0)
                {
                    await CheckTweetBlock.SendAsync((last_tweet_id, MediaIdList.ToArray())).ConfigureAwait(false);

                    MediaIdList.Clear();
                }

                // Drain the stages in pipeline order so that no work item is lost.
                CheckTweetBlock.Complete();
                await CheckTweetBlock.Completion.ConfigureAwait(false);

                TryDownloadBlock.Complete();
                await TryDownloadBlock.Completion.ConfigureAwait(false);

                RemoveTweetBlock.Complete();
                await RemoveTweetBlock.Completion.ConfigureAwait(false);
            }
        }