/// <summary>
/// Re-points the image paths of legacy local-image blogs at the static image host.
/// </summary>
/// <remarks>
/// Every file under <c>F:\bak\upload</c> is indexed by the last 13 characters of its file name
/// (the whole name when it is shorter). Each <c>;</c>-separated entry of a blog's
/// <c>ImagePath</c> is looked up with the same key: a single candidate is used directly, an
/// exact name match wins among several candidates, and otherwise the operator is asked on the
/// console to pick one. Entries without any candidate are kept unchanged. Every resulting name
/// is prefixed with <c>//static.gmgard.us/Images/upload/</c>; all changes are saved with a
/// single <c>SaveChanges</c> call at the end.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// Console input ended while the operator still had to choose between candidates.
/// </exception>
public static void Run()
{
    // Number of trailing file-name characters used as the lookup key.
    const int SuffixLength = 13;

    // One key rule for both indexing and lookup, so names shorter than the suffix
    // are handled the same way on both sides instead of throwing on lookup.
    string SuffixKey(string name) =>
        name.Length > SuffixLength ? name.Substring(name.Length - SuffixLength) : name;

    BlogContextFactory blogContextFactory = new BlogContextFactory();

    var filenames = new Dictionary<string, List<string>>();
    foreach (var path in System.IO.Directory.EnumerateFiles(@"F:\bak\upload"))
    {
        var name = System.IO.Path.GetFileName(path);
        var key = SuffixKey(name);
        if (!filenames.TryGetValue(key, out var sameSuffix))
        {
            sameSuffix = new List<string>();
            filenames.Add(key, sameSuffix);
        }
        sameSuffix.Add(name);
    }

    // Maps one stored image name to the backed-up file name that should replace it.
    string Resolve(string stored)
    {
        if (!filenames.TryGetValue(SuffixKey(stored), out var candidates))
        {
            return stored;
        }
        if (candidates.Count == 1)
        {
            return candidates[0];
        }
        if (candidates.Contains(stored))
        {
            return stored;
        }

        Console.Out.WriteLine("Please choose for " + stored);
        for (int i = 0; i < candidates.Count; i++)
        {
            Console.Out.WriteLine($"{i}: {candidates[i]}");
        }

        // Nothing is saved until the very end, so a typo must not abort the run:
        // keep asking until the answer is a valid index.
        while (true)
        {
            var input = Console.ReadLine();
            if (input == null)
            {
                throw new InvalidOperationException("Console input ended while choosing a file for " + stored);
            }
            if (int.TryParse(input, out var choice) && choice >= 0 && choice < candidates.Count)
            {
                return candidates[choice];
            }
            Console.Out.WriteLine($"Enter a number between 0 and {candidates.Count - 1}");
        }
    }

    using (var db = blogContextFactory.Create())
    {
        var allLegacies = db.Blogs.Where(b => b.IsLocalImg && !b.ImagePath.Contains("static.gmgard")).ToList();
        Console.Out.WriteLine("total legacy: " + allLegacies.Count);
        foreach (var item in allLegacies)
        {
            var imgs = item.ImagePath.Split(';').Select(Resolve);
            item.ImagePath = string.Join(";", imgs.Select(s => "//static.gmgard.us/Images/upload/" + s));
        }
        db.SaveChanges();
    }
}
/// <summary>
/// Rebuilds the monthly history ranking for the given calendar month.
/// </summary>
/// <remarks>
/// Existing <c>RankMonthly</c> rows whose <c>RankDate</c> falls in the month are removed.
/// Ratings cast during the month are then summed per blog, joined to approved blogs outside
/// categories 11 and 12, ordered by rating total and then by newest <c>BlogDate</c>, and the
/// first five are stored as new <c>RankMonthly</c> rows dated on the last day of the month.
/// </remarks>
/// <param name="year">Year of the month to rebuild.</param>
/// <param name="month">Month number passed to the <see cref="DateTime"/> constructor.</param>
public static void FillMonth(int year, int month)
{
    var monthStart = new DateTime(year, month, 1);
    var monthEnd = new DateTime(
        monthStart.Year,
        monthStart.Month,
        DateTime.DaysInMonth(monthStart.Year, monthStart.Month));

    var contextFactory = new BlogContextFactory();
    using (var db = contextFactory.Create())
    {
        // Drop whatever monthly ranking was stored for this month before recomputing it.
        var staleEntries = db.HistoryRankings
            .Where(entry => entry.RankType == HistoryRanking.Type.RankMonthly
                && DbFunctions.DiffMonths(monthEnd, entry.RankDate) == 0)
            .ToList();
        db.HistoryRankings.RemoveRange(staleEntries);

        // Rating total per blog, counting only ratings cast in the month.
        var ratingTotals = db.BlogRatings
            .Where(vote => DbFunctions.DiffMonths(monthStart, vote.ratetime) == 0)
            .GroupBy(vote => vote.BlogID)
            .Select(votes => new { blogId = votes.Key, rating = votes.Sum(vote => vote.value) });

        var eligibleBlogs = db.Blogs
            .Where(blog => blog.isApproved == true && !(new[] { 11, 12 }).Contains(blog.CategoryID));

        var topRows = ratingTotals
            .Join(
                eligibleBlogs,
                total => total.blogId,
                blog => blog.BlogID,
                (total, blog) => new
                {
                    blog,
                    total.rating,
                    postCount = db.Posts.Count(post => post.IdType == ItemType.Blog && post.ItemId == blog.BlogID)
                })
            .OrderByDescending(row => row.rating)
            .ThenByDescending(row => row.blog.BlogDate)
            .Take(5)
            .ToList();

        // The thumbnail is computed by firstImgPath, so the projection runs in memory.
        var freshEntries = topRows.Select(row => new HistoryRanking
        {
            Author = row.blog.Author,
            BlogDate = row.blog.BlogDate,
            BlogID = row.blog.BlogID,
            BlogThumb = firstImgPath(row.blog),
            BlogTitle = row.blog.BlogTitle,
            BlogVisit = row.blog.BlogVisit,
            PostCount = row.postCount,
            Rating = row.rating,
            RankType = HistoryRanking.Type.RankMonthly,
            RankDate = monthEnd,
        });

        db.HistoryRankings.AddRange(freshEntries);
        db.SaveChanges();
    }
}
/// <summary>
/// Recomputes each auditor's vote statistics from the blog audit log.
/// </summary>
/// <remarks>
/// Auditors are taken from audits dated after 2017-11-24 on blogs that have at least one
/// <c>Approve</c> or <c>Deny</c> audit in that period. Their <c>VoteApprove</c> /
/// <c>VoteDeny</c> audits are matched to <c>Approve</c> / <c>Deny</c> decisions with the same
/// <c>BlogID</c> and <c>BlogVersion</c>; a vote counts as correct when it points the same way
/// as the decision. The per-auditor totals are then written to the matching
/// <c>Auditors</c> rows in the users database.
/// </remarks>
public static void Run()
{
    BlogContextFactory blogContextFactory = new BlogContextFactory();
    UsersContextFactory usersContextFactory = new UsersContextFactory();
    using (var udb = usersContextFactory.Create())
    using (var db = blogContextFactory.Create())
    {
        var start = new DateTime(2017, 11, 24);
        var audits = db.BlogAudits.Where(b => b.AuditDate > start).GroupBy(b => b.BlogID);
        var finalizedAudits = audits.Where(g => g.Any(b => b.AuditAction == BlogAudit.Action.Approve || b.AuditAction == BlogAudit.Action.Deny));
        var latestAuditors = finalizedAudits.SelectMany(g => g.Select(b => b.Auditor));

        var stats = db.BlogAudits
            .Where(ba => ba.AuditAction == BlogAudit.Action.Approve || ba.AuditAction == BlogAudit.Action.Deny)
            .GroupJoin(
                db.BlogAudits.Where(ba => latestAuditors.Contains(ba.Auditor) && (ba.AuditAction == BlogAudit.Action.VoteApprove || ba.AuditAction == BlogAudit.Action.VoteDeny)),
                ba => new { ba.BlogID, ba.BlogVersion },
                la => new { la.BlogID, la.BlogVersion },
                (ba, la) => new { Decision = ba, Votes = la })
            .SelectMany(d => d.Votes, (d, v) => new
            {
                Auditor = v.Auditor,
                Correct = (v.AuditAction == BlogAudit.Action.VoteApprove && d.Decision.AuditAction == BlogAudit.Action.Approve)
                    || (v.AuditAction == BlogAudit.Action.VoteDeny && d.Decision.AuditAction == BlogAudit.Action.Deny)
            })
            .GroupBy(v => v.Auditor)
            // Keys are lower-cased with the invariant culture so building the dictionary and
            // probing it below agree regardless of the machine's current culture.
            .ToDictionary(
                g => g.Key.ToLowerInvariant(),
                g => new { CorrectCount = g.Count(d => d.Correct), Total = g.Count() });

        var usersToUpdate = stats.Keys;
        var updates = udb.Auditors
            .Where(a => usersToUpdate.Contains(a.User.UserName))
            .Select(a => new { a.User.UserName, Auditor = a });

        foreach (var update in updates)
        {
            int total = 0, correctCount = 0;
            // Single lookup instead of ContainsKey followed by the indexer.
            if (stats.TryGetValue(update.UserName.ToLowerInvariant(), out var stat))
            {
                total = stat.Total;
                correctCount = stat.CorrectCount;
            }

            // NOTE(review): the stored count is one more than the number of scored votes;
            // kept as in the original - confirm the offset is intended.
            update.Auditor.AuditCount = total + 1;
            update.Auditor.CorrectCount = correctCount;
        }
        udb.SaveChanges();
    }
}
/// <summary>
/// Refreshes the denormalised blog fields on existing history rankings, then imports
/// rankings from the JSON files under <c>App_Data</c>.
/// </summary>
/// <remarks>
/// Step 1 walks all history rankings joined to their blogs in batches of <c>BATCH_SIZE</c>
/// and copies author, date, thumbnail, title, visit count and post count from the blog.
/// Step 2 reads <c>ranking.js</c> (current monthly and 24h lists, dated today),
/// <c>monthly_ranking.js</c> (a date-keyed dictionary of monthly lists) and the weekly
/// <c>ranking{year}{week}.js</c> files for 2014-2018, weeks 1-52, and inserts one
/// <c>HistoryRanking</c> per entry. Weekly entries are dated six days after the Monday that
/// starts the given week, counted from the first Monday of the year.
/// </remarks>
public static void Run()
{
    BlogContextFactory blogContextFactory = new BlogContextFactory();

    using (var db = blogContextFactory.Create())
    {
        var joins = db.HistoryRankings.Join(
            db.Blogs,
            r => r.BlogID,
            b => b.BlogID,
            (r, b) => new
            {
                rank = r,
                blog = b,
                pc = db.Posts.Count(p => p.ItemId == b.BlogID && p.IdType == ItemType.Blog)
            });

        int count = joins.Count();
        Console.WriteLine("Total ranking count: {0}", count);
        for (int i = 0; i < count; i += BATCH_SIZE)
        {
            // Many rankings share a RankDate; ordering by it alone lets consecutive pages
            // overlap or skip rows. The extra keys make the page order repeatable.
            // NOTE(review): assumes (RankDate, RankType, BlogID) identifies a row - confirm,
            // or order by the table's primary key instead.
            var items = joins
                .OrderBy(b => b.rank.RankDate)
                .ThenBy(b => b.rank.RankType)
                .ThenBy(b => b.rank.BlogID)
                .Skip(i)
                .Take(BATCH_SIZE)
                .ToList();

            foreach (var item in items)
            {
                item.rank.Author = item.blog.Author;
                item.rank.BlogDate = item.blog.BlogDate;
                item.rank.BlogThumb = firstImgPath(item.blog);
                item.rank.BlogTitle = item.blog.BlogTitle;
                item.rank.BlogVisit = item.blog.BlogVisit;
                item.rank.PostCount = item.pc;
            }
            db.SaveChanges();
            // Report the rows actually processed; the last batch is usually short.
            Console.WriteLine($"Done {Math.Min(i + BATCH_SIZE, count)}");
        }
    }

    using (var db = blogContextFactory.Create())
    {
        var rankdata = File.ReadAllText(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "../../../../App_Data/ranking.js"));
        // DeserializeObject returns null for empty or "null" content; treat that as "no entries".
        var rankings = JsonConvert.DeserializeObject<RankingList>(rankdata);

        var currentMonthly = rankings?.r1m;
        if (currentMonthly != null)
        {
            db.HistoryRankings.AddRange(currentMonthly.Select(r => new HistoryRanking
            {
                Author = r.Author,
                BlogDate = r.BlogDate,
                BlogID = r.BlogID,
                BlogThumb = r.BlogThumb,
                BlogTitle = r.BlogTitle,
                BlogVisit = r.BlogVisit,
                PostCount = r.PostCount,
                RankDate = DateTime.Today,
                RankType = HistoryRanking.Type.RankMonthly,
                Rating = r.Rating
            }));
        }

        var currentDaily = rankings?.r24h;
        if (currentDaily != null)
        {
            db.HistoryRankings.AddRange(currentDaily.Select(r => new HistoryRanking
            {
                Author = r.Author,
                BlogDate = r.BlogDate,
                BlogID = r.BlogID,
                BlogThumb = r.BlogThumb,
                BlogTitle = r.BlogTitle,
                BlogVisit = r.BlogVisit,
                PostCount = r.PostCount,
                RankDate = DateTime.Today,
                RankType = HistoryRanking.Type.Rank24h,
                Rating = r.Rating
            }));
        }

        rankdata = File.ReadAllText(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "../../../../App_Data/monthly_ranking.js"));
        var AllRankings = JsonConvert.DeserializeObject<Dictionary<DateTime, IEnumerable<Ranking>>>(rankdata)
            ?? new Dictionary<DateTime, IEnumerable<Ranking>>();
        db.HistoryRankings.AddRange(AllRankings.SelectMany(kvp => (kvp.Value ?? Enumerable.Empty<Ranking>()).Select(r => new HistoryRanking
        {
            Author = r.Author,
            BlogDate = r.BlogDate,
            BlogID = r.BlogID,
            BlogThumb = r.BlogThumb,
            BlogTitle = r.BlogTitle,
            BlogVisit = r.BlogVisit,
            PostCount = r.PostCount,
            RankDate = kvp.Key,
            RankType = HistoryRanking.Type.RankMonthly,
            Rating = r.Rating
        })));

        for (int year = 2014; year <= 2018; year++)
        {
            // First Monday on or after January 1st of the year.
            var firstDay = new DateTime(year, 1, 1);
            var daySinceMonday = DayOfWeek.Monday - firstDay.DayOfWeek;
            if (daySinceMonday < 0)
            {
                daySinceMonday += 7;
            }
            var firstMonday = daySinceMonday == 0 ? firstDay : firstDay.AddDays(daySinceMonday);

            for (int week = 1; week <= 52; week++)
            {
                var path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, $"../../../../App_Data/ranking{year}{week}.js");
                if (!File.Exists(path))
                {
                    continue;
                }

                rankdata = File.ReadAllText(path);
                var ranking = JsonConvert.DeserializeObject<IEnumerable<Ranking>>(rankdata);
                if (ranking == null)
                {
                    // Empty weekly file: nothing to import.
                    continue;
                }

                // Captured per iteration so the deferred projection sees this week's date.
                var weekEnd = firstMonday.AddDays(6 + (week - 1) * 7);
                db.HistoryRankings.AddRange(ranking.Select(r => new HistoryRanking
                {
                    Author = r.Author,
                    BlogDate = r.BlogDate,
                    BlogID = r.BlogID,
                    BlogThumb = r.BlogThumb,
                    BlogTitle = r.BlogTitle,
                    BlogVisit = r.BlogVisit,
                    PostCount = r.PostCount,
                    RankDate = weekEnd,
                    RankType = HistoryRanking.Type.RankWeekly,
                    Rating = r.Rating
                }));
            }
        }
        db.SaveChanges();
    }
}
/// <summary>
/// Counts how often each parenthesised or bracketed phrase occurs in blog titles and writes
/// the counts to <c>output.txt</c>, most frequent first.
/// </summary>
/// <remarks>
/// Titles are read in batches of <c>BATCH_SIZE</c> ordered by <c>BlogID</c>. For every title
/// whose <c>(</c>/<c>)</c> (respectively <c>[</c>/<c>]</c>) pass <c>BalancedParanthesis</c>,
/// each balanced group is extracted, its outer delimiters are stripped, groups matching
/// <c>dateSizeRegex</c> are skipped, and the remaining text is tallied.
/// </remarks>
public static void Run()
{
    // The patterns use IgnorePatternWhitespace, where '#' starts a comment that runs to the
    // end of the LINE. They must therefore stay multi-line: on a single line everything
    // after the first '#' is ignored and only the opening delimiter is matched.
    var parenRegex = new Regex(@"
        \(                      # Match (
        (
            [^()]+              # all chars except ()
            | (?<Level>\()      # or if ( then Level += 1
            | (?<-Level>\))     # or if ) then Level -= 1
        )+                      # Repeat (to go from inside to outside)
        (?(Level)(?!))          # zero-width negative lookahead assertion
        \)                      # Match )", RegexOptions.IgnorePatternWhitespace);
    var bracketRegex = new Regex(@"
        \[                      # Match [
        (
            [^\[\]]+            # all chars except []
            | (?<Level>\[)      # or if [ then Level += 1
            | (?<-Level>\])     # or if ] then Level -= 1
        )+                      # Repeat (to go from inside to outside)
        (?(Level)(?!))          # zero-width negative lookahead assertion
        \]                      # Match ]", RegexOptions.IgnorePatternWhitespace);

    var keywordDict = new Dictionary<string, int>();

    // Tallies every balanced open/close group of one delimiter kind found in a title.
    void Tally(string title, char open, char close, Regex pattern)
    {
        if (!BalancedParanthesis(title, open, close))
        {
            return;
        }

        foreach (Match m in pattern.Matches(title))
        {
            // Strip the outer delimiter pair.
            var key = m.Value.Substring(1, m.Value.Length - 2);
            if (dateSizeRegex.IsMatch(key))
            {
                continue;
            }

            // A missing key leaves seen at 0, so one lookup covers both cases.
            keywordDict.TryGetValue(key, out var seen);
            keywordDict[key] = seen + 1;
        }
    }

    BlogContextFactory blogContextFactory = new BlogContextFactory();
    using (var db = blogContextFactory.Create())
    {
        var blogCount = db.Blogs.Count();
        for (int i = 0; i < blogCount; i += BATCH_SIZE)
        {
            var titles = db.Blogs
                .OrderBy(b => b.BlogID)
                .Skip(i)
                .Take(BATCH_SIZE)
                .Select(t => t.BlogTitle)
                .ToList();

            foreach (var title in titles)
            {
                Tally(title, '(', ')', parenRegex);
                Tally(title, '[', ']', bracketRegex);
            }
        }
    }

    File.WriteAllLines(
        "output.txt",
        keywordDict.OrderByDescending(kvp => kvp.Value).Select(kvp => $"{kvp.Key}: {kvp.Value}"));
}
/// <summary>
/// Copies four-character codes found in blog content into the <c>pass</c> field of the
/// blog's stored links.
/// </summary>
/// <remarks>
/// Blogs with <c>BlogID</c> above <c>LAST_BLOG_ID</c> and non-empty <c>Links</c> are processed
/// in batches of <c>BATCH_SIZE</c>. For each blog the <c>Links</c> JSON is parsed into
/// <c>BlogLink[]</c> and the content is searched for
/// <c>span class="label label-inverse"</c> elements whose text is exactly four characters.
/// When the link and code counts match they are paired by position. A single link takes
/// the last code. A single code goes to the only <c>pan.baidu.com</c> link, if there is
/// exactly one. Any other combination is resolved interactively on the console, where an
/// out-of-range answer skips that code. Each batch is saved before the next one is read.
/// </remarks>
public static void Run()
{
    BlogContextFactory blogContextFactory = new BlogContextFactory();
    using (var db = blogContextFactory.Create())
    {
        var totalBlogs = db.Blogs.Where(b => b.BlogID > 0).Count();
        Console.WriteLine($"total blogs: {totalBlogs}");
        int lastblog = LAST_BLOG_ID;

        for (int i = 0; i < totalBlogs; i += BATCH_SIZE)
        {
            // Materialise once: the batch is enumerated here and counted after saving, and
            // a lazy query would hit the database again for the count.
            var blogs = db.Blogs
                .Where(b => b.BlogID > LAST_BLOG_ID && b.Links.Length > 0)
                .OrderBy(b => b.BlogID)
                .Skip(i)
                .Take(BATCH_SIZE)
                .ToList();

            // totalBlogs counts every blog, the query only a filtered subset, so the pages
            // run out early; stop instead of issuing empty queries until the bound is hit.
            if (blogs.Count == 0)
            {
                break;
            }

            foreach (var blog in blogs)
            {
                // A literal "null" payload deserialises to null; treat it as "no links".
                var link = Newtonsoft.Json.JsonConvert.DeserializeObject<BlogLink[]>(blog.Links) ?? new BlogLink[0];

                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(blog.Content);
                var ns = doc.DocumentNode.SelectNodes("//span[@class=\"label label-inverse\"]");
                if (ns == null)
                {
                    continue;
                }

                // Indexed repeatedly below, so evaluate the filter once.
                var nodes = ns.Where(n => n.InnerText.Length == 4).ToList();
                if (nodes.Count > 0 && link.Length > 0)
                {
                    if (link.Length != nodes.Count)
                    {
                        if (link.Length == 1)
                        {
                            var lastCode = nodes[nodes.Count - 1].InnerText;
                            Console.WriteLine("multiple pass for single link, using last one: " + lastCode);
                            link[0].pass = lastCode;
                        }
                        else if (nodes.Count == 1 && link.Count(b => b.url.Contains("pan.baidu.com")) == 1)
                        {
                            link.Single(b => b.url.Contains("pan.baidu.com")).pass = nodes[0].InnerText;
                        }
                        else
                        {
                            Console.WriteLine(blog.BlogID + " Manual select: ");
                            for (int j = 0; j < link.Length; j++)
                            {
                                Console.WriteLine(j + ": " + Newtonsoft.Json.JsonConvert.SerializeObject(link[j], Newtonsoft.Json.Formatting.Indented));
                            }
                            Console.WriteLine(nodes[0].ParentNode.InnerHtml);

                            for (int j = 0; j < nodes.Count; j++)
                            {
                                Console.WriteLine($"{j}: {nodes[j].InnerText} Enter index [0-{link.Length - 1}]:");

                                // Read until a number is entered; a closed input stream
                                // (null) ends the prompt instead of spinning forever.
                                int k = -1;
                                string input;
                                while ((input = Console.ReadLine()) != null && !int.TryParse(input, out k))
                                {
                                }

                                // k selects a LINK, so it is bounded by the link count only.
                                // Anything out of range skips this code.
                                if (input != null && k >= 0 && k < link.Length)
                                {
                                    link[k].pass = nodes[j].InnerText;
                                }
                            }
                        }
                    }
                    else
                    {
                        for (int j = 0; j < nodes.Count; j++)
                        {
                            link[j].pass = nodes[j].InnerText;
                        }
                    }
                    blog.Links = Newtonsoft.Json.JsonConvert.SerializeObject(link);
                }
                lastblog = blog.BlogID;
            }

            db.SaveChanges();
            Console.WriteLine($"{blogs.Count} blogs saved. last saved blog: {lastblog}");

            if (blogs.Count < BATCH_SIZE)
            {
                // Short page: this was the last one.
                break;
            }
        }
    }
}
/// <summary>
/// Indexes blogs into the Elasticsearch <c>blogs</c> index, optionally creating the index first.
/// </summary>
/// <remarks>
/// Blogs with <c>BlogID</c> above <c>LAST_BLOG_ID</c> are read in batches of
/// <c>BATCH_SIZE</c> together with their post count and tags, and each batch is sent with
/// <c>BulkAll</c>. The method blocks until the batch has completed or failed, refreshes the
/// index after the last batch, prints the last indexed blog id and waits for a key press.
/// </remarks>
/// <param name="endpoint">Base URI of the Elasticsearch node.</param>
/// <param name="username">User name for basic authentication.</param>
/// <param name="password">Password for basic authentication.</param>
/// <param name="create">
/// When <c>true</c>, the <c>blogs</c> index is created with its mapping and analysis settings
/// before indexing; if creation fails the method prints an error and returns.
/// </param>
public static void Run(string endpoint, string username, string password, bool create = false)
{
    var settings = new ConnectionSettings(new Uri(endpoint))
        .DefaultIndex("blogs")
        .BasicAuthentication(username, password);
    var client = new ElasticClient(settings);

    if (create)
    {
        var resp = client.Indices.Create("blogs", cid => cid
            .Map<BlogIndexed>(m => m.AutoMap()
                .Properties(p => p.Keyword(kp => kp.Name(b => b.Author).Normalizer("lowercase")))
                .Properties(p => p.Text(tp => tp.Name(b => b.Title).Fields(f => f.Text(tf => tf.Analyzer("ngram_lc").Name("ngram_lc")))))
                .Properties(p => p.Keyword(tp => tp.Name(b => b.Tags).Fields(f => f.Text(tf => tf.Analyzer("ngram_lc").Name("ngram_lc"))))))
            .Settings(i => i.Setting("max_ngram_diff", 30)
                .Setting("max_result_window", 100000)
                .Setting("max_rescore_window", 100000)
                .Analysis(a => a.Analyzers(ana => ana.Custom("ngram_lc", c => c.Filters("lowercase").Tokenizer("ngram_tokenizer")))
                    .Tokenizers(t => t.NGram("ngram_tokenizer", n => n.MaxGram(30).MinGram(1).TokenChars(TokenChar.Letter, TokenChar.Digit)))
                    .Normalizers(n => n.Custom("lowercase", cn => cn.Filters("lowercase"))))));
        if (!resp.IsValid)
        {
            Console.WriteLine("error creating index");
            return;
        }
    }

    BlogContextFactory blogContextFactory = new BlogContextFactory();
    using (var db = blogContextFactory.Create())
    {
        var totalBlogs = db.Blogs.Where(b => b.BlogID > 0).Count();
        Console.WriteLine($"total blogs: {totalBlogs}");
        int lastBlogId = LAST_BLOG_ID;

        for (int i = 0; i < totalBlogs; i += BATCH_SIZE)
        {
            var blogs = db.Blogs
                .Where(b => b.BlogID > LAST_BLOG_ID)
                .OrderBy(b => b.BlogID)
                .Skip(i)
                .Take(BATCH_SIZE)
                // A post belongs to a blog through ItemId (with IdType == Blog); joining on
                // the post's own PostId would count unrelated rows.
                .GroupJoin(
                    db.Posts.Where(p => p.IdType == GmGard.Models.ItemType.Blog),
                    b => b.BlogID,
                    p => p.ItemId,
                    (b, p) => new { blog = b, post = p.Count() })
                .GroupJoin(
                    db.TagsInBlogs.DefaultIfEmpty(),
                    b => b.blog.BlogID,
                    tib => tib.BlogID,
                    (b, tib) => new { b.blog, tag = tib.Select(t => t.tag), b.post, })
                .ToList();

            Console.WriteLine($"Send Items for {i} to {i + BATCH_SIZE - 1}");

            var bulk = client.BulkAll(
                blogs.Select(b => new BlogIndexed
                {
                    Id = b.blog.BlogID,
                    Title = b.blog.BlogTitle,
                    Content = b.blog.Content,
                    Tags = b.tag.Select(t => t.TagName),
                    CreateDate = b.blog.BlogDate,
                    CategoryId = b.blog.CategoryID,
                    Author = b.blog.Author,
                    IsHarmony = b.blog.isHarmony,
                    IsApproved = b.blog.isApproved,
                    BlogVisit = b.blog.BlogVisit,
                    PostCount = b.post,
                    Rating = b.blog.Rating ?? 0,
                    ImagePath = b.blog.ImagePath,
                    IsLocalImg = b.blog.IsLocalImg,
                }),
                s => s
                    // in case of 429 response, how long we should wait before retrying
                    .BackOffTime(TimeSpan.FromSeconds(5))
                    // in case of 429 response, how many times to retry before failing
                    .BackOffRetries(5)
                    .Index<BlogIndexed>());

            // The observer callbacks run on another thread. A failure is recorded there and
            // rethrown on this thread after the wait, so it stops the run with its original
            // stack trace instead of being thrown inside the callback.
            Exception failure = null;
            using (var waitHandle = new ManualResetEvent(false))
            {
                var bulkAllObserver = new BulkAllObserver(
                    onNext: bulkAllResponse =>
                    {
                        // do something after each bulk request
                        Console.WriteLine($"Done page {bulkAllResponse.Page} with retry {bulkAllResponse.Retries}");
                    },
                    onError: exception =>
                    {
                        failure = exception;
                        waitHandle.Set();
                    },
                    onCompleted: () =>
                    {
                        // do something when all bulk operations complete
                        waitHandle.Set();
                    });

                bulk.Subscribe(bulkAllObserver);
                waitHandle.WaitOne();
            }

            if (failure != null)
            {
                System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(failure).Throw();
            }

            if (blogs.Count > 0)
            {
                lastBlogId = blogs.Last().blog.BlogID;
            }
            if (blogs.Count < BATCH_SIZE)
            {
                // Short page: nothing left to index.
                break;
            }
        }

        client.Indices.Refresh(Indices.Index("blogs"));
        Console.WriteLine($"last blogs: {lastBlogId}");
        Console.ReadLine();
    }
}