Пример #1
0
        /// <summary>
        /// Interactive maintenance routine that looks for reports stored more than once
        /// and, after the operator presses Enter, deletes every copy but one.
        /// </summary>
        /// <remarks>
        /// Stages, in order:
        /// <list type="number">
        /// <item>Load every report, one query per attacker player id, in parallel.</item>
        /// <item>Group the reports by access group, attacker village, defender village and time.</item>
        /// <item>Inside each group, bucket reports by <c>ReportSignature</c>; buckets with more
        /// than one entry are duplicate candidates.</item>
        /// <item>Re-check every candidate set field by field. Sets that fail the check are
        /// reported and are NOT passed on to the deletion stage.</item>
        /// <item>Per verified set keep one report (preferring one with a non-null Loyalty) and
        /// delete the rest in batches.</item>
        /// </list>
        /// Progress is shown through <see cref="Console.Title"/>; timings go to standard output.
        /// </remarks>
        static void CleanDuplicateReports()
        {
            const int LoadParallelism      = 64;
            const int DuplicateWorkerCount = 32;
            const int DeleteBatchSize      = 1500;

            Console.WriteLine("Creating connection...");

            // The same options object is reused for every context created below.
            var contextOptions = new DbContextOptionsBuilder<Vault.Scaffold.VaultContext>()
                                 .UseNpgsql("")
                                 .Options;

            using (var context = new Vault.Scaffold.VaultContext(contextOptions))
            {
                var sw = new Stopwatch();

                Console.WriteLine("Getting player IDs...");
                var playerIds     = context.User.OrderBy(g => g.PlayerId).Select(g => g.PlayerId).ToList().Distinct().ToList();
                var playerReports = new ConcurrentBag<List<Vault.Scaffold.Report>>();

                sw.Restart();
                Console.WriteLine("Getting all reports...");
                int numPlayersLoaded = 0;
                // Dedicated gate for the progress counter/title; never lock on the DbContext itself.
                var progressGate = new object();
                Parallel.ForEach(playerIds, new ParallelOptions { MaxDegreeOfParallelism = LoadParallelism }, (id) =>
                {
                    // Each iteration gets its own context; the outer one is not shared across threads.
                    using (var playerContext = new Vault.Scaffold.VaultContext(contextOptions))
                    {
                        playerReports.Add(playerContext.Report.IncludeReportData().AsNoTracking().Where(r => r.AttackerPlayerId == id).ToList());
                    }

                    // Increment and display under the same lock so the title never goes backwards.
                    lock (progressGate)
                    {
                        numPlayersLoaded++;
                        Console.Title = "Loaded " + numPlayersLoaded + "/" + playerIds.Count;
                    }
                });

                Console.WriteLine("Got {0} reports in {1}s", playerReports.Sum(l => l.Count), sw.ElapsedMilliseconds / 1000);

                sw.Restart();
                Console.Write("Pre-categorizing reports... ");
                var categorizedReports = playerReports
                                         .ThenFastGroupByUnordered(r => r.AccessGroupId)
                                         .ThenFastGroupByUnordered(r => r.AttackerVillageId)
                                         .ThenFastGroupByUnordered(r => r.DefenderVillageId)
                                         .ThenFastGroupByUnordered(r => r.OccuredAt);
                // Drop the reference to the ungrouped lists before building the next structure.
                playerReports = null;
                GC.Collect();
                Console.WriteLine("Took {0}ms", sw.ElapsedMilliseconds);

                sw.Restart();
                Console.Write("Generating concurrent set... ");
                var reportsQueue = new BlockingCollection<List<Vault.Scaffold.Report>>(new ConcurrentBag<List<Vault.Scaffold.Report>>(categorizedReports));
                // Nothing is added after this point, so consumers terminate once the queue drains.
                reportsQueue.CompleteAdding();
                categorizedReports = null;
                GC.Collect();
                Console.WriteLine("Took {0}ms", sw.ElapsedMilliseconds);

                int totalJobs     = reportsQueue.Count;
                var allDuplicates = new ConcurrentBag<List<Vault.Scaffold.Report>>();

                sw.Restart();
                Console.Write("Checking for duplicates within " + totalJobs + " categories... ");
                var workers = new Task[DuplicateWorkerCount];
                for (int i = 0; i < workers.Length; i++)
                {
                    workers[i] = Task.Factory.StartNew(() =>
                    {
                        foreach (var group in reportsQueue.GetConsumingEnumerable())
                        {
                            var reportsBySignature = new Dictionary<ReportSignature, List<Vault.Scaffold.Report>>();
                            foreach (var report in group)
                            {
                                var sign = new ReportSignature(report);
                                List<Vault.Scaffold.Report> sameSignature;
                                if (!reportsBySignature.TryGetValue(sign, out sameSignature))
                                {
                                    sameSignature = new List<Vault.Scaffold.Report>();
                                    reportsBySignature[sign] = sameSignature;
                                }
                                sameSignature.Add(report);
                            }

                            foreach (var set in reportsBySignature.Values.Where(reports => reports.Count > 1))
                            {
                                allDuplicates.Add(set);
                            }
                        }
                    }, TaskCreationOptions.LongRunning);
                }

                // Wait for the workers themselves, not for the queue to become empty: an empty
                // queue only means every group has been handed out, while workers may still be
                // processing the last ones. WaitAll also rethrows any worker exception.
                while (!Task.WaitAll(workers, 1000))
                {
                    Console.Title = reportsQueue.Count + "/" + totalJobs + " remaining";
                }
                reportsQueue.Dispose();

                // Single snapshot so every later stage works on exactly the same sets.
                var duplicateSets = allDuplicates.ToList();

                Console.WriteLine("Took {0}m {1}s", (int)sw.Elapsed.TotalMinutes, sw.Elapsed.Seconds);
                Console.WriteLine("Total of {0} reports that were duplicated ({1} total duplicate reports)", duplicateSets.Count, duplicateSets.Sum(l => l.Count));

                sw.Restart();
                var updateSw = Stopwatch.StartNew();
                Console.Write("Verifying duplicates... ");
                int numChecked   = 0;
                var verifiedSets = new List<List<Vault.Scaffold.Report>>(duplicateSets.Count);
                foreach (var group in duplicateSets)
                {
                    var reference = group[0];
                    // Loyalty and Luck are intentionally left out of the comparison:
                    // Loyalty is null in some cases, Luck is null or mismatched in some cases.
                    bool isConsistent = group.All(r =>
                                                  (Army)r.DefenderArmy == (Army)reference.DefenderArmy &&
                                                  (Army)r.AttackerArmy == (Army)reference.AttackerArmy &&
                                                  (Army)r.DefenderLossesArmy == (Army)reference.DefenderLossesArmy &&
                                                  (Army)r.AttackerLossesArmy == (Army)reference.AttackerLossesArmy &&
                                                  (Army)r.DefenderTravelingArmy == (Army)reference.DefenderTravelingArmy &&
                                                  r.AttackerPlayerId == reference.AttackerPlayerId &&
                                                  r.DefenderPlayerId == reference.DefenderPlayerId &&
                                                  r.AttackerVillageId == reference.AttackerVillageId &&
                                                  r.DefenderVillageId == reference.DefenderVillageId &&
                                                  r.OccuredAt == reference.OccuredAt &&
                                                  r.WorldId == reference.WorldId &&
                                                  r.AccessGroupId == reference.AccessGroupId);

                    if (isConsistent)
                    {
                        verifiedSets.Add(group);
                    }
                    else
                    {
                        // A set that fails verification must never reach the deletion stage.
                        Console.WriteLine("Invalid duplicate set found");
                        Debugger.Break();
                    }

                    ++numChecked;

                    // Refresh the title at most about once per second.
                    if (updateSw.ElapsedMilliseconds >= 1000)
                    {
                        Console.Title = "Checked " + numChecked + "/" + duplicateSets.Count;
                        updateSw.Restart();
                    }
                }

                Console.WriteLine("Took {0}m {1}s", (int)sw.Elapsed.TotalMinutes, sw.Elapsed.Seconds);

                Console.Write("Determining best copies and removable reports... ");
                sw.Restart();
                var removableReports = new List<Vault.Scaffold.Report>();
                foreach (var group in verifiedSets)
                {
                    // Prefer a copy that has Loyalty set; the stable sort keeps the first such copy.
                    var best = group.OrderByDescending(r => r.Loyalty != null ? 1 : 0).First();
                    removableReports.AddRange(group.Where(r => r != best));
                }
                Console.WriteLine("Took {0}ms", sw.ElapsedMilliseconds);

                Console.WriteLine("Press Enter to delete the {0} removed entries.", removableReports.Count);
                Console.ReadLine();

                Console.WriteLine("Deleting... ");
                int numCleared = 0;
                sw.Restart();
                // Delete in fixed-size batches, saving after each one.
                foreach (var batch in removableReports.Grouped(DeleteBatchSize).Select(g => g.ToList()))
                {
                    context.RemoveRange(batch);
                    context.SaveChanges();
                    numCleared   += batch.Count;
                    Console.Title = "Cleared " + numCleared + "/" + removableReports.Count;
                }
                Console.WriteLine("Took {0}ms", sw.ElapsedMilliseconds);
            }
        }