/// <summary>
/// Copies the three ratios in <paramref name="ratios"/> onto the statistics entry that
/// <c>s_genreStats</c> holds for <paramref name="genre"/>.
/// </summary>
/// <param name="genre">Key used to look up the entry in <c>s_genreStats</c>.</param>
/// <param name="ratios">Source of the main/extras, extras/completionist and extras-placement ratios.</param>
private static void UpdateGenreStats(string genre, TtbRatios ratios)
{
    // The indexer lookup is done once; the entry is then updated in place.
    var genreEntry = s_genreStats[genre];

    genreEntry.MainExtrasRatio = ratios.MainExtras;
    genreEntry.ExtrasCompletionistRatio = ratios.ExtrasCompletionist;
    genreEntry.ExtrasPlacementRatio = ratios.ExtrasPlacement;
}
/// <summary>
/// Handles an imputation result in which at least one of the three values came back as zero:
/// logs the occurrence, rejects the all-zero case, and otherwise fills the zeroes in through
/// <c>FixTtbZeroes</c>.
/// </summary>
/// <param name="appEntity">App whose name, id and imputed flags are written to the log.</param>
/// <param name="ratios">Ratios forwarded to <c>FixTtbZeroes</c>.</param>
/// <param name="imputedMain">Imputed main value; may be rewritten.</param>
/// <param name="imputedExtras">Imputed extras value; may be rewritten.</param>
/// <param name="imputedCompletionist">Imputed completionist value; may be rewritten.</param>
/// <exception cref="InvalidOperationException">All three values are zero.</exception>
private static void FixImputationZeroes(
    AppEntity appEntity,
    TtbRatios ratios,
    ref int imputedMain,
    ref int imputedExtras,
    ref int imputedCompletionist)
{
    // The event is emitted first so that it is recorded even when the method throws below.
    HltbScraperEventSource.Log.ImputationProducedZeroTtb(
        appEntity.SteamName,
        appEntity.SteamAppId,
        imputedMain,
        imputedExtras,
        imputedCompletionist,
        appEntity.MainTtbImputed,
        appEntity.ExtrasTtbImputed,
        appEntity.CompletionistTtbImputed);

    bool everythingIsZero = imputedMain == 0 && imputedExtras == 0 && imputedCompletionist == 0;
    if (everythingIsZero)
    {
        // Nothing to derive the missing values from.
        throw new InvalidOperationException("all TTBs of a not completely missing app are zeroes: " + appEntity.SteamAppId);
    }

    FixTtbZeroes(ratios, ref imputedMain, ref imputedExtras, ref imputedCompletionist);
}
/// <summary>
/// Repairs an imputation result through <c>FixInvalidTtbs</c> and logs both the values as they
/// were before the repair and the values after it.
/// </summary>
/// <param name="appEntity">App whose imputed flags drive the repair and whose name/id are logged.</param>
/// <param name="ratios">Ratios forwarded to <c>FixInvalidTtbs</c>.</param>
/// <param name="imputedMain">Imputed main value; may be rewritten.</param>
/// <param name="imputedExtras">Imputed extras value; may be rewritten.</param>
/// <param name="imputedCompletionist">Imputed completionist value; may be rewritten.</param>
private static void FixImputationMiss(
    AppEntity appEntity,
    TtbRatios ratios,
    ref int imputedMain,
    ref int imputedExtras,
    ref int imputedCompletionist)
{
    // Snapshot the incoming values: the ref arguments are overwritten by the repair below.
    int mainBeforeFix = imputedMain;
    int extrasBeforeFix = imputedExtras;
    int completionistBeforeFix = imputedCompletionist;

    FixInvalidTtbs(
        ref imputedMain, appEntity.MainTtbImputed,
        ref imputedExtras, appEntity.ExtrasTtbImputed,
        ref imputedCompletionist, appEntity.CompletionistTtbImputed,
        ratios);

    HltbScraperEventSource.Log.ImputationMiss(
        appEntity.SteamName,
        appEntity.SteamAppId,
        mainBeforeFix,
        extrasBeforeFix,
        completionistBeforeFix,
        imputedMain,
        imputedExtras,
        imputedCompletionist,
        appEntity.MainTtbImputed,
        appEntity.ExtrasTtbImputed,
        appEntity.CompletionistTtbImputed);
}
/// <summary>
/// Parses one comma-separated row of three imputed values (main, extras, completionist, in that
/// order) and applies them to <paramref name="appEntity"/> via <c>UpdateFromImputedValues</c>.
/// </summary>
/// <param name="appEntity">App to update.</param>
/// <param name="row">CSV row; must split on ',' into exactly three fields.</param>
/// <param name="ratios">Ratios forwarded to <c>UpdateFromImputedValues</c>.</param>
/// <param name="imputationZero">Set by <c>UpdateFromImputedValues</c>.</param>
/// <param name="imputationMiss">Set by <c>UpdateFromImputedValues</c>.</param>
/// <exception cref="InvalidOperationException">The row does not contain exactly three fields.</exception>
internal static void UpdateFromCsvRow(AppEntity appEntity, string row, TtbRatios ratios, out bool imputationZero, out bool imputationMiss)
{
    var ttbs = row.Split(',');
    if (ttbs.Length != 3)
    {
        // The check rejects rows that are too short as well as too long, so report the actual
        // field count instead of claiming the row always has "more than 3 values".
        throw new InvalidOperationException(String.Format(
            CultureInfo.InvariantCulture,
            "Invalid CSV row, expected exactly 3 values but found {0}: {1}",
            ttbs.Length,
            row));
    }

    var imputedMain = GetRoundedValue(ttbs[0]);
    var imputedExtras = GetRoundedValue(ttbs[1]);
    var imputedCompletionist = GetRoundedValue(ttbs[2]);

    UpdateFromImputedValues(appEntity, imputedMain, imputedExtras, imputedCompletionist, ratios, out imputationZero, out imputationMiss);
}
/// <summary>
/// Replaces zero values among the three TTBs with values derived from a neighbouring TTB through
/// <c>CalculateTtbFromRatio</c>.
/// </summary>
/// <remarks>
/// The result is not guaranteed to be correctly ordered, so callers should generally run
/// <c>FixInvalidTtbs</c> afterwards as well.
/// </remarks>
/// <param name="ratios">Supplies the main/extras and extras/completionist ratios.</param>
/// <param name="mainTtb">Main value; rewritten when zero.</param>
/// <param name="extrasTtb">Extras value; rewritten when zero.</param>
/// <param name="completionistTtb">Completionist value; rewritten when zero.</param>
private static void FixTtbZeroes(TtbRatios ratios, ref int mainTtb, ref int extrasTtb, ref int completionistTtb)
{
    // Main and extras are both unknown: completionist is the only anchor, so derive extras from it first.
    if (mainTtb == 0 && extrasTtb == 0)
    {
        extrasTtb = CalculateTtbFromRatio(completionistTtb, ratios.ExtrasCompletionist);
    }

    // Main is unknown: extras is expected to be non-zero at this point, so derive main from it.
    if (mainTtb == 0)
    {
        mainTtb = CalculateTtbFromRatio(extrasTtb, ratios.MainExtras);
    }

    // Extras is unknown: main is expected to be non-zero at this point, so go the other way round.
    if (extrasTtb == 0)
    {
        extrasTtb = CalculateTtbFromRatio(mainTtb, 1 / ratios.MainExtras);
    }

    // Completionist is unknown: extras is expected to be non-zero at this point.
    if (completionistTtb == 0)
    {
        completionistTtb = CalculateTtbFromRatio(extrasTtb, 1 / ratios.ExtrasCompletionist);
    }
}
/// <summary>
/// Repairs, in place, a (main, extras, completionist) triple whose values are not ordered
/// main &lt;= extras &lt;= completionist. Which value gets recomputed in each case depends on
/// the relative order of the three values and on which of them are flagged as imputed.
/// </summary>
/// <remarks>
/// The branches assume the incoming triple is actually out of order; callers visible in this
/// file only invoke the method in that situation. Replacement values come from
/// <c>CalculateTtbFromRatio</c> (defined elsewhere) or, for extras between a fixed main and a
/// fixed completionist, from an interpolation weighted by <c>ratios.ExtrasPlacement</c>.
/// </remarks>
/// <param name="mainTtb">Main value; may be rewritten.</param>
/// <param name="mainImputed">Whether the main value is imputed.</param>
/// <param name="extrasTtb">Extras value; may be rewritten.</param>
/// <param name="extrasImputed">Whether the extras value is imputed.</param>
/// <param name="completionistTtb">Completionist value; may be rewritten.</param>
/// <param name="completionistImputed">Whether the completionist value is imputed.</param>
/// <param name="ratios">Supplies the main/extras, extras/completionist and extras-placement ratios.</param>
internal static void FixInvalidTtbs(
    ref int mainTtb, bool mainImputed,
    ref int extrasTtb, bool extrasImputed,
    ref int completionistTtb, bool completionistImputed,
    TtbRatios ratios)
{
    //recall that Sanitize() made sure that non-imputed values were ordered correctly (main <= extras <= completionist)
    if (mainTtb > extrasTtb)
    {
        if (extrasTtb >= completionistTtb) // M > E >= C
        {
            if (!mainImputed)
            {
                //main is not imputed, which means both extras and completionist must be imputed
                extrasTtb = CalculateTtbFromRatio(mainTtb, 1 / ratios.MainExtras);

                //now that both extras and main are valid, we can fix completionist based on extras
                completionistTtb = CalculateTtbFromRatio(extrasTtb, 1 / ratios.ExtrasCompletionist);
            }
            else if (!extrasImputed)
            {
                //extras is not imputed, which means main must be imputed
                mainTtb = CalculateTtbFromRatio(extrasTtb, ratios.MainExtras);

                if (extrasTtb > completionistTtb)
                {
                    //completionist must be imputed as well (since extras and main are valid, we can fix completionist based on extras)
                    completionistTtb = CalculateTtbFromRatio(extrasTtb, 1 / ratios.ExtrasCompletionist);
                }
            }
            else //both main and extras are imputed, so completionist is the value to anchor on
            {
                if (extrasTtb > completionistTtb)
                {
                    //extras is imputed and larger than completionist, so re-derive it from completionist
                    extrasTtb = CalculateTtbFromRatio(completionistTtb, ratios.ExtrasCompletionist);
                }

                //now that both extras and completionist are valid, we can fix main based on extras
                mainTtb = CalculateTtbFromRatio(extrasTtb, ratios.MainExtras);
            }
        }
        else if (completionistTtb >= mainTtb) // C >= M > E
        {
            if (mainImputed)
            {
                //main is imputed and extras is already less than completionist, so we'll just fix main
                mainTtb = CalculateTtbFromRatio(extrasTtb, ratios.MainExtras);
            }
            else
            {
                //main is not imputed and is larger than extras, meaning extras must be imputed;
                //place extras between main and completionist
                extrasTtb = (int)(mainTtb + ratios.ExtrasPlacement * (completionistTtb - mainTtb));
            }
        }
        else // M > C > E
        {
            if (mainImputed)
            {
                //main is larger than both extras and completionist (which themselves are valid), therefore needs to be fixed.
                //Main is derived from extras, so the main/extras ratio applies here, exactly as in
                //every other branch that recomputes main (not the extras/completionist ratio).
                mainTtb = CalculateTtbFromRatio(extrasTtb, ratios.MainExtras);
            }
            else
            {
                //main is not imputed and is bigger than extras, therefore extras must be imputed and needs to be fixed
                extrasTtb = CalculateTtbFromRatio(mainTtb, 1 / ratios.MainExtras);

                //since main > completionist and now extras >= main, we need to fix completionist (which must have been imputed)
                completionistTtb = CalculateTtbFromRatio(extrasTtb, 1 / ratios.ExtrasCompletionist);
            }
        }
    }
    else //main <= extras, so for an invalid triple extrasTtb > completionistTtb
    {
        if (completionistTtb >= mainTtb) // E > C >= M
        {
            if (completionistImputed)
            {
                //completionist is imputed and main is already less than extras, so we'll just fix completionist
                completionistTtb = CalculateTtbFromRatio(extrasTtb, 1 / ratios.ExtrasCompletionist);
            }
            else
            {
                //completionist is not imputed and smaller than extras, therefore extras must be fixed;
                //place extras between main and completionist
                extrasTtb = (int)(mainTtb + ratios.ExtrasPlacement * (completionistTtb - mainTtb));
            }
        }
        // There's no need to check M > E > C because the case M > E was already handled above
        else // E >= M > C
        {
            if (completionistImputed)
            {
                //completionist is imputed and extras is already larger than (or equal to) main, so we'll just fix completionist
                completionistTtb = CalculateTtbFromRatio(extrasTtb, 1 / ratios.ExtrasCompletionist);
            }
            else
            {
                //completionist is not imputed but both main and extras are larger than it, which means both are imputed and need to be fixed
                extrasTtb = CalculateTtbFromRatio(completionistTtb, ratios.ExtrasCompletionist);
                mainTtb = CalculateTtbFromRatio(extrasTtb, ratios.MainExtras);
            }
        }
    }
}
/// <summary>
/// Applies a triple of imputed values to <paramref name="appEntity"/>: each value first passes
/// through <c>HandleOverridenTtb</c>, zeroes and ordering violations are then repaired, and the
/// final triple is handed to <c>appEntity.FixTtbs</c>.
/// </summary>
/// <param name="appEntity">App being updated.</param>
/// <param name="imputedMain">Imputed main value.</param>
/// <param name="imputedExtras">Imputed extras value.</param>
/// <param name="imputedCompletionist">Imputed completionist value.</param>
/// <param name="ratios">Ratios used by the repair steps.</param>
/// <param name="imputationZero">True when at least one value was zero after override handling.</param>
/// <param name="imputationMiss">True when the values were out of order after zero handling.</param>
private static void UpdateFromImputedValues(
    AppEntity appEntity,
    int imputedMain,
    int imputedExtras,
    int imputedCompletionist,
    TtbRatios ratios,
    out bool imputationZero,
    out bool imputationMiss)
{
    HandleOverridenTtb(appEntity, "main", appEntity.MainTtb, appEntity.MainTtbImputed, ref imputedMain);
    HandleOverridenTtb(appEntity, "extras", appEntity.ExtrasTtb, appEntity.ExtrasTtbImputed, ref imputedExtras);
    HandleOverridenTtb(appEntity, "completionist", appEntity.CompletionistTtb, appEntity.CompletionistTtbImputed, ref imputedCompletionist);

    // Step 1: any zero left after override handling is filled in.
    imputationZero = imputedMain == 0 || imputedExtras == 0 || imputedCompletionist == 0;
    if (imputationZero)
    {
        FixImputationZeroes(appEntity, ratios, ref imputedMain, ref imputedExtras, ref imputedCompletionist);
    }

    // Step 2: the ordering check runs on the values as they stand after step 1.
    imputationMiss = imputedMain > imputedExtras || imputedExtras > imputedCompletionist;
    if (imputationMiss)
    {
        FixImputationMiss(appEntity, ratios, ref imputedMain, ref imputedExtras, ref imputedCompletionist);
    }

    appEntity.FixTtbs(imputedMain, imputedExtras, imputedCompletionist);
}
/// <summary>
/// Sends the apps in <paramref name="notCompletelyMissing"/> to the imputation service and
/// applies each returned CSV row to the app at the same index.
/// </summary>
/// <param name="genre">Genre being processed; passed to the service and used in log events.</param>
/// <param name="notCompletelyMissing">Apps to impute; one data row is expected back per app.</param>
/// <param name="ratios">Ratios forwarded to <c>UpdateFromCsvRow</c>.</param>
/// <exception cref="InvalidOperationException">
/// The number of data rows returned does not equal the number of apps sent.
/// </exception>
private static async Task ImputeCore(string genre, IReadOnlyList<AppEntity> notCompletelyMissing, TtbRatios ratios)
{
    HltbScraperEventSource.Log.CalculateImputationStart(genre, notCompletelyMissing.Count);

    string imputed = await InvokeImputationService(genre, notCompletelyMissing).ConfigureAwait(false);

    // The response's line endings are decided by the service, not by the platform this code runs
    // on, so accept both CRLF and LF rather than relying on Environment.NewLine. "\r\n" is listed
    // first so that a CRLF is consumed as a single delimiter.
    var imputedRows = imputed
        .Split(new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries)
        .Skip(1) //skip header row
        .Where(s => !String.IsNullOrWhiteSpace(s))
        .ToArray();

    if (imputedRows.Length != notCompletelyMissing.Count)
    {
        throw new InvalidOperationException(String.Format(
            CultureInfo.InvariantCulture,
            "imputation count mismatch: expected {0}, actual {1}",
            notCompletelyMissing.Count,
            imputedRows.Length));
    }

    int imputationZeroes = 0;
    int imputationMisses = 0;
    for (int i = 0; i < notCompletelyMissing.Count; i++)
    {
        // Rows are matched to apps purely by position.
        UpdateFromCsvRow(notCompletelyMissing[i], imputedRows[i], ratios, out var imputationZero, out var imputationMiss);
        if (imputationMiss)
        {
            imputationMisses++;
        }
        if (imputationZero)
        {
            imputationZeroes++;
        }
    }

    // Report the genre when the share of problematic rows exceeds the configured thresholds.
    if ((double)imputationZeroes / notCompletelyMissing.Count > ImputationZerosThreshold)
    {
        HltbScraperEventSource.Log.ImputationProducedTooManyZeroTtbs(genre, imputationZeroes, notCompletelyMissing.Count);
    }
    if ((double)imputationMisses / notCompletelyMissing.Count > ImputationMissThreshold)
    {
        HltbScraperEventSource.Log.ImputationProducedTooManyMisses(genre, imputationMisses, notCompletelyMissing.Count);
    }

    HltbScraperEventSource.Log.CalculateImputationStop(genre, notCompletelyMissing.Count);
}
/// <summary>
/// Imputes the TTBs of one genre. With enough apps that have at least one non-imputed value,
/// the work goes through <c>ImputeCore</c> followed by <c>FillCompletelyMissing</c>; otherwise the
/// genre statistics are set from plain averages over all apps.
/// </summary>
/// <param name="apps">All apps of the genre.</param>
/// <param name="genre">Genre being processed.</param>
/// <param name="ratios">Ratios forwarded to <c>ImputeCore</c>.</param>
/// <param name="initial">
/// When true, an insufficient sample or a failure inside <c>ImputeCore</c> is raised to the caller
/// instead of being tolerated.
/// </param>
/// <exception cref="InvalidOperationException">
/// <paramref name="initial"/> is true and there are fewer than <c>ImputationThreshold</c> usable apps.
/// </exception>
private static async Task Impute(IReadOnlyCollection<AppEntity> apps, string genre, TtbRatios ratios, bool initial)
{
    // Keep every app for which at least one of the three values is not imputed.
    var partiallyKnown = apps
        .Where(a => !(a.MainTtbImputed && a.ExtrasTtbImputed && a.CompletionistTtbImputed))
        .ToArray();

    if (partiallyKnown.Length >= ImputationThreshold)
    {
        try
        {
            await ImputeCore(genre, partiallyKnown, ratios).ConfigureAwait(false);
        }
        catch (Exception e)
        {
            HltbScraperEventSource.Log.ImputationError(genre, e.Message);
            if (initial)
            {
                throw;
            }
        }

        FillCompletelyMissing(genre, apps, partiallyKnown);
        return;
    }

    if (initial)
    {
        throw new InvalidOperationException(String.Format(
            CultureInfo.InvariantCulture,
            "Insufficient amount of not completely missing games in game type '{0}': {1}",
            genre,
            partiallyKnown.Length));
    }

    bool probableScrapingIssue =
        partiallyKnown.Length == 0
        && apps.Count > NotCompletelyMissingThreshold
        && !KnownMissingGenres.Contains(genre);
    if (probableScrapingIssue)
    {
        HltbScraperEventSource.Log.GenreHasNoTtbs(genre);
    }

    //too few samples to say anything smart, we'll just take the average (there is always at least one app per genre)
    int averageMain = (int)apps.Average(a => a.MainTtb);
    int averageExtras = (int)apps.Average(a => a.ExtrasTtb);
    int averageCompletionist = (int)apps.Average(a => a.CompletionistTtb);
    UpdateGenreStats(genre, averageMain, averageExtras, averageCompletionist);
}
/// <summary>
/// Computes the ratios for <paramref name="apps"/> via <c>GetTtbRatiosCore</c> and substitutes the
/// corresponding value from <paramref name="fallback"/> for every ratio whose magnitude is below
/// a small tolerance (i.e. is treated as missing).
/// </summary>
/// <param name="genre">Genre name, used only in the exception message.</param>
/// <param name="apps">Apps the ratios are computed from.</param>
/// <param name="fallback">Ratios to fall back on; may be null when no ratio is missing.</param>
/// <returns>A new <c>TtbRatios</c> combining computed and fallback values.</returns>
/// <exception cref="InvalidOperationException">
/// At least one ratio is missing and <paramref name="fallback"/> is null.
/// </exception>
private static TtbRatios GetTtbRatios(string genre, IReadOnlyCollection<AppEntity> apps, TtbRatios fallback)
{
    const double epsilon = 0.0001;

    var computed = GetTtbRatiosCore(apps);

    bool needMainExtras = Math.Abs(computed.MainExtras) < epsilon;
    bool needExtrasCompletionist = Math.Abs(computed.ExtrasCompletionist) < epsilon;
    bool needExtrasPlacement = Math.Abs(computed.ExtrasPlacement) < epsilon;

    bool anythingMissing = needMainExtras || needExtrasCompletionist || needExtrasPlacement;
    if (anythingMissing && fallback == null)
    {
        throw new InvalidOperationException(String.Format(
            CultureInfo.InvariantCulture,
            "No record exists for game type '{0}' for which both main and extras (or extras and completionist) are present",
            genre));
    }

    // fallback is only dereferenced for a ratio that is missing, which the guard above has
    // already shown implies fallback is non-null.
    var mainExtras = needMainExtras ? fallback.MainExtras : computed.MainExtras;
    var extrasCompletionist = needExtrasCompletionist ? fallback.ExtrasCompletionist : computed.ExtrasCompletionist;
    var extrasPlacement = needExtrasPlacement ? fallback.ExtrasPlacement : computed.ExtrasPlacement;

    return new TtbRatios(mainExtras, extrasCompletionist, extrasPlacement);
}
/// <summary>
/// Runs the full per-genre pipeline: determines the genre's ratios, records them in the genre
/// statistics, imputes the genre's apps, and returns the ratios that were used.
/// </summary>
/// <param name="apps">All apps of the genre.</param>
/// <param name="genre">Genre being processed.</param>
/// <param name="fallbackRatios">Ratios passed to <c>GetTtbRatios</c> as the fallback.</param>
/// <param name="initial">Forwarded unchanged to <c>Impute</c>.</param>
/// <returns>The ratios obtained from <c>GetTtbRatios</c> for this genre.</returns>
private static async Task<TtbRatios> ImputeGenreAndGetRatios(
    IReadOnlyCollection<AppEntity> apps,
    string genre,
    TtbRatios fallbackRatios,
    bool initial)
{
    HltbScraperEventSource.Log.ImputeGenreStart(genre, apps.Count);

    TtbRatios genreRatios = GetTtbRatios(genre, apps, fallbackRatios);
    UpdateGenreStats(genre, genreRatios);

    await Impute(apps, genre, genreRatios, initial).ConfigureAwait(false);

    HltbScraperEventSource.Log.ImputeGenreStop(genre, apps.Count);
    return genreRatios;
}