/// <summary>
/// Attempts to build a <see cref="GenrePart"/> from a textual definition.
/// </summary>
/// <remarks>
/// The accepted shape is: <c>StartString</c>, then a list of parts separated by <c>),(</c>
/// (surrounding parentheses are trimmed from each part), terminated by the last <c>]</c> in the
/// string, optionally followed by an integer priority. When no priority is present
/// <see cref="int.MaxValue"/> is used.
/// </remarks>
/// <param name="value">Text to parse; <c>null</c> is treated as "not a genre part".</param>
/// <param name="referenceGenre">Reference genre passed through to the created <see cref="GenrePart"/>.</param>
/// <param name="genrePart">The parsed part on success; <c>null</c> on failure.</param>
/// <returns>
/// <c>true</c> when <paramref name="value"/> was parsed; <c>false</c> when it does not start with
/// <c>StartString</c>, has no closing <c>]</c> after the prefix, or carries a priority suffix that
/// is not a valid integer.
/// </returns>
public static bool TryParse(string value, string referenceGenre, out GenrePart genrePart)
{
    genrePart = null;

    // Ordinal comparison: the prefix is a syntactic marker, not linguistic text.
    if (value == null || !value.StartsWith(StartString, StringComparison.Ordinal))
    {
        return false;
    }

    // The part list runs from the end of the prefix up to the LAST ']'. If that bracket is
    // missing (-1) or sits inside the prefix, there is no list to cut out, so report failure
    // instead of letting Substring throw.
    var closingIndex = value.LastIndexOf(']');
    if (closingIndex < StartString.Length)
    {
        return false;
    }

    // Anything after the closing bracket is the priority; absent means "lowest" (int.MaxValue).
    var priority = int.MaxValue;
    if (value.Length > closingIndex + 1)
    {
        var priorityText = value.Substring(closingIndex + 1);
        var parsed = int.TryParse(
            priorityText,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out priority);
        if (!parsed)
        {
            // A Try-method must not throw on malformed input.
            return false;
        }
    }

    var parts = value
        .Substring(StartString.Length, closingIndex - StartString.Length)
        .Split("),(")
        .Select(item => item.Trim('(', ')'))
        .ToArray();

    genrePart = new GenrePart(referenceGenre, parts, priority);
    return true;
}
/// <summary>
/// Rewrites the <c>Genre</c> field of audio book records so that it uses the reference genre names
/// from <c>_referenceGenres</c>. The work runs on a thread-pool task.
/// </summary>
/// <remarks>
/// Each book genre longer than three characters is resolved in this order:
/// 1) exact, case-sensitive match against a reference genre name;
/// 2) case-insensitive match against a known variant, or, failing that, the closest variant
///    within a Levenshtein distance of 2;
/// 3) <see cref="GenrePart"/> rules whose <c>IsFit</c> accepts the genre;
/// 4) otherwise the genre is kept only if the <c>_mixedCyrLatRegex</c> replacement changed it.
/// A record is rewritten only when at least one genre was resolved; in that case genres that
/// resolved to nothing are left out of the new value.
/// </remarks>
/// <param name="recordKeys">
/// Not read by this implementation: every record returned by <c>GetRecordsAllWithCache()</c> is
/// processed. NOTE(review): confirm whether filtering by these keys was intended.
/// </param>
/// <param name="cancellationToken">
/// Observed before the task starts and after each processed record.
/// </param>
/// <returns>A task that completes when all records have been processed.</returns>
public async Task Normalize(IReadOnlyCollection<string> recordKeys, CancellationToken cancellationToken)
{
    // Task.Run always queues to the thread pool. Task.Factory.StartNew would use
    // TaskScheduler.Current, which can be a UI/custom scheduler depending on the caller.
    await Task.Run(() =>
    {
        using (var autoSave = Context.I.DbContainerAutoSave)
        {
            var dbContainer = autoSave.DBContainer;
            var records = dbContainer.AudioBookSet.GetRecordsAllWithCache();
            var saveStopwatch = Stopwatch.StartNew();

            // Variant spelling -> reference genre name; looked up case-insensitively.
            var variantToReference =
                new Dictionary<string, string>(StringComparer.InvariantCultureIgnoreCase);

            // Case SENSITIVE!!! As Designed. (default comparer: exact reference names only)
            var exactReferenceNames = new HashSet<string>();
            var genreParts = new List<GenrePart>();

            // ---- Build lookup structures from the reference list -------------------------
            foreach (var genre in _referenceGenres)
            {
                // An entry is "Reference|variant|variant..."; text before the first '|' is the
                // reference name. With no '|' (or a leading one) the whole entry is the name.
                var referenceGenre = genre;
                var separatorIndex = genre.IndexOf('|');
                if (separatorIndex > 0)
                {
                    referenceGenre = genre.Substring(0, separatorIndex);
                }

                exactReferenceNames.Add(referenceGenre);

                // ReplaceAll is a project helper; presumably it replaces " ", "-" and "." with ""
                // so that variants are compared without punctuation - TODO confirm.
                var variants = genre.ToLower().ReplaceAll("", " ", "-", ".").Split('|');
                foreach (var variant in variants)
                {
                    if (GenrePart.TryParse(variant, referenceGenre, out var parsedPart))
                    {
                        genreParts.Add(parsedPart);
                    }
                    else
                    {
                        variantToReference[variant] = referenceGenre;
                    }
                }
            }

            // ---- Normalize every record --------------------------------------------------
            var abQuantity = 0;
            foreach (var audioBook in records)
            {
                var audioBookGenres = audioBook.GetGenres();
                var genresNormalized = new List<string>();

                // Genres of three characters or fewer are ignored.
                foreach (var audioBookGenre in audioBookGenres.Where(item => item.Length > 3))
                {
                    // Presumably fixes words that mix Cyrillic and Latin look-alike letters
                    // (per the regex/converter names) - TODO confirm.
                    var genre4Resolve = _mixedCyrLatRegex.Replace(audioBookGenre, CyrLatToCyrConverter);

                    // 1) Already a reference name: keep as is.
                    if (exactReferenceNames.Contains(genre4Resolve))
                    {
                        genresNormalized.Add(genre4Resolve);
                        continue;
                    }

                    // 2) Known variant, or a near miss by edit distance.
                    //    Tuple = (reference genre, distance, description). The description is
                    //    never read afterwards; it is only useful when inspecting in a debugger.
                    var genreForCompare = genre4Resolve.ToLower().ReplaceAll("", " ", "-", ",", ".");
                    var results = new List<Tuple<string, int, string>>();

                    if (variantToReference.TryGetValue(genreForCompare, out var mappedReference))
                    {
                        results.Add(Tuple.Create(mappedReference, 0, "Variant"));
                    }
                    else
                    {
                        // Only variants whose length differs by less than 3 are compared; a
                        // larger length difference cannot yield a distance of 2 or less.
                        var similarLength = variantToReference
                            .Where(item => Math.Abs(item.Key.Length - genreForCompare.Length) < 3);
                        foreach (var candidateVariant in similarLength)
                        {
                            var distance = LevenshteinDistance.Compute(candidateVariant.Key, genreForCompare);
                            if (distance <= 2)
                            {
                                results.Add(Tuple.Create(
                                    candidateVariant.Value,
                                    distance,
                                    $"Distance to {candidateVariant.Key}"));
                            }
                        }
                    }

                    if (results.Count > 0)
                    {
                        // OrderBy is stable, so ties keep dictionary enumeration order.
                        var best = results.OrderBy(item => item.Item2).First();
                        genresNormalized.Add(best.Item1);
                        continue;
                    }

                    // 3) Rule-based parts.
                    var fitParts = genreParts
                        .Where(gp => gp.IsFit(genre4Resolve))
                        .OrderBy(item => item.Priority)
                        .ToArray();

                    if (fitParts.Length > 0)
                    {
                        // Sorted ascending, so the first element carries the minimum priority.
                        var minPriority = fitParts[0].Priority;

                        // If any part has an explicit priority, only the best-priority parts
                        // apply; if all are int.MaxValue (no priority given), all of them apply.
                        var selectedParts = minPriority < int.MaxValue
                            ? fitParts.Where(item => item.Priority == minPriority).ToArray()
                            : fitParts;

                        // The original genre is kept alongside the reference genres it implies.
                        genresNormalized.Add(genre4Resolve);
                        foreach (var part in selectedParts)
                        {
                            genresNormalized.Add(part.ReferenceGenre);
                        }
                    }
                    else if (genre4Resolve != audioBookGenre)
                    {
                        // 4) No match, but the regex replacement changed the text: keep that.
                        genresNormalized.Add(genre4Resolve);
                    }
                }

                if (genresNormalized.Any())
                {
                    audioBook.Genre = string.Join(", ", genresNormalized.Distinct().OrderBy(item => item));
                    dbContainer.AudioBookSet.AddChangedRecords(audioBook);
                }

                cancellationToken.ThrowIfCancellationRequested();
                ProgressMessage.ReportComplex(abQuantity++, records.Count);

                // Flush periodically so a long run does not accumulate all changes in memory.
                if (saveStopwatch.Elapsed > SaveTimer)
                {
                    dbContainer.SaveChanges();
                    saveStopwatch.Restart();
                }
            }

            ProgressMessage.ReportComplex(records.Count, records.Count);
        }
    }, cancellationToken);
}