/// <summary>
/// Runs the endless main loop that processes sources in batches.
/// </summary>
/// <remarks>
/// Each iteration opens a fresh MMonitorContext and selects up to
/// numOfSourcesForParallerlProcessing sources whose Enc and
/// AutomaticalEncodingUpdateWasSuccess are both null and whose
/// IgnoreForAutomaticHelpersWork value is not true. Every selected source is
/// passed to Process in parallel, after which the context is saved.
/// When no source matches, the loop sleeps for ten minutes before polling again.
/// When an iteration throws, the error is logged and the loop retries after
/// five seconds. The method never returns.
/// </remarks>
private void StartProcessing()
{
    const int idleDelayMs = 1000 * 60 * 10; // pause when there is nothing to process
    const int errorDelayMs = 5000;          // pause before retrying after a failure

    while (true)
    {
        try
        {
            using (var db = new MMonitorContext())
            {
                // Materialize the batch exactly once. Leaving this as a deferred
                // query would re-run it for every Count() call and again for the
                // parallel enumeration, and (with no ordering on Take) the logged
                // count could describe a different set than the one processed.
                var sources = db.TheSources
                    .Where(s => s.Enc == null
                        && s.AutomaticalEncodingUpdateWasSuccess == null
                        && s.IgnoreForAutomaticHelpersWork.Value != true)
                    .Take(numOfSourcesForParallerlProcessing)
                    .ToList();

                if (sources.Count == 0)
                {
                    Thread.Sleep(idleDelayMs);
                    continue;
                }

                log.Info($"{sources.Count} sources were taken for processing");

                // NOTE(review): an exception thrown by Process surfaces from
                // Parallel.ForEach, skips SaveChanges and lands in the catch below,
                // so the same batch is selected again on the next iteration.
                // Confirm that Process records its own failures on the source.
                Parallel.ForEach(sources, (s) =>
                {
                    log.Info($"Starting processing of source {s.Url}");
                    Process(s);
                    log.Info($"Ended processing of source {s.Url}");
                });

                log.Info("Sources processing completed");
                db.SaveChanges();
                log.Info("Sources updated in db");
            }
        }
        catch (Exception ex)
        {
            log.Error("Error with processing sources", ex);
            Thread.Sleep(errorDelayMs);
        }
    }
}
/// <summary>
/// Imports sources from the tab-separated file named by the
/// "file_with_sources" app setting into the database.
/// </summary>
/// <remarks>
/// Each line is read as three tab-separated columns: the URL, the language
/// (parsed as a Langs value, blank meaning Langs.UNDEFINED) and the source type
/// (parsed as a TheSourceType value). The URL is normalized by removing
/// "https://" / "http://" and "www.", trimming and lower-casing it; its MD5 hash
/// is used to detect sources that already exist in the database.
/// Lines that are blank are skipped silently; lines that are malformed are
/// logged with their 1-based line number and skipped. All accepted sources are
/// saved in a single SaveChanges call, then the method waits for Enter.
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    md5 = MD5.Create();
    string file = System.Configuration.ConfigurationManager.AppSettings["file_with_sources"];

    int row = 0;        // number of sources queued for insertion
    int lineNumber = 0; // 1-based position in the input file, used in error messages

    using (var db = new MMonitorContext())
    {
        var lines = File.ReadAllLines(file);

        // URLs already queued during this run. The database query below cannot see
        // entities that were added but not yet saved, so without this set a URL
        // repeated inside the file would be inserted more than once.
        var queuedUrls = new System.Collections.Generic.HashSet<string>();

        foreach (var line in lines)
        {
            lineNumber++;

            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            string[] parts = line.Split('\t').Select(p => p.Trim()).ToArray();
            if (parts.Length < 3)
            {
                log.Error(string.Format("Expected 3 tab-separated columns but found {0} at line {1}", parts.Length, lineNumber));
                continue;
            }

            if (parts[0].Contains("xn--"))
            {
                // Only validates that a punycode URL parses; a failure is logged and
                // the line is still processed. The parsed host is not used because
                // the stored URL is derived from the raw column below.
                // NOTE(review): confirm whether punycode hosts should be normalized
                // via Uri.Host instead.
                try
                {
                    new Uri(parts[0]);
                }
                catch (UriFormatException ex)
                {
                    log.Error("Can't create source url from puny url", ex);
                }
            }

            string l;
            if (parts[0].Contains("https://"))
            {
                l = parts[0].Replace("https://", "");
            }
            else if (parts[0].Contains("http://"))
            {
                l = parts[0].Replace("http://", "");
            }
            else
            {
                l = parts[0];
            }

            if (l.Contains("www."))
            {
                l = l.Replace("www.", "");
            }

            if (!Uri.IsWellFormedUriString(l, UriKind.Relative) && !l.Contains(".рф"))
            {
                log.Error(string.Format("Bad format for Url {0} at line {1}", parts[0], lineNumber));
                continue;
            }

            // Invariant lower-casing keeps the stored URL (and therefore its hash)
            // independent of the culture of the machine running the import.
            l = l.Trim().ToLowerInvariant();

            if (l.Contains(".рф") && !Regex.IsMatch(l, @"\S+\.рф$"))
            {
                log.Error(string.Format("Bad format for Url {0} at line {1}", parts[0], lineNumber));
                continue;
            }

            Langs lang = Langs.UNDEFINED;
            if (!string.IsNullOrWhiteSpace(parts[1]) && !Enum.TryParse(parts[1], out lang))
            {
                log.Error(string.Format("Unknown language {0} at line {1}", parts[1], lineNumber));
                continue;
            }

            TheSourceType type;
            if (!Enum.TryParse(parts[2], out type))
            {
                log.Error(string.Format("Unknown source type {0} at line {1}", parts[2], lineNumber));
                continue;
            }

            if (!queuedUrls.Add(l))
            {
                // Same normalized URL appeared earlier in this file.
                continue;
            }

            var urlHash = GetMD5Hash(l);
            if (db.TheSources.Any(x => x.UrlHash == urlHash))
            {
                // Already present in the database.
                continue;
            }

            db.TheSources.Add(new TheSource()
            {
                Url = l,
                UrlHash = urlHash,
                Lang = lang,
                TheSourceType = type
            });
            row++;
            Console.WriteLine("row is " + row);
        }

        try
        {
            db.SaveChanges();
            log.Info($"Loaded {row} sources to database");
        }
        catch (Exception ex)
        {
            log.Error("Can't upload sources", ex);
        }
    }

    // Keep the console window open until the operator presses Enter.
    Console.ReadLine();
}