Exemplo n.º 1
0
        /// <summary>
        /// Runs the endless main loop that processes sources in batches.
        /// </summary>
        /// <remarks>
        /// Each iteration opens a fresh <c>MMonitorContext</c>, loads up to
        /// <c>numOfSourcesForParallerlProcessing</c> sources whose <c>Enc</c> and
        /// <c>AutomaticalEncodingUpdateWasSuccess</c> are both null and which are not flagged
        /// to be ignored, runs <c>Process</c> on them in parallel and saves the context.
        /// When no source matches, the loop sleeps for 10 minutes; after an exception it
        /// logs the error and retries after 5 seconds. The method never returns.
        /// </remarks>
        private void StartProcessing()
        {
            while (true)
            {
                try
                {
                    bool nothingToProcess;

                    using (var db = new MMonitorContext())
                    {
                        // Materialize the batch exactly once. The query is deferred, so calling
                        // Count() and then enumerating it would hit the database several times
                        // and could observe a different set of rows each time.
                        var sources = db.TheSources
                                      .Where(s =>
                                             s.Enc == null &&
                                             s.AutomaticalEncodingUpdateWasSuccess == null &&
                                             s.IgnoreForAutomaticHelpersWork.Value != true)
                                      .Take(numOfSourcesForParallerlProcessing)
                                      .ToList();

                        nothingToProcess = sources.Count == 0;

                        if (!nothingToProcess)
                        {
                            log.Info($"{sources.Count} sources were taken for processing");

                            Parallel.ForEach(sources, (s) =>
                            {
                                log.Info($"Starting processing of source {s.Url}");
                                Process(s);
                                log.Info($"Ended processing of source {s.Url}");
                            });

                            log.Info("Sources processing completed");
                            db.SaveChanges();
                            log.Info("Sources updated in db");
                        }
                    }

                    if (nothingToProcess)
                    {
                        // Idle wait happens after the context is disposed so it is not
                        // kept open for the whole 10 minutes.
                        Thread.Sleep(1000 * 60 * 10);
                    }
                }
                catch (Exception ex)
                {
                    log.Error("Error with processing sources", ex);
                    // Short back-off before retrying so a persistent failure does not spin.
                    Thread.Sleep(5000);
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Imports sources from the tab-separated file named by the
        /// <c>file_with_sources</c> app setting into the database.
        /// </summary>
        /// <remarks>
        /// Each line is expected to hold three tab-separated columns: URL, language
        /// (may be blank, meaning <c>Langs.UNDEFINED</c>) and source type. The URL is
        /// normalized (scheme and "www." removed, trimmed, lower-cased), hashed with
        /// <c>GetMD5Hash</c> and added only when no source with the same hash exists in the
        /// database and the same URL was not already queued during this run. Malformed
        /// lines are logged and skipped. All additions are saved in a single
        /// <c>SaveChanges</c> call; the method then waits for a console line before exiting.
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            md5 = MD5.Create();
            string file       = System.Configuration.ConfigurationManager.AppSettings["file_with_sources"];
            int    row        = 0; // number of sources queued for insertion
            int    lineNumber = 0; // 1-based line of the input file, used in error messages

            using (var db = new MMonitorContext())
            {
                var lines = File.ReadAllLines(file);

                // URLs already handled in this run. The database query below cannot see
                // entities that were added to the context but not yet saved, so repeated
                // URLs inside the file have to be detected here.
                var seenUrls = new System.Collections.Generic.HashSet<string>();

                foreach (var line in lines)
                {
                    lineNumber++;

                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    string[] parts = line.Split('\t').Select(p => p.Trim()).ToArray();
                    if (parts.Length < 3)
                    {
                        log.Error($"Expected at least 3 tab-separated columns at line {lineNumber}, found {parts.Length}");
                        continue;
                    }

                    if (parts[0].Contains("xn--"))
                    {
                        // NOTE(review): the original code stored the parsed host in the URL
                        // variable here, but the value was always overwritten by the
                        // normalization below. Only the parse attempt and its error log are
                        // kept; confirm whether punycode hosts were meant to be handled
                        // differently.
                        try
                        {
                            new Uri(parts[0]);
                        }
                        catch (Exception ex)
                        {
                            log.Error("Can't create source url from puny url", ex);
                        }
                    }

                    string l;
                    if (parts[0].Contains("https://"))
                    {
                        l = parts[0].Replace("https://", "");
                    }
                    else if (parts[0].Contains("http://"))
                    {
                        l = parts[0].Replace("http://", "");
                    }
                    else
                    {
                        l = parts[0];
                    }

                    if (l.Contains("www."))
                    {
                        l = l.Replace("www.", "");
                    }

                    if (!Uri.IsWellFormedUriString(l, UriKind.Relative) && !l.Contains(".рф"))
                    {
                        log.Error(string.Format("Bad format for Url {0} at line {1}", parts[0], lineNumber));
                        continue;
                    }

                    // Invariant lower-casing keeps the stored URL and its hash independent
                    // of the machine's current culture.
                    l = l.Trim().ToLowerInvariant();

                    if (l.Contains(".рф") && !Regex.IsMatch(l, @"\S+\.рф$"))
                    {
                        log.Error(string.Format("Bad format for Url {0} at line {1}", parts[0], lineNumber));
                        continue;
                    }

                    Langs lang = Langs.UNDEFINED;
                    if (!string.IsNullOrWhiteSpace(parts[1]) && !Enum.TryParse(parts[1], out lang))
                    {
                        log.Error($"Unknown language '{parts[1]}' at line {lineNumber}");
                        continue;
                    }

                    TheSourceType sourceType;
                    if (!Enum.TryParse(parts[2], out sourceType))
                    {
                        log.Error($"Unknown source type '{parts[2]}' at line {lineNumber}");
                        continue;
                    }

                    // Add() returns false when this URL was already seen in this run.
                    if (!seenUrls.Add(l))
                    {
                        continue;
                    }

                    var urlHash = GetMD5Hash(l);
                    if (db.TheSources.Any(x => x.UrlHash == urlHash))
                    {
                        continue;
                    }

                    db.TheSources.Add(new TheSource()
                    {
                        Url           = l,
                        UrlHash       = urlHash,
                        Lang          = lang,
                        TheSourceType = sourceType
                    });
                    row++;
                    Console.WriteLine("row is " + row);
                }

                try
                {
                    db.SaveChanges();
                    log.Info($"Loaded {row} sources to database");
                }
                catch (Exception ex)
                {
                    log.Error("Can't upload sources", ex);
                }
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }