예제 #1
0
        /// <summary>
        /// Makes sure "<paramref name="name"/>.xml" is available locally (downloading it when it is
        /// missing, forced, or older than four days), deserializes it and pushes every subject
        /// that has at least one owner record into the dataset when it is new or has changed.
        /// </summary>
        /// <param name="args">Parsed command-line switches; "/uselocal" keeps an existing local file as is.</param>
        /// <param name="name">File name without the ".xml" extension; also used in log and progress output.</param>
        private static void ProcessXML(Devmasters.Args args, string name)
        {
            string xmlPath = name + ".xml";

            logger.Debug($"Starting {name}.xml");

            if (!System.IO.File.Exists(xmlPath))
            {
                // No local copy at all: always download.
                logger.Debug($"downloading new {name}.xml");
                Console.WriteLine($"Downloading {name}");
                DownloadFile(name);
            }
            else if (!args.Exists("/uselocal"))
            {
                // A local copy exists and may be refreshed: do so when forced or when it is older than 4 days.
                bool refresh = force
                               || (DateTime.Now - new System.IO.FileInfo(xmlPath).LastWriteTime).TotalDays > 4;
                if (refresh)
                {
                    logger.Debug($"downloading new {name}.xml");
                    Console.WriteLine($"Downloading new {name}");
                    DownloadFile(name);
                }
            }

            // The download may have failed to produce a file; nothing to process then.
            if (!System.IO.File.Exists(xmlPath))
            {
                return;
            }

            Console.WriteLine($"Deserializing {name}");
            logger.Debug($"Deserializing {name}.xml");

            rawXML parsed;
            using (var input = new System.IO.StreamReader(xmlPath))
            {
                parsed = (rawXML)new XmlSerializer(typeof(rawXML)).Deserialize(input);
            }
            Console.WriteLine($"{parsed.Subjekt?.Count()} subjects");

            Devmasters.Batch.Manager.DoActionForAll<xmlSubjekt>(
                parsed.Subjekt,
                subj =>
                {
                    majitele current = majitele.GetMajitele(subj);

                    // Subjects without any owner record are ignored.
                    bool hasOwners = current?.skutecni_majitele?.Count() > 0;
                    if (!hasOwners)
                    {
                        return new Devmasters.Batch.ActionOutputData();
                    }

                    bool mustWrite;
                    if (!ds.ItemExists(current.ico) || force)
                    {
                        mustWrite = true;
                    }
                    else
                    {
                        var stored = ds.GetItem(current.ico);
                        if (stored == null)
                        {
                            // Reported as existing but not retrievable: leave it alone.
                            mustWrite = false;
                        }
                        else if (stored.skutecni_majitele?.Count() != current.skutecni_majitele?.Count())
                        {
                            mustWrite = true;
                        }
                        else
                        {
                            // Same number of owners: every fresh owner must match a stored owner
                            // field by field, and that stored owner must already carry an osobaId.
                            bool unchanged = current.skutecni_majitele.All(fresh =>
                                stored.skutecni_majitele.Any(known =>
                                    known.osoba_jmeno == fresh.osoba_jmeno &&
                                    known.osoba_prijmeni == fresh.osoba_prijmeni &&
                                    known.osoba_datum_narozeni == fresh.osoba_datum_narozeni &&
                                    known.osoba_titul_pred == fresh.osoba_titul_pred &&
                                    known.osoba_titul_za == fresh.osoba_titul_za &&
                                    known.adresa_cast_obce == fresh.adresa_cast_obce &&
                                    known.adresa_cislo_ev == fresh.adresa_cislo_ev &&
                                    known.adresa_cislo_or == fresh.adresa_cislo_or &&
                                    known.adresa_cislo_po == fresh.adresa_cislo_po &&
                                    known.adresa_obec == fresh.adresa_obec &&
                                    known.adresa_okres == fresh.adresa_okres &&
                                    known.adresa_psc == fresh.adresa_psc &&
                                    known.adresa_stat_nazev == fresh.adresa_stat_nazev &&
                                    known.adresa_text == fresh.adresa_text &&
                                    known.adresa_ulice == fresh.adresa_ulice &&
                                    known.slovni_vyjadreni == fresh.slovni_vyjadreni &&
                                    known.podil == fresh.podil &&
                                    known.postaveni == fresh.postaveni &&
                                    !string.IsNullOrEmpty(known.osobaId)));
                            mustWrite = !unchanged;
                        }
                    }

                    if (mustWrite)
                    {
                        current.UpdateOsobaId();
                        ds.AddOrUpdateItem(current, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
                    }

                    return new Devmasters.Batch.ActionOutputData();
                },
                Devmasters.Batch.Manager.DefaultOutputWriter,
                Devmasters.Batch.Manager.DefaultProgressWriter,
                !System.Diagnostics.Debugger.IsAttached,
                maxDegreeOfParallelism: 4,
                prefix: $"{name} ITEMS ");
        }
예제 #2
0
        /// <summary>
        /// Console entry point. Reads the command-line switches, opens the dataset (creating it when
        /// opening fails with an <c>ApiException</c>), walks the paged episode listing and then, for
        /// every listed meeting, either parses and stores it (new items, or all items with /rewrite)
        /// or tries to add a missing audio transcript to an already stored item.
        /// </summary>
        /// <param name="arguments">
        /// Raw command-line arguments. "/mp3path" and "/apikey" are mandatory; "/utdl", "/rewrite",
        /// "/daysback", "/ids", "/skips2t" and "/t" are optional.
        /// </param>
        static void Main(string[] arguments)
        {
            Console.WriteLine($"Jednání-Rady-ČT - {System.Reflection.Assembly.GetEntryAssembly().GetName().Version}");
            Devmasters.Logging.Logger.Root.Info($"Jednání-Rady-ČT - {System.Reflection.Assembly.GetEntryAssembly().GetName().Version}");
            Devmasters.Logging.Logger.Root.Debug("Jednání Rady ČT starting with " + string.Join(',', arguments));

            var args = new Devmasters.Args(arguments, new string[] { "/mp3path", "/apikey" });

            if (args.MandatoryPresent() == false)
            {
                // Without the mandatory switches there is nothing sensible to do: show usage and stop.
                Help();
                return;
            }

            mp3path = args.Get("/mp3path", null);

            // Directory of the running assembly; also the default location of youtube-dl.exe.
            startPath = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);

            // NOTE(review): the switch is spelled "/utdl" (not "/ytdl"); kept as is for existing callers.
            if (args.Exists("/utdl"))
            {
                YTDL = args["/utdl"];
            }
            else
            {
                YTDL = startPath + "\\youtube-dl.exe";
            }

            apiKey   = args["/apikey"];
            rewrite  = args.Exists("/rewrite");
            afterDay = DateTime.Now.Date.AddDays(-1 * args.GetNumber("/daysback", 10000).Value);
            if (args.Exists("/ids"))
            {
                ids = args.GetArray("/ids");
            }
            skips2t = args.Exists("/skips2t");

            int threads = args.GetNumber("/t") ?? 5;

            try
            {
                ds = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Jednani>.OpenDataset(apiKey, DataSetId);
            }
            catch (ApiException)
            {
                // Opening failed on the API side: register the dataset instead.
                // Any other exception type propagates unchanged.
                ds = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Jednani>.CreateDataset(apiKey, Registration());
            }

            string nextPages = "https://www.ceskatelevize.cz/ivysilani/10000000064-jednani-rady-ceske-televize/dalsi-casti/{0}";

            int page = 0;
            List<Jednani> jednani = new List<Jednani>();

            // Walk the listing page by page until a page yields no item blocks
            // (this also ends the walk when a page could not be downloaded).
            while (true)
            {
                page++;
                using (Devmasters.Net.HttpClient.URLContent net = new Devmasters.Net.HttpClient.URLContent(string.Format(nextPages, page)))
                {
                    Console.WriteLine($"Page {page}");
                    net.IgnoreHttpErrors     = true;
                    net.Tries                = 5;
                    net.TimeInMsBetweenTries = 2000;
                    string html = "";
                    try
                    {
                        Devmasters.Logging.Logger.Root.Debug($"downloading {net.Url} ");
                        html = net.GetContent().Text;
                    }
                    catch (Exception e)
                    {
                        Devmasters.Logging.Logger.Root.Error($"{net.Url} failed", e);
                    }

                    Devmasters.XPath xp = new Devmasters.XPath(html);
                    var links = xp.GetNodes("//li[contains(@class,'itemBlock')]");
                    if (links == null || links.Count == 0)
                    {
                        break;
                    }

                    foreach (var link in links)
                    {
                        Jednani j = new Jednani();
                        j.Odkaz        = urlPrefix + Devmasters.XPath.Tools.GetNodeAttributeValue(link, "div/h3/a[@class='itemSetPaging']", "href");
                        j.Titulek      = Devmasters.XPath.Tools.GetNodeText(link, "div/h3/a[@class='itemSetPaging']").Trim();
                        j.DatumJednani = Devmasters.DT.Util.ToDate(Devmasters.XPath.Tools.GetNodeText(link, "div/p").Trim()) ?? DateTime.MinValue;
                        j.Id           = Devmasters.RegexUtil.GetRegexGroupValue(j.Odkaz, "/ivysilani/10000000064-jednani-rady-ceske-televize/(?<id>\\d{2,})", "id");

                        // Keep only items newer than the cut-off and, when /ids was given, only the listed ones.
                        if (j.DatumJednani > afterDay &&
                            (ids == null || ids.Contains(j.Id)))
                        {
                            jednani.Add(j);
                        }
                    }
                }
            }

            Devmasters.Logging.Logger.Root.Debug($"Starting {jednani.Count} items ");

            Devmasters.Batch.Manager.DoActionForAll<string>(jednani.Select(m => m.Id).Reverse(),
                                                            id =>
            {
                // An item whose id could not be extracted from its link has nothing to look up or store,
                // so the dataset is only queried for non-empty ids.
                if (!string.IsNullOrEmpty(id))
                {
                    bool exists = ds.ItemExists(id);
                    if (!exists || rewrite)
                    {
                        Devmasters.Logging.Logger.Root.Debug($"Start parsing {id} ");
                        var fullJ = ParseJednani(jednani.First(m => m.Id == id));

                        Devmasters.Logging.Logger.Root.Debug($"Saving {id} ");
                        ds.AddOrUpdateItem(fullJ, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
                    }
                    else
                    {
                        // Already stored and not rewriting: only fill in a missing audio transcript.
                        // GetItemSafe presumably returns null when the item cannot be loaded (confirm);
                        // in that case there is nothing to update.
                        var fullJ = ds.GetItemSafe(id);
                        if (fullJ != null && !(fullJ.PrepisAudia?.Count() > 0))
                        {
                            Devmasters.Logging.Logger.Root.Debug($"Checking AUDIO text {id} ");
                            var aud = Audio(fullJ);
                            if (aud?.Count() > 0)
                            {
                                fullJ.PrepisAudia = aud;
                                ds.AddOrUpdateItem(fullJ, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
                            }
                        }
                    }
                }

                return(new Devmasters.Batch.ActionOutputData()
                {
                    Log = id
                });
            }, true, maxDegreeOfParallelism: threads);
        }
예제 #3
0
        /// <summary>
        /// Console entry point. Parses "key=value" switches, selects which sessions of the given
        /// year to process, and for every item parsed from those sessions asks the PoliticiFromText
        /// API for mentioned politicians before saving the item. Names consisting of more than two
        /// space-separated parts are collected and printed at the end for manual review.
        /// </summary>
        /// <param name="args">
        /// Command-line switches in "key=value" form (keys are case-insensitive). "/apikey" and "/rok"
        /// are mandatory; "/daysback" (default 3), "/rewrite" and "/schuze" are optional.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when "/apikey=csv" is used, because no CSV writer is ever opened.
        /// </exception>
        static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                Help(); return;
            }

            // Split on the first '=' only, so values that themselves contain '=' are kept intact.
            // Keys are lowered with the invariant culture so lookups do not depend on the OS locale,
            // and a repeated switch overrides the earlier one instead of crashing.
            var arguments = new Dictionary<string, string>();
            foreach (string raw in args)
            {
                string[] parts = raw.Split(new[] { '=' }, 2);
                arguments[parts[0].ToLowerInvariant()] = parts.Length == 1 ? "" : parts[1];
            }

            if (!arguments.TryGetValue("/apikey", out apikey))
            {
                Help(); return;
            }

            string argValue;

            int daysBack = 3;
            if (arguments.TryGetValue("/daysback", out argValue))
            {
                daysBack = Convert.ToInt32(argValue);
            }

            int rok;
            if (arguments.TryGetValue("/rok", out argValue))
            {
                rok = Convert.ToInt32(argValue);
            }
            else
            {
                Help(); return;
            }

            bool rewrite = arguments.ContainsKey("/rewrite");

            int? schuze = null;
            if (arguments.TryGetValue("/schuze", out argValue))
            {
                schuze = Convert.ToInt32(argValue);
            }

            // NOTE(review): nothing in this method ever opens these writers, so the CSV branch below
            // could only end in a NullReferenceException. Fail fast with a clear message instead of
            // crashing after the network work has been done. Wire the writers up here to enable CSV output.
            StreamWriter reader = null;
            CsvWriter    csv    = null;
            if (apikey == "csv" && csv == null)
            {
                throw new InvalidOperationException("CSV output was requested (/apikey=csv) but no CSV writer is configured.");
            }

            const string datasetId = "stenozaznamy-psp";
            dsc = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Steno>.OpenDataset(apikey, datasetId);

            HashSet<string> jmena2Check = new HashSet<string>();

            var vsechnySchuze = ParsePSPWeb.VsechnySchuze(rok);

            List<int> schuzeToParse = new List<int>();

            if (schuze.HasValue)
            {
                // An explicitly requested session wins over everything else.
                schuzeToParse.Add(schuze.Value);
            }
            else if (rewrite)
            {
                schuzeToParse.AddRange(vsechnySchuze.Select(m => m.schuze));
            }
            else
            {
                // Only sessions whose "last" date falls within the past daysBack days.
                DateTime after = DateTime.Now.Date.AddDays(-1 * daysBack);
                schuzeToParse.AddRange(vsechnySchuze.Where(m => m.last >= after).Select(m => m.schuze));
            }

            Console.WriteLine("Zpracuji schuze " + string.Join(",", schuzeToParse));

            Devmasters.Batch.Manager.DoActionForAll<int>(schuzeToParse,
                                                         s =>
            {
                foreach (var item in ParsePSPWeb.ParseSchuze(rok, s))
                {
                    try
                    {
                        if (rewrite == false && dsc.ItemExists(item.Id))
                        {
                            continue; // already stored, skip
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort: a failing existence check is treated as "not stored yet".
                    }

                    if (item.celeJmeno?.Split(' ')?.Length > 2)
                    {
                        // NOTE(review): this callback presumably runs on several worker threads when the
                        // flag passed below is true (confirm); HashSet<T> is not safe for concurrent
                        // writes, so additions are serialized.
                        lock (jmena2Check)
                        {
                            jmena2Check.Add(item.celeJmeno);
                        }
                    }

                    using (var net = new Devmasters.Net.HttpClient.URLContent($"https://www.hlidacstatu.cz/api/v1/PoliticiFromText?Authorization={apikey}"))
                    {
                        net.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
                        net.RequestParams.Form.Add("text", item.text);
                        net.Timeout = 60 * 1000;
                        var sosoby  = net.GetContent().Text;
                        var osoby   = Newtonsoft.Json.Linq.JArray.Parse(sosoby);
                        if (osoby != null && osoby.Count > 0)
                        {
                            item.politiciZminky = osoby
                                                  .Select(ja => ja.Value<string>("osobaid"))
                                                  .Where(o => !string.IsNullOrWhiteSpace(o))
                                                  .ToArray();
                        }
                    }

                    if (apikey == "csv")
                    {
                        csv.WriteRecord<Steno>(item);
                        csv.NextRecord();
                        if (item.poradi % 10 == 0)
                        {
                            csv.Flush();
                        }
                    }
                    else
                    {
                        SaveItem(item, true);
                    }
                }

                return(new Devmasters.Batch.ActionOutputData());
            }, !System.Diagnostics.Debugger.IsAttached);

            if (apikey == "csv")
            {
                csv.Flush();
                csv.Dispose();
                reader?.Close();
            }

            Console.WriteLine();
            Console.WriteLine("Podezrela jmena:");
            foreach (var k in jmena2Check)
            {
                Console.WriteLine(k);
            }
        }