/// <summary>
/// Downloads <paramref name="url"/> as UTF-8 text, sending the configured token
/// in the <c>Authorization</c> header.
/// </summary>
/// <param name="url">Address to request.</param>
/// <returns>
/// The response body, or <c>emptyJson</c> when the request fails for any reason
/// (the failure is logged, never rethrown).
/// </returns>
private static string Request(string url)
{
    using (var client = new Devmasters.Net.HttpClient.URLContent(url))
    {
        client.Timeout = 30000;
        client.RequestParams.Headers.Add("Authorization", "Token " + token);

        try
        {
            return client.GetContent(System.Text.Encoding.UTF8).Text;
        }
        catch (Exception failure)
        {
            // Transport errors and unexpected errors are treated alike:
            // log them and hand the caller the empty payload.
            HlidacStatu.Util.Consts.Logger.Error("Merk request " + url, failure);
            return emptyJson;
        }
    }
}
/// <summary>
/// Downloads the code list <c>{propertyPrefix}.xml</c> from monitor.statnipokladna.cz
/// and maps each child element's <c>{propertyPrefix}_id</c> to its <c>{propertyPrefix}_nazev</c>.
/// </summary>
/// <param name="propertyPrefix">Name of the XML file and prefix of the id/name elements.</param>
/// <returns>The id-to-name map, or <c>null</c> when downloading or parsing fails.</returns>
private static Dictionary<string, string> GetCiselnik(string propertyPrefix)
{
    string idElement = $"{propertyPrefix}_id";
    string nameElement = $"{propertyPrefix}_nazev";

    try
    {
        using (var client = new Devmasters.Net.HttpClient.URLContent($"https://monitor.statnipokladna.cz/data/xml/{propertyPrefix}.xml"))
        {
            client.Timeout = 1000 * 180;
            XElement root = XElement.Parse(client.GetContent().Text);

            var map = new Dictionary<string, string>();
            foreach (XElement item in root.Elements())
            {
                // Missing child elements or duplicate ids throw and end up in the catch below.
                map.Add(item.Element(idElement).Value, item.Element(nameElement).Value);
            }
            return map;
        }
    }
    catch
    {
        // Best effort: any failure is reported to the caller as null.
        return null;
    }
}
/// <summary>
/// POSTs <paramref name="content"/> as JSON to the classifier endpoint
/// <c>{base}/{endpoint}?doc_id={id}</c> and returns the response text.
/// </summary>
/// <param name="endpoint">Endpoint name appended to the classification base URL.</param>
/// <param name="content">Raw request body.</param>
/// <param name="id">Document id passed in the <c>doc_id</c> query parameter.</param>
/// <param name="timeoutMs">Value assigned to the request's <c>Timeout</c>.</param>
/// <returns>Text of the response.</returns>
/// <remarks>Any exception is logged and then rethrown unchanged.</remarks>
private static string CallEndpoint(string endpoint, string content, string id, int timeoutMs)
{
    string target = classificationBaseUrl() + $"/{endpoint}?doc_id={id}";

    using (var request = new Devmasters.Net.HttpClient.URLContent(target))
    {
        request.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
        request.Tries = 3;
        request.TimeInMsBetweenTries = 5000;
        request.Timeout = timeoutMs;
        request.ContentType = "application/json; charset=utf-8";
        request.RequestParams.RawContent = content;

        try
        {
            Util.Consts.Logger.Debug($"Calling classifier endpoint [{endpoint}] for {id} from " + request.Url);
            return request.GetContent().Text;
        }
        catch (Exception e)
        {
            Util.Consts.Logger.Error($"Classification {endpoint} API error for {id} " + request.Url, e);
            throw;
        }
    }
}
/// <summary>
/// Loads the list of periods from monitor.statnipokladna.cz and stores, for every entry
/// flagged <c>isYear</c>, the mapping <c>year</c> -> <c>loadID</c> in <c>obdobi</c>.
/// When loading fails, <c>obdobi</c> is set to an empty dictionary.
/// </summary>
static FinanceDataCalculator()
{
    lock (lockObj)
    {
        if (obdobi != null)
        {
            return;
        }

        try
        {
            using (var client = new Devmasters.Net.HttpClient.URLContent("https://monitor.statnipokladna.cz/api/obdobi"))
            {
                JArray periods = JArray.Parse(client.GetContent().Text);

                var yearToLoadId = new Dictionary<int, int>();
                foreach (var period in periods)
                {
                    if (period.Value<bool>("isYear"))
                    {
                        yearToLoadId.Add(period.Value<int>("year"), period.Value<int>("loadID"));
                    }
                }
                obdobi = yearToLoadId;
            }
        }
        catch (Exception)
        {
            // Download or parse problem: continue with no known periods.
            obdobi = new Dictionary<int, int>();
        }
    }
}
/// <summary>
/// Requests http://api.devmasters.cz/ip.ashx through the proxy <paramref name="wp"/>
/// and compares the returned text with the host of <c>wp.GetProxy(apiHost)</c>.
/// </summary>
/// <param name="wp">Proxy under test.</param>
/// <param name="timeoutInMs">Value assigned to the request's <c>Timeout</c>.</param>
/// <returns>
/// A result whose <c>Success</c> is true when the two values are equal; on an exception
/// <c>Success</c> is false and <c>Error</c> holds the exception. <c>ElapsedTime</c> is always set.
/// </returns>
public static TestProxyResult TestProxy(IWebProxyWithStatus wp, int timeoutInMs = 10000)
{
    var outcome = new TestProxyResult();
    var timer = new Stopwatch();

    using (var probe = new Devmasters.Net.HttpClient.URLContent("http://api.devmasters.cz/ip.ashx"))
    {
        try
        {
            timer.Start();
            probe.Timeout = timeoutInMs;
            probe.Proxy = wp;

            string reported = probe.GetContent().Text;
            outcome.Success = reported == wp.GetProxy(apiHost).Host;
        }
        catch (Exception ex)
        {
            outcome.Success = false;
            outcome.Error = ex;
        }
        finally
        {
            timer.Stop();
            outcome.ElapsedTime = timer.Elapsed;
        }
    }

    return outcome;
}
/// <summary>
/// Sends a form POST to the <c>posts</c> URL with the API credentials,
/// <paramref name="topicId"/> as <c>topic_id</c> and <paramref name="post"/> as <c>raw</c>.
/// </summary>
/// <param name="topicId">Target topic id.</param>
/// <param name="post">Raw text of the post.</param>
/// <returns>
/// <c>true</c> when the request completed and the response parsed as a JSON object;
/// <c>false</c> when anything threw (the exception is logged).
/// </returns>
public bool PostToTopic(int topicId, string post)
{
    using (var client = new Devmasters.Net.HttpClient.URLContent(GetUrl("posts", false)))
    {
        try
        {
            client.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
            client.RequestParams.Form.Add("api_key", apiKey);
            client.RequestParams.Form.Add("api_username", apiUsername);
            client.RequestParams.Form.Add("topic_id", topicId.ToString());
            client.RequestParams.Form.Add("raw", post);

            string body = client.GetContent().Text;

            // The parsed object is not inspected; parsing only verifies that the
            // response is a JSON object (a non-JSON body throws and yields false).
            Newtonsoft.Json.Linq.JObject.Parse(body);
            return true;
        }
        catch (Exception e)
        {
            HlidacStatu.Util.Consts.Logger.Error("PostToTopic error", e);
            return false;
        }
    }
}
/// <summary>
/// Downloads <paramref name="url"/> and returns the response text, retrying up to 10 times.
/// </summary>
/// <param name="url">Address to download.</param>
/// <returns>The page text, or <c>null</c> when the server answered 404 Not Found.</returns>
/// <remarks>
/// Every other failure propagates to the caller with its original stack trace.
/// </remarks>
static string GetHtml(string url)
{
    try
    {
        // Be nice to psp.cz: wait a random 0-100 ms before each request.
        System.Threading.Thread.Sleep((int)(Program.rnd.NextDouble() * 100));

        using (Devmasters.Net.HttpClient.URLContent net = new Devmasters.Net.HttpClient.URLContent(url))
        {
            net.Tries = 10;
            net.TimeInMsBetweenTries = 2000 * 10; // 20 s between attempts
            return net.GetContent().Text;
        }
    }
    catch (Devmasters.Net.HttpClient.UrlContentException ex)
    {
        var response = (ex.InnerException as System.Net.WebException)?.Response as System.Net.HttpWebResponse;
        if (response?.StatusCode == System.Net.HttpStatusCode.NotFound)
        {
            return null;
        }

        // "throw;" keeps the original stack trace ("throw ex;" would reset it).
        throw;
    }
}
/// <summary>
/// Returns the minutes ("zápis") documents published for the meeting <paramref name="j"/>.
/// </summary>
/// <param name="j">Meeting whose <c>DatumJednani</c> is matched against the link texts.</param>
/// <returns>
/// One document per PDF link whose text contains the meeting date in the form
/// <c>(d. M. yyyy)</c>; an empty array when nothing matches.
/// </returns>
/// <remarks>
/// The listing page is downloaded once and kept in <c>hzapis</c>.
/// Links whose <c>href</c> is missing or cannot be resolved against <c>urlPrefix</c> are skipped.
/// </remarks>
private static Jednani.Dokument[] GetZapisy(Jednani j)
{
    if (hzapis == null)
    {
        using (Devmasters.Net.HttpClient.URLContent nzapis = new Devmasters.Net.HttpClient.URLContent("https://www.ceskatelevize.cz/rada-ct/zapisy-z-jednani/"))
        {
            hzapis = nzapis.GetContent().Text;
        }
    }

    List<Jednani.Dokument> docs = new List<Jednani.Dokument>();

    var links = new Devmasters.XPath(hzapis).GetNodes("//a[@class='pdf']");
    if (links == null)
    {
        // No PDF links on the page at all.
        return docs.ToArray();
    }

    string dateMarker = $"({j.DatumJednani.ToString("d. M. yyyy")})";
    foreach (var link in links)
    {
        if (!link.InnerText.Contains(dateMarker))
        {
            continue;
        }

        Uri? url = null;
        if (!Uri.TryCreate(new Uri(urlPrefix), link.Attributes["href"]?.Value, out url) || url == null)
        {
            // Previously the TryCreate result was ignored and a bad href crashed with a
            // NullReferenceException; one broken link must not abort the whole meeting.
            continue;
        }

        docs.Add(new Jednani.Dokument()
        {
            HsProcessType = "document",
            DocumentUrl = url.AbsoluteUri,
            Nazev = link.InnerText.Trim(),
            Typ = "zápis"
        });
    }

    return docs.ToArray();
}
// GET: ApiV1
/// <summary>
/// Shows the API index page for an authenticated caller; everyone else is redirected to login.
/// </summary>
/// <returns>
/// The view with <c>ViewBag.Token</c> set (and <c>ViewBag.OcrToken</c> when the <c>getocr</c>
/// query parameter is present), or a redirect to the login path.
/// </returns>
public ActionResult Index()
{
    if (!Framework.ApiAuth.IsApiAuth(this).Authentificated)
    {
        return Redirect(MvcApplication.LoginRedirPath + "?returnUrl=" + System.Net.WebUtility.UrlEncode("/api/v1/Index"));
    }

    ViewBag.Token = HlidacStatu.Lib.Data.AspNetUserToken.GetToken(this.User.Identity.Name).Token.ToString("N");

    if (!string.IsNullOrEmpty(Request.QueryString["getocr"]))
    {
        // The user name goes into a query string, so it must be URL-encoded:
        // characters such as '+' or '&' would otherwise be mangled or split the parameter.
        string email = System.Net.WebUtility.UrlEncode(this.User.Identity.Name);

        using (Devmasters.Net.HttpClient.URLContent url = new Devmasters.Net.HttpClient.URLContent(
            $"https://ocr.hlidacstatu.cz/AddApi.ashx?apikey={Devmasters.Config.GetWebConfigValue("OCRServerApiKey")}&email={email}"
            ))
        {
            var json = Newtonsoft.Json.Linq.JToken.Parse(url.GetContent().Text);
            ViewBag.OcrToken = json.Value<string>("apikey");
        }
    }

    return View();
}
/// <summary>
/// Makes sure the audio of a record is downloaded and returns its transcript when one exists.
/// </summary>
/// <param name="startV2T">When true and no transcript file exists yet, a Voice2TextNewTask request is posted.</param>
/// <param name="datasetid">Dataset id; used as a sub-folder of <c>Mp3Path</c> and in the task URL.</param>
/// <param name="recordid">Record id; used as the file base name and in the task URL.</param>
/// <param name="videourl">URL handed to youtube-dl when the MP3 file is missing.</param>
/// <returns>
/// Transcript blocks read from the <c>.mp3.raw_s2t</c> file (preferred) or the <c>.ctm</c> file;
/// <c>null</c> when neither file exists.
/// </returns>
private List<Devmasters.SpeechToText.VoiceToTextFormatter.TextWithTimestamp> _checkDownloadAndStartV2TOrGet(bool startV2T, string datasetid, string recordid, string videourl)
{
    string basePath = $"{Mp3Path}\\{datasetid}\\{recordid}";
    string audioFile = $"{basePath}.mp3";
    string newtonTranscript = $"{basePath}.mp3.raw_s2t";
    string kaldiTranscript = $"{basePath}.ctm";

    // Step 1: fetch the audio when it is not on disk yet.
    if (!System.IO.File.Exists(audioFile))
    {
        string downloaderArgs =
            $"--no-progress --extract-audio --audio-format mp3 --postprocessor-args \" -ac 1 -ar 16000\" -o \"{basePath}.%(ext)s\" "
            + videourl;
        var startInfo = new System.Diagnostics.ProcessStartInfo("youtube-dl.exe", downloaderArgs);
        var downloader = new Devmasters.ProcessExecutor(startInfo, 60 * 6 * 24);
        downloader.StandardOutputDataReceived += (o, e) => { Devmasters.Logging.Logger.Root.Debug(e.Data); };

        Devmasters.Logging.Logger.Root.Info($"Starting Youtube-dl for {videourl} ");
        downloader.Start();
    }

    // Step 2: without any transcript file, optionally queue a speech-to-text task and give up.
    bool transcriptExists = System.IO.File.Exists(newtonTranscript) || System.IO.File.Exists(kaldiTranscript);
    if (!transcriptExists)
    {
        if (startV2T)
        {
            using (var task = new Devmasters.Net.HttpClient.URLContent(
                $"https://www.hlidacstatu.cz/api/v2/internalq/Voice2TextNewTask/{datasetid}/{recordid}"))
            {
                task.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
                task.RequestParams.Headers.Add("Authorization", Apikey);
                task.GetContent();
            }
        }
        return null;
    }

    // Step 3: read whichever transcript is present, the Newton format first.
    if (System.IO.File.Exists(newtonTranscript))
    {
        var terms = new Newton.SpeechToText.Cloud.FileAPI.VoiceToTerms(System.IO.File.ReadAllText(newtonTranscript));
        return new Devmasters.SpeechToText.VoiceToTextFormatter(terms.Terms)
            .TextWithTimestamps(TimeSpan.FromSeconds(10), true);
    }

    if (System.IO.File.Exists(kaldiTranscript))
    {
        var terms = new KaldiASR.SpeechToText.VoiceToTerms(System.IO.File.ReadAllText(kaldiTranscript));
        return new Devmasters.SpeechToText.VoiceToTextFormatter(terms.Terms)
            .TextWithTimestamps(TimeSpan.FromSeconds(10), true);
    }

    return null;
}
/// <summary>
/// Downloads the agenda listing for <paramref name="year"/> and returns the text of every
/// link in the main content whose <c>href</c> starts with <c>/djv-agenda</c>.
/// </summary>
/// <param name="year">Year substituted into <c>listUrl</c>.</param>
/// <returns>The link texts; an empty array when the page contains no such link.</returns>
public static string[] AgendaList(int year)
{
    using (var net = new Devmasters.Net.HttpClient.URLContent(string.Format(listUrl, year)))
    {
        net.UserAgent = Devmasters.Net.HttpClient.BrowserUserAgent.IE11;

        var html = net.GetContent().Text;
        var xp = new XPath(html);

        // GetNodes is treated as nullable elsewhere in this code base; without this guard
        // a page with no matching link ended in a NullReferenceException.
        return xp.GetNodes("//div[@class='content-main']//a[starts-with(@href,'/djv-agenda')]")
            ?.Select(m => m.InnerText)
            .ToArray()
            ?? new string[0];
    }
}
/// <summary>
/// Looks a person up in the Hlídač státu API by first name, surname and date of birth.
/// </summary>
/// <param name="jmeno">First name (URL-encoded into the query).</param>
/// <param name="prijmeni">Surname (URL-encoded into the query).</param>
/// <param name="narozeni">Date of birth, sent as <c>yyyy-MM-dd</c>.</param>
/// <returns>
/// The <c>NameId</c> of the first person returned, or <c>null</c> when nobody matches
/// or both attempts fail.
/// </returns>
/// <remarks>
/// The first attempt uses <see cref="System.Net.WebClient"/>. If it throws, the request is
/// repeated after 200 ms through <c>URLContent</c> with 5 tries; a failure there is logged.
/// </remarks>
public static string GetOsobaId(string jmeno, string prijmeni, DateTime narozeni)
{
    string url = $"https://www.hlidacstatu.cz/api/v2/osoby/hledat?jmeno={System.Net.WebUtility.UrlEncode(jmeno)}&prijmeni={System.Net.WebUtility.UrlEncode(prijmeni)}&datumNarozeni={narozeni:yyyy-MM-dd}";

    // Shared by both attempts: turns the JSON array into the NameId of its first entry.
    Func<string, string> firstNameId = payload =>
    {
        person[] found = Newtonsoft.Json.JsonConvert.DeserializeObject<person[]>(payload);
        if (found != null && found.Length > 0)
        {
            return found[0].NameId;
        }
        return null;
    };

    try
    {
        using (var wc = new System.Net.WebClient())
        {
            wc.Headers.Add("Authorization", Program.apiKey);
            return firstNameId(wc.DownloadString(url));
        }
    }
    catch (Exception)
    {
        System.Threading.Thread.Sleep(200);

        try
        {
            using (var net = new Devmasters.Net.HttpClient.URLContent(url))
            {
                net.TimeInMsBetweenTries = 500;
                net.Tries = 5;
                net.RequestParams.Headers.Add("Authorization", Program.apiKey);
                return firstNameId(net.GetContent().Text);
            }
        }
        catch (Exception e)
        {
            SkutecniMajitele.Program.logger.Error(url, e);
        }

        return null;
    }
}
/// <summary>
/// Returns the materials ("material") published for the meeting <paramref name="j"/>.
/// </summary>
/// <param name="j">Meeting whose <c>DatumJednani</c> (as <c>d. M. yyyy</c>) is searched in the section headings.</param>
/// <returns>
/// One document per paragraph link found between the matching <c>h4</c> heading and the
/// next <c>h4</c> heading; an empty array when no heading matches.
/// </returns>
/// <remarks>
/// The listing page is downloaded once and kept in <c>mzapis</c>.
/// Links whose <c>href</c> is missing or cannot be resolved against <c>urlPrefix</c> are skipped.
/// </remarks>
private static Jednani.Dokument[] GetMaterialy(Jednani j)
{
    if (mzapis == null)
    {
        using (Devmasters.Net.HttpClient.URLContent nzapis = new Devmasters.Net.HttpClient.URLContent("https://www.ceskatelevize.cz/rada-ct/materialy-projednane-radou/"))
        {
            mzapis = nzapis.GetContent().Text;
        }
    }

    List<Jednani.Dokument> docs = new List<Jednani.Dokument>();

    var headings = new Devmasters.XPath(mzapis)
        .GetNodes("//div[contains(@class,'contentArticle')]/h4[@class='odsazeni']");
    if (headings == null)
    {
        return docs.ToArray();
    }

    string dateText = j.DatumJednani.ToString("d. M. yyyy");
    foreach (var heading in headings)
    {
        if (!heading.InnerText.Contains(dateText))
        {
            continue;
        }

        var following = Devmasters.XPath.Tools.GetNodes(heading, "following::*");
        if (following == null)
        {
            continue;
        }

        // Walk forward until the next section heading.
        foreach (var node in following)
        {
            if (node.Name == "p")
            {
                var link = node.ChildNodes.FirstOrDefault(m => m.Name == "a");
                if (link != null)
                {
                    Uri? url = null;
                    // A missing or malformed href used to crash on url.AbsoluteUri; skip it instead.
                    if (Uri.TryCreate(new Uri(urlPrefix), link.Attributes["href"]?.Value, out url) && url != null)
                    {
                        docs.Add(new Jednani.Dokument()
                        {
                            HsProcessType = "document",
                            DocumentUrl = url.AbsoluteUri,
                            Typ = "material",
                            Nazev = link.InnerText
                        });
                    }
                }
            }

            if (node.Name == "h4" && node.Attributes.FirstOrDefault()?.Value == "odsazeni")
            {
                // Next section heading reached: this meeting's block is complete.
                return docs.ToArray();
            }
        }
    }

    return docs.ToArray();
}
/// <summary>
/// Completes a meeting record from its detail page (<c>j.Odkaz</c>): the duration taken from
/// the digits of <c>p.duration</c>, plus materials, minutes and the audio transcript.
/// </summary>
/// <param name="j">Meeting to fill in; the same instance is modified and returned.</param>
/// <returns>The instance passed in as <paramref name="j"/>.</returns>
static Jednani ParseJednani(Jednani j)
{
    using (var net = new Devmasters.Net.HttpClient.URLContent(j.Odkaz))
    {
        var page = new Devmasters.XPath(net.GetContent().Text);

        // Keep only the digits of the duration text before converting it.
        string durationDigits = Regex.Replace(page.GetNodeText("//p[@class='duration']"), "\\D", "");
        j.Delka = Devmasters.TextUtil.ConvertToInt(durationDigits, 0).Value;

        j.Materialy = GetMaterialy(j);
        j.Zapisy = GetZapisy(j);
        j.PrepisAudia = Audio(j);

        return j;
    }
}
/// <summary>
/// Downloads <paramref name="url"/> and parses the response as a JSON object.
/// Example address: https://monitor.statnipokladna.cz/api/prispevkove-organizace?obdobi=1512&amp;ic=00006947
/// </summary>
/// <param name="url">Address to download.</param>
/// <returns>The parsed object, or <c>null</c> when downloading or parsing fails.</returns>
public static JObject GetData(string url)
{
    try
    {
        using (var client = new Devmasters.Net.HttpClient.URLContent(url))
        {
            client.Timeout = 1000 * 180;
            return JObject.Parse(client.GetContent().Text);
        }
    }
    catch
    {
        // Best effort: every failure is reported as null.
        return null;
    }
}
/// <summary>
/// Asks the Hlídač státu <c>PolitikFromText</c> API to identify a person from
/// the text "<paramref name="fullname"/> <paramref name="fce"/>".
/// </summary>
/// <param name="fullname">Full name of the person.</param>
/// <param name="fce">Function/role text appended after the name.</param>
/// <returns>The <c>osobaid</c> value of the JSON response.</returns>
/// <remarks>
/// An earlier variant that called <c>api/v1/FindOsobaId</c> through <c>WebClient</c>
/// was left commented out in the original source.
/// </remarks>
public static string findInHS(string fullname, string fce)
{
    string endpoint = $"https://www.hlidacstatu.cz/api/v1/PolitikFromText?Authorization={apikey}";

    using (var client = new Devmasters.Net.HttpClient.URLContent(endpoint))
    {
        client.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
        client.RequestParams.Form.Add("text", $"{fullname} {fce}");
        client.Timeout = 60 * 1000;

        string body = client.GetContent().Text;
        return Newtonsoft.Json.Linq.JObject.Parse(body).Value<string>("osobaid");
    }
}
/// <summary>
/// Collects the "Transakce" statement links from the account page (<c>Ucet.Url</c>).
/// </summary>
/// <returns>
/// Absolute statement URL (prefixed with https://www.csob.cz) mapped to the first day of the
/// month named in the link text; an empty dictionary when the page has no such links.
/// </returns>
private Dictionary<string, DateTime> GetBankStatementLinks()
{
    using (var page = new Devmasters.Net.HttpClient.URLContent(Ucet.Url))
    {
        var doc = new Devmasters.XPath(page.GetContent().Text);
        var statements = new Dictionary<string, DateTime>();

        var anchors = doc.GetNodes(
            "//div[@class='npw-transaction-group']/ul[@class='npw-documents']//a[text()[contains(.,'Transakce')]]");
        if (anchors == null)
        {
            return statements;
        }

        foreach (var anchor in anchors)
        {
            string link = "https://www.csob.cz" + anchor.Attributes["href"].Value;
            // Link text "Transakce MM/yyyy" becomes "01-MM-yyyy".
            string firstDay = "01-" + anchor.InnerText.Replace("Transakce ", "").Replace("/", "-").Trim();
            statements.Add(link, DateTime.ParseExact(firstDay, "dd-MM-yyyy", Consts.czCulture));
        }

        return statements;
    }
}
/// <summary>
/// Sends a form POST to the <c>invites</c> URL inviting <paramref name="email"/> into the
/// group <c>Hlidac-team-members</c> with a fixed invitation message.
/// </summary>
/// <param name="email">Address of the invited user.</param>
/// <returns>
/// <c>true</c> when the JSON response has <c>success</c> equal to "OK"; <c>false</c> otherwise,
/// including when anything throws (the exception is logged).
/// </returns>
public bool InviteNewUser(string email)
{
    using (var client = new Devmasters.Net.HttpClient.URLContent(GetUrl("invites", false)))
    {
        try
        {
            client.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
            client.RequestParams.Form.Add("api_key", apiKey);
            client.RequestParams.Form.Add("api_username", apiUsername);
            client.RequestParams.Form.Add("group_names", "Hlidac-team-members");
            client.RequestParams.Form.Add("email", email);
            client.RequestParams.Form.Add("custom_message", @"Ahoj. Toto je pozvámka do Platforma.Hlidacstatu.cz - veřejné diskuzní platformy s privátní částí pro naše teamové diskuze. Interně je to vhodný systém pro diskuzi nad konkrétními projekty, nápady, kdy jsou jednotlivá témata diskuze od sebe oddělena (narozdíl od Slacku).\n Současně je to platforma pro veřejnou diskuzi a kontakt s veřejností. Michal Bláha");

            string body = client.GetContent().Text;
            var reply = Newtonsoft.Json.Linq.JObject.Parse(body);

            // A response without "success" throws here and is reported as false below.
            return reply["success"].ToObject<string>() == "OK";
        }
        catch (Exception e)
        {
            HlidacStatu.Util.Consts.Logger.Error("platforma invitation error", e);
            return false;
        }
    }
}
/// <summary>
/// Downloads <paramref name="url"/> (60 s timeout, up to 5 tries, HTTP errors not ignored)
/// and wraps the HTML in an <c>XPath</c> document.
/// </summary>
/// <param name="url">Address of the page.</param>
/// <returns>The parsed page.</returns>
/// <remarks>
/// On failure the URL and message are written to the console and the exception is
/// rethrown with its original stack trace.
/// </remarks>
private static XPath GetPage(string url)
{
    try
    {
        using (Devmasters.Net.HttpClient.URLContent net = new Devmasters.Net.HttpClient.URLContent(url))
        {
            net.IgnoreHttpErrors = false;
            net.Timeout = 60000;
            net.Tries = 5;
            return new XPath(net.GetContent().Text);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"{url} - {ex.Message}");
        // "throw;" instead of "throw ex;" so the stack trace still points at the real failure.
        throw;
    }
}
/// <summary>
/// Opens the public catalogue page to obtain a session, then builds the <c>HttpClient</c>
/// (<c>wc</c>) that carries the <c>JSessionID</c> cookie and the default request headers.
/// </summary>
public RPP()
{
    // First request: its context (including cookies) is kept in authCook.
    using (var net = new Devmasters.Net.HttpClient.URLContent(root + "/AISP/verejne/ovm-spuu/katalog-kategorii-ovm"))
    {
        authCook = net.GetContent().Context;
    }

    var handler = new HttpClientHandler()
    {
        CookieContainer = new System.Net.CookieContainer()
    };
    handler.CookieContainer.Add(authCook.Cookies["JSessionID"]);

    wc = new HttpClient(handler);

    var defaults = wc.DefaultRequestHeaders;
    defaults.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/json"));
    defaults.Add("Sec-Fetch-Site", "same-origin");
    defaults.Add("Sec-Fetch-Mode", "cors");
    defaults.Add("Sec-Fetch-Dest", "empty");
    defaults.AcceptEncoding.Add(new System.Net.Http.Headers.StringWithQualityHeaderValue("utf-8"));
}
/// <summary>
/// POSTs <paramref name="request"/> as the raw body to the mojedatovaschranka.cz web service
/// and deserializes the XML answer.
/// </summary>
/// <param name="request">Raw request body.</param>
/// <returns>The deserialized response, or <c>null</c> when anything fails (the error is logged).</returns>
private static GetInfoResponse GetResponse(string request)
{
    try
    {
        using (var client = new Devmasters.Net.HttpClient.URLContent("https://www.mojedatovaschranka.cz/sds/ws/call"))
        {
            client.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
            client.RequestParams.RawContent = request;

            string responseXml = client.GetContent().Text;

            var serializer = new XmlSerializer(typeof(GetInfoResponse));
            using (var reader = new System.IO.StringReader(responseXml))
            {
                return serializer.Deserialize(reader) as GetInfoResponse;
            }
        }
    }
    catch (Exception e)
    {
        HlidacStatu.Util.Consts.Logger.Error("GetDatoveSchranky request error", e);
        return null;
    }
}
/// <summary>
/// Checks whether the result page at <paramref name="url"/> reports a count ("POČET") of zero.
/// </summary>
/// <param name="url">Address of the result page.</param>
/// <returns>
/// <c>true</c> only when the count cell parses to 0; <c>false</c> for any other value,
/// a missing count, or an error (which is logged).
/// </returns>
public static bool OdstranenoZInsolvencnihoRejstriku(string url)
{
    try
    {
        string html;
        using (var client = new Devmasters.Net.HttpClient.URLContent(url))
        {
            html = client.GetContent().Text;
        }

        var page = new Devmasters.XPath(html);
        var countText = page
            .GetNodeText("//table[@class='vysledekLustrace']//tr//td[contains(text(),'POČET')]/following-sibling::*")
            ?.Trim();
        var count = Util.ParseTools.ToInt(countText);

        if (count.HasValue && count.Value == 0)
        {
            return true;
        }
        return false;
    }
    catch (Exception e)
    {
        Util.Consts.Logger.Error("", e);
        return false;
    }
}
/// <summary>
/// Downloads the <c>XMLdataVZ</c> export of a contracting-authority profile for the given
/// date range, parses it and saves every contract found.
/// </summary>
/// <param name="profil">Profile whose <c>Url</c> is queried; its last-access fields are updated and saved.</param>
/// <param name="from">Start of the range, sent as <c>ddMMyyyy</c> in the <c>od</c> parameter.</param>
/// <param name="to">End of the range, sent as <c>ddMMyyyy</c> in the <c>do</c> parameter.</param>
/// <returns>
/// The download log describing the request: HTTP validity/error, XML validity/error and the
/// elapsed time in milliseconds.
/// </returns>
/// <remarks>
/// The download itself runs under the semaphore <c>sem</c>. HTTP and XML failures are recorded
/// in the log and on the profile; they are not rethrown.
/// </remarks>
private Lib.Data.Logs.ProfilZadavateleDownload _processReqProfiluZadavatel(VZ.ProfilZadavatele profil, DateTime from, DateTime to)
{
    Devmasters.DT.StopWatchEx sw = new Devmasters.DT.StopWatchEx();
    sw.Start();

    // Build the URL by interpolation rather than string.Format over profil.Url: a profile URL
    // containing '{' or '}' must not be interpreted as a format string. Both dates use the
    // 8-character ddMMyyyy form (the old "ddMMyyy" produced 7 characters for years below 1000).
    string baseUrl = profil.Url;
    if (profil.Url?.EndsWith("/") != true)
    {
        baseUrl = baseUrl + "/";
    }
    string surl = $"{baseUrl}XMLdataVZ?od={from:ddMMyyyy}&do={to:ddMMyyyy}";

    var ReqLog = new Lib.Data.Logs.ProfilZadavateleDownload()
    {
        Date = DateTime.Now,
        ProfileId = profil.Id,
        RequestedUrl = surl
    };

    string xml = "";
    Exception httpFailure = null;
    try
    {
        sem.WaitOne();
        try
        {
            using (Devmasters.Net.HttpClient.URLContent net = new Devmasters.Net.HttpClient.URLContent(surl))
            {
                net.Timeout = 60 * 1000;
                xml = net.GetContent().Text;
                ReqLog.HttpValid = true;
            }
        }
        finally
        {
            // Released only when WaitOne actually succeeded.
            sem.Release();
        }
    }
    catch (Exception e)
    {
        httpFailure = e;
    }
    finally
    {
        // Record the duration BEFORE any Save below, so failed requests are stored with it too.
        sw.Stop();
        ReqLog.ResponseMs = sw.ElapsedMilliseconds;
    }

    if (httpFailure != null)
    {
        _fillHttpError(ReqLog, httpFailure);
        ReqLog.Save();
        profil.LastAccessResult = VZ.ProfilZadavatele.LastAccessResults.HttpError;
        profil.LastAccess = DateTime.Now;
        profil.Save();
        return ReqLog;
    }

    Lib.Data.External.ProfilZadavatelu.ProfilStructure prof = null;
    try
    {
        prof = ParserXml(xml);
        ReqLog.XmlValid = true;
    }
    catch (Exception e)
    {
        ReqLog.XmlValid = false;
        ReqLog.XmlError = e.ToString();
        ReqLog.XmlInvalidContent = xml;
        ReqLog.Save();
        profil.LastAccessResult = VZ.ProfilZadavatele.LastAccessResults.XmlError;
        profil.LastAccess = DateTime.Now;
        profil.Save();
        return ReqLog;
    }

    if (prof != null)
    {
        // NOTE(review): the returned client is not used here; the call is kept in case
        // obtaining it has side effects (e.g. client/index initialization) - confirm.
        Lib.ES.Manager.GetESClient_VerejneZakazkyNaProfiluRaw();

        foreach (var zak in prof.zakazka)
        {
            Lib.Data.External.ProfilZadavatelu.ZakazkaRaw myZak = new Lib.Data.External.ProfilZadavatelu.ZakazkaRaw(zak, profil);
            myZak.Save();
        }

        ReqLog.Save();
        profil.LastAccessResult = VZ.ProfilZadavatele.LastAccessResults.OK;
        profil.LastAccess = DateTime.Now;
        profil.Save();
    }

    return ReqLog;
}

/// <summary>
/// Records an HTTP failure in the download log: marks it invalid, stores the error text and,
/// for protocol errors with an HTTP response, the status code.
/// </summary>
private static void _fillHttpError(Lib.Data.Logs.ProfilZadavateleDownload log, Exception error)
{
    log.HttpValid = false;

    // The WebException is either the error itself or wrapped inside a UrlContentException.
    System.Net.WebException wex = error as System.Net.WebException;
    if (wex == null && error is Devmasters.Net.HttpClient.UrlContentException)
    {
        wex = error.InnerException as System.Net.WebException;
    }

    log.HttpError = (wex ?? error).ToString();

    if (wex != null && wex.Status == System.Net.WebExceptionStatus.ProtocolError)
    {
        var httpResponse = wex.Response as System.Net.HttpWebResponse;
        if (httpResponse != null)
        {
            log.HttpErrorCode = (int)httpResponse.StatusCode;
        }
    }
}
/// <summary>
/// Reads the transactions of a Raiffeisenbank transparent account between two dates.
/// </summary>
/// <param name="fromDate">Start of the range; defaults to one year ago plus one day.</param>
/// <param name="toDate">End of the range; defaults to today.</param>
/// <returns>
/// The transactions collected so far. Paging stops at the first response without transactions;
/// a response that cannot be processed is logged and ends the collection early.
/// </returns>
/// <remarks>
/// The account page is loaded first to extract the portlet instance ids and the internal
/// bank-account id, which are needed to build the <c>nextTransactions</c> requests, e.g.
/// ...&amp;idBankAccount=24389217&amp;fromIndex=51&amp;dateFrom=2016-3-1&amp;dateTo=2018-3-9&amp;q=
/// </remarks>
protected override IEnumerable<IBankovniPolozka> DoParse(DateTime? fromDate = null, DateTime? toDate = null)
{
    var polozky = new List<IBankovniPolozka>();

    DateTime rangeStart = fromDate.HasValue ? fromDate.Value : DateTime.Now.Date.AddYears(-1).AddDays(1);
    DateTime rangeEnd = toDate.HasValue ? toDate.Value : DateTime.Now.Date;

    using (var baseUrl = new Devmasters.Net.HttpClient.URLContent(this.Ucet.Url))
    {
        baseUrl.IgnoreHttpErrors = true;
        var html = baseUrl.GetContent(Encoding.UTF8);

        var webReqInstance = Devmasters.RegexUtil.GetRegexGroupValue(html.Text, "Transparentaccountportlet_INSTANCE_(?<inst>[a-z0-9]*)_", "inst");
        var dynamicInst = Devmasters.RegexUtil.GetRegexGroupValue(html.Text, "p_p_id_DynamicNestedPortlet_INSTANCE_(?<inst>[a-z0-9]*)_", "inst");
        var internalIdBankAccount = Devmasters.RegexUtil.GetRegexGroupValue(html.Text, @"idBankAccount=(?<id>\d*)", "id");

        if (string.IsNullOrEmpty(webReqInstance))
        {
            return polozky;
        }

        // Value itself is unused; the expression is kept as in the original.
        string cisloUctu = this.Ucet.CisloUctu.Split('/')[0];

        string urlTemplate = @"https://www.rb.cz/o-nas/povinne-uverejnovane-informace/transparentni-ucty?"
            + "p_p_id=Transparentaccountportlet_WAR_Transparentaccountportlet_INSTANCE_{0}&p_p_lifecycle=2&p_p_state=normal"
            + "&p_p_mode=view&p_p_resource_id=nextTransactions&p_p_cacheability=cacheLevelPage"
            + "&p_p_col_id=_DynamicNestedPortlet_INSTANCE_{1}__column-1-1&p_p_col_count=1"
            + "&idBankAccount={2}&fromIndex={3}&dateFrom={4}&dateTo={5}&q=";
        string dateFrom = rangeStart.ToString("yyyy-M-d");
        string dateTo = rangeEnd.ToString("yyyy-M-d");

        int page = 0;
        while (true)
        {
            string url = string.Format(urlTemplate,
                webReqInstance, dynamicInst, internalIdBankAccount, page * 20 + 1, dateFrom, dateTo);

            using (var net = new Devmasters.Net.HttpClient.URLContent(url, html.Context))
            {
                net.IgnoreHttpErrors = true;
                var json = net.GetContent().Text;

                try
                {
                    RBData data = Newtonsoft.Json.JsonConvert.DeserializeObject<RBData>(json);
                    page++;

                    if (data.transactions != null && data.transactions.Count() > 0)
                    {
                        polozky.AddRange(
                            data.transactions.Select(m => new SimpleBankovniPolozka()
                            {
                                Castka = HlidacStatu.Util.ParseTools.ToDecimal(m.amount) ?? 0,
                                CisloProtiuctu = "",
                                CisloUctu = this.Ucet.CisloUctu,
                                Datum = Devmasters.DT.Util.ToDateTime(m.datumDate, "dd.MM.yyyy").Value,
                                KS = m.constSymbol,
                                NazevProtiuctu = m.accountName,
                                PopisTransakce = m.type,
                                SS = m.specSymbol,
                                VS = m.varSymbol,
                                ZdrojUrl = baseUrl.Url,
                                ZpravaProPrijemce = m.info,
                            })
                        );
                    }
                    else
                    {
                        // Empty page: nothing more to read.
                        break;
                    }
                }
                catch (Exception e)
                {
                    TULogger.Error("RB parser JSON error", e);
                    return polozky;
                }
            }
        }
    }

    return polozky;
}
/// <summary>
/// Entry point of the "Jednání Rady ČT" loader.
/// </summary>
/// <remarks>
/// Flow, as written below:
/// 1. Prints/logs the version and reads the arguments. "/mp3path" and "/apikey" are declared
///    mandatory; "/utdl", "/rewrite", "/daysback" (default 10000), "/ids", "/skips2t" and
///    "/t" (default 5) are optional. Help() is called when a mandatory argument is missing,
///    but this method does not return afterwards - NOTE(review): confirm that Help() exits.
/// 2. Opens the dataset; when OpenDataset throws ApiException the dataset is created instead.
/// 3. Walks the numbered listing pages until a page yields no "itemBlock" items. The "stop"
///    flag is never set, so the "break" is the only exit. Items dated after "afterDay" and
///    (when "/ids" is given) with a listed id are collected.
/// 4. For every collected id, in reverse order: items that are missing (or all items with
///    "/rewrite") are parsed and saved; items that already exist and have no audio transcript
///    get the transcript re-checked and are saved when one is found.
/// NOTE(review): in this collapsed text it cannot be determined whether the
/// Debug("Starting ... items") call after the "//" marker is live code or commented out.
/// </remarks>
static void Main(string[] arguments) { Console.WriteLine($"Jednání-Rady-ČT - {System.Reflection.Assembly.GetEntryAssembly().GetName().Version}"); Devmasters.Logging.Logger.Root.Info($"Jednání-Rady-ČT - {System.Reflection.Assembly.GetEntryAssembly().GetName().Version}"); Devmasters.Logging.Logger.Root.Debug("Jednání Rady ČT starting with " + string.Join(',', arguments)); var args = new Devmasters.Args(arguments, new string[] { "/mp3path", "/apikey" }); if (args.MandatoryPresent() == false) { Help(); } mp3path = args.Get("/mp3path", null); if (args.Exists("/utdl")) { YTDL = args["/utdl"]; } else { YTDL = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location) + "\\youtube-dl.exe"; } startPath = System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location); apiKey = args["/apikey"]; rewrite = args.Exists("/rewrite"); afterDay = DateTime.Now.Date.AddDays(-1 * args.GetNumber("/daysback", 10000).Value); if (args.Exists("/ids")) { ids = args.GetArray("/ids"); } skips2t = args.Exists("/skips2t"); int threads = args.GetNumber("/t") ?? 
5; try { ds = HlidacStatu.Api.V2.Dataset.Typed.Dataset <Jednani> .OpenDataset(apiKey, DataSetId); } catch (ApiException e) { ds = HlidacStatu.Api.V2.Dataset.Typed.Dataset <Jednani> .CreateDataset(apiKey, Registration()); } catch (Exception e) { throw; } string nextPages = "https://www.ceskatelevize.cz/ivysilani/10000000064-jednani-rady-ceske-televize/dalsi-casti/{0}"; int page = 0; bool stop = false; List <Jednani> jednani = new List <Jednani>(); do { page++; using (Devmasters.Net.HttpClient.URLContent net = new Devmasters.Net.HttpClient.URLContent(string.Format(nextPages, page))) { Console.WriteLine($"Page {page}"); net.IgnoreHttpErrors = true; net.Tries = 5; net.TimeInMsBetweenTries = 2000; string html = ""; try { Devmasters.Logging.Logger.Root.Debug($"downloading {net.Url} "); html = net.GetContent().Text; } catch (Exception e) { Devmasters.Logging.Logger.Root.Error($"{net.Url} failed", e); } Devmasters.XPath xp = new Devmasters.XPath(html); var links = xp.GetNodes("//li[contains(@class,'itemBlock')]"); if (links == null || links.Count == 0) { break; } foreach (var link in links) { Jednani j = new Jednani(); j.Odkaz = urlPrefix + Devmasters.XPath.Tools.GetNodeAttributeValue(link, "div/h3/a[@class='itemSetPaging']", "href"); j.Titulek = Devmasters.XPath.Tools.GetNodeText(link, "div/h3/a[@class='itemSetPaging']").Trim(); j.DatumJednani = Devmasters.DT.Util.ToDate(Devmasters.XPath.Tools.GetNodeText(link, "div/p").Trim()) ?? 
DateTime.MinValue; j.Id = Devmasters.RegexUtil.GetRegexGroupValue(j.Odkaz, "/ivysilani/10000000064-jednani-rady-ceske-televize/(?<id>\\d{2,})", "id"); if (j.DatumJednani > afterDay && (ids == null || ids.Contains(j.Id)) ) { jednani.Add(j); } } } } while (stop == false); // Devmasters.Logging.Logger.Root.Debug($"Starting {jednani.Count} items "); Devmasters.Batch.Manager.DoActionForAll <string>(jednani.Select(m => m.Id).Reverse(), id => { bool exists = ds.ItemExists(id); if (!string.IsNullOrEmpty(id) && (!exists || rewrite) ) { Devmasters.Logging.Logger.Root.Debug($"Start parsing {id} "); var fullJ = ParseJednani(jednani.First(m => m.Id == id)); Devmasters.Logging.Logger.Root.Debug($"Saving {id} "); ds.AddOrUpdateItem(fullJ, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite); } else if (exists) { //check voice2text var fullJ = ds.GetItemSafe(id); if (!(fullJ.PrepisAudia?.Count() > 0)) { Devmasters.Logging.Logger.Root.Debug($"Checking AUDIO text {id} "); var aud = Audio(fullJ); if (aud?.Count() > 0) { fullJ.PrepisAudia = aud; ds.AddOrUpdateItem(fullJ, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite); } } } return(new Devmasters.Batch.ActionOutputData() { Log = id }); }, true, maxDegreeOfParallelism: threads); }
/// <summary>
/// Downloads the newest "dostupnost-kapacit" ZIP from share.uzis.cz (trying today and the six
/// previous days), extracts the XLSX it contains and stores the per-region capacity figures
/// found in the second worksheet into the dataset <c>ds</c>.
/// </summary>
/// <remarks>
/// Any error while reading the ZIP or the workbook is logged and reported by e-mail.
/// The temporary download file is removed when the method finishes.
/// An unreachable <c>if (false)</c> branch that downloaded <c>dip-report-kraje.xlsx</c> directly
/// from onemocneni-aktualne.mzcr.cz was removed.
/// </remarks>
private static void GetExcelFromUzisZIP_Old()
{
    string fn = GetExecutingDirectoryName() + $"\\dip-report-kraje-{DateTime.Now:yyyyMMdd-HHmmss}.xlsx";
    string fnTemp = System.IO.Path.GetTempFileName();

    try
    {
        // Newest ZIP: walk back day by day until one download succeeds.
        for (int i = 0; i < 7; i++)
        {
            DateTime dt = DateTime.Now.Date.AddDays(-1 * i);
            string zipUrl = $"https://share.uzis.cz/s/fbCgFKagS6fCrzc/download?path=%2F{dt.Year}-{dt.ToString("MM")}%20({dt.ToString("MMMM", System.Globalization.CultureInfo.GetCultureInfo("cs"))}%20{dt.Year})&files={dt:yyyy-MM-dd}-dostupnost-kapacit.zip";
            Devmasters.Logging.Logger.Root.Info($"Getting ZIP url {zipUrl}");

            using (Devmasters.Net.HttpClient.URLContent net = new Devmasters.Net.HttpClient.URLContent(zipUrl))
            {
                try
                {
                    System.IO.File.WriteAllBytes(fnTemp, net.GetBinary().Binary);
                    break;
                }
                catch (Exception e)
                {
                    // Expected when that day's file is not published; try the previous day.
                    Devmasters.Logging.Logger.Root.Info($"ZIP for {dt:yyyy-MM-dd} not downloaded: {e.Message}");
                }
            }
        }

        try
        {
            Devmasters.Logging.Logger.Root.Info("Getting Excel from ZIP");

            // Extract the XLSX from the ZIP.
            using (ZipArchive archive = ZipFile.OpenRead(fnTemp))
            {
                foreach (ZipArchiveEntry entry in archive.Entries)
                {
                    if (entry.FullName.EndsWith(".xlsx", StringComparison.OrdinalIgnoreCase))
                    {
                        entry.ExtractToFile(fn);
                    }
                }
            }

            ExcelPackage.LicenseContext = LicenseContext.NonCommercial;
            using (var p = new ExcelPackage(new System.IO.FileInfo(fn)))
            {
                ExcelWorksheet ws = p.Workbook.Worksheets[1];

                // Find each "Stav k datu:" header row; the region table follows it.
                for (int row = 1; row < 100000; row++)
                {
                    Console.Write(".");
                    var txt = ws.Cells[row, 1].GetValue<string>();
                    if (txt == null || !txt.StartsWith("Stav k datu:"))
                    {
                        continue;
                    }

                    string head = txt.Replace("Stav k datu: ", "");
                    string sdate = Devmasters.RegexUtil.GetRegexGroupValue(head, @" \s* (?<dt>\d{1,2}\s*\.\s*\d{1,2}\s*\.\s*\d{4} )", "dt");
                    DateTime dt = Devmasters.DT.Util.ToDate(sdate).Value;
                    string id = "id_" + dt.ToString("yyyy-MM-dd");

                    NemocniceData nd = null;
                    try
                    {
                        nd = ds.GetItem(id);
                    }
                    catch (Exception)
                    {
                        // Lookup failure is treated like "no record yet"; a new one is created below.
                    }
                    if (nd == null)
                    {
                        nd = new NemocniceData();
                        nd.regions = new List<NemocniceData.Region>();
                    }
                    nd.lastUpdated = dt;
                    nd.id = id;

                    Console.WriteLine(".");
                    Devmasters.Logging.Logger.Root.Info(nd.lastUpdated.ToString());

                    // The region rows start 4 rows below the header; 14 regions are read.
                    row = row + 4;
                    List<NemocniceData.Region> finalRegs = new List<NemocniceData.Region>();
                    for (int regs = 0; regs < 14; regs++)
                    {
                        int line = row + regs;
                        string region = ws.Cells[line, 1].GetValue<string>();

                        // Reuse the stored region when the record already has it.
                        NemocniceData.Region r = nd.regions.FirstOrDefault(m => m.region == region);
                        if (r == null)
                        {
                            r = new NemocniceData.Region();
                        }

                        r.lastModified = nd.lastUpdated;
                        r.region = region;
                        r.UPV_celkem = ws.Cells[line, 4].GetValue<int>();
                        r.UPV_volna = ws.Cells[line, 5].GetValue<int>();
                        // NOTE(review): column 5 is read for both UPV_volna and ECMO_celkem - looks
                        // like a column-index slip, kept as-is because the sheet layout is not visible.
                        r.ECMO_celkem = ws.Cells[line, 5].GetValue<int>();
                        r.ECMO_volna = ws.Cells[line, 6].GetValue<int>();
                        r.CRRT_celkem = ws.Cells[line, 8].GetValue<int>();
                        r.CRRT_volna = ws.Cells[line, 9].GetValue<int>();
                        r.IHD_celkem = ws.Cells[line, 11].GetValue<int>();
                        r.IHD_volna = ws.Cells[line, 12].GetValue<int>();
                        r.AROJIP_luzka_celkem = ws.Cells[line, 14].GetValue<int>();
                        r.AROJIP_luzka_covid = ws.Cells[line, 15].GetValue<int>();
                        r.AROJIP_luzka_necovid = ws.Cells[line, 16].GetValue<int>();
                        r.Standard_luzka_s_kyslikem_celkem = ws.Cells[line, 18].GetValue<int>();
                        r.Standard_luzka_s_kyslikem_covid = ws.Cells[line, 19].GetValue<int>();
                        r.Standard_luzka_s_kyslikem_necovid = ws.Cells[line, 20].GetValue<int>();
                        r.Lekari_AROJIP_celkem = ws.Cells[line, 22].GetValue<int>();
                        r.Lekari_AROJIP_dostupni = ws.Cells[line, 23].GetValue<int>();
                        r.Sestry_AROJIP_celkem = ws.Cells[line, 25].GetValue<int>();
                        r.Sestry_AROJIP_dostupni = ws.Cells[line, 26].GetValue<int>();
                        r.Ventilatory_prenosne_celkem = ws.Cells[line, 28].GetValue<int>();
                        r.Ventilatory_operacnisal_celkem = ws.Cells[line, 29].GetValue<int>();
                        r.Standard_luzka_celkem = ws.Cells[line, 30].GetValue<int>();
                        r.Standard_luzka_s_monitor_celkem = ws.Cells[line, 31].GetValue<int>();

                        finalRegs.Add(r);
                    }
                    nd.regions = finalRegs;
                    row = row + 16;

                    Devmasters.Logging.Logger.Root.Info("Saving");
                    ds.AddOrUpdateItem(nd, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
                }
            }
        }
        catch (Exception e)
        {
            Devmasters.Logging.Logger.Root.Error("Processing ZIP XLS error", e);
            SendMail("*****@*****.**", "Selhalo zpracovani dat z UZIS", e.ToString(), "");
        }
    }
    finally
    {
        // GetTempFileName creates a real file; remove it so repeated runs do not pile them up.
        try
        {
            System.IO.File.Delete(fnTemp);
        }
        catch (System.IO.IOException e)
        {
            Devmasters.Logging.Logger.Root.Error("Cannot delete temp file " + fnTemp, e);
        }
        catch (UnauthorizedAccessException e)
        {
            Devmasters.Logging.Logger.Root.Error("Cannot delete temp file " + fnTemp, e);
        }
    }
}
/// <summary>
/// Console entry point. Reads <c>/name=value</c> arguments, picks the sessions ("schuze")
/// to process for the requested year, parses every item of those sessions, enriches each
/// item with the person ids returned by the PoliticiFromText API and stores it.
/// </summary>
/// <param name="args">
/// Command-line arguments in <c>/name=value</c> form (names are case-insensitive):
/// <c>/apikey</c> (required), <c>/rok</c> (required, year), <c>/daysback</c> (optional, default 3),
/// <c>/schuze</c> (optional, single session number), <c>/rewrite</c> (optional flag).
/// When a required argument is missing, <c>Help()</c> is called and the method returns.
/// </param>
static void Main(string[] args)
{
    if (args.Length == 0)
    {
        Help();
        return;
    }

    // Split on the FIRST '=' only, so a value that itself contains '=' (e.g. a base64-padded
    // API key) is kept whole instead of being cut at the second '='.
    // Names are lower-cased with the invariant culture so lookup does not depend on the OS locale.
    Dictionary<string, string> arguments = args
        .Select(m => m.Split(new[] { '=' }, 2))
        .ToDictionary(m => m[0].ToLowerInvariant(), v => v.Length == 1 ? "" : v[1]);

    if (!arguments.TryGetValue("/apikey", out apikey))
    {
        Help();
        return;
    }

    string argValue;

    int daysBack = 3;
    if (arguments.TryGetValue("/daysback", out argValue))
    {
        daysBack = Convert.ToInt32(argValue);
    }

    int rok = 0;
    if (arguments.TryGetValue("/rok", out argValue))
    {
        rok = Convert.ToInt32(argValue);
    }
    else
    {
        Help();
        return;
    }

    // Presence of the switch is enough; its value is ignored.
    bool rewrite = arguments.ContainsKey("/rewrite");

    int? schuze = null;
    if (arguments.TryGetValue("/schuze", out argValue))
    {
        schuze = Convert.ToInt32(argValue);
    }

    dsc = HlidacStatu.Api.V2.Dataset.Typed.Dataset<Steno>.OpenDataset(apikey, "stenozaznamy-psp");

    // NOTE(review): both CSV objects are never assigned in this method, so running with
    // apikey == "csv" dereferences null below. Wire up a real writer or remove the CSV branch.
    StreamWriter reader = null;
    CsvWriter csv = null;

    // Names consisting of more than two words; printed at the end for manual review.
    HashSet<string> jmena2Check = new HashSet<string>();
    object jmena2CheckLock = new object();

    var vsechnSchuze = ParsePSPWeb.VsechnySchuze(rok);

    List<int> schuzeToParse = new List<int>();
    if (schuze.HasValue)
    {
        schuzeToParse.Add(schuze.Value);
    }
    else if (rewrite)
    {
        schuzeToParse.AddRange(vsechnSchuze.Select(m => m.schuze));
    }
    else
    {
        // only sessions touched within the last `daysBack` days
        DateTime after = DateTime.Now.Date.AddDays(-1 * daysBack);
        schuzeToParse.AddRange(vsechnSchuze.Where(m => m.last >= after).Select(m => m.schuze));
    }

    Console.WriteLine("Zpracuji schuze " + string.Join(",", schuzeToParse));

    Devmasters.Batch.Manager.DoActionForAll<int>(schuzeToParse,
        s =>
        {
            foreach (var item in ParsePSPWeb.ParseSchuze(rok, s))
            {
                try
                {
                    if (rewrite == false)
                    {
                        var exists = dsc.ItemExists(item.Id);
                        if (exists)
                        {
                            continue; // already stored, skip
                        }
                    }
                }
                catch (Exception)
                {
                    // Deliberate best effort: a failed existence check is treated as "does not exist".
                }

                if (item.celeJmeno?.Split(' ')?.Count() > 2)
                {
                    // The last argument of DoActionForAll presumably enables parallel execution
                    // (TODO confirm); HashSet<T> is not safe for concurrent writers, so serialize Add.
                    lock (jmena2CheckLock)
                    {
                        jmena2Check.Add(item.celeJmeno);
                    }
                }

                using (var net = new Devmasters.Net.HttpClient.URLContent($"https://www.hlidacstatu.cz/api/v1/PoliticiFromText?Authorization={apikey}"))
                {
                    net.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
                    net.RequestParams.Form.Add("text", item.text);
                    net.Timeout = 60 * 1000;
                    var sosoby = net.GetContent().Text;
                    var osoby = Newtonsoft.Json.Linq.JArray.Parse(sosoby);
                    if (osoby != null && osoby.Count > 0)
                    {
                        item.politiciZminky = osoby
                            .Select(ja => ja.Value<string>("osobaid"))
                            .Where(o => !string.IsNullOrWhiteSpace(o))
                            .ToArray();
                    }
                }

                if (apikey == "csv")
                {
                    csv.WriteRecord<Steno>(item);
                    csv.NextRecord();
                    if (item.poradi % 10 == 0)
                    {
                        csv.Flush();
                    }
                }
                else
                {
                    SaveItem(item, true);
                }
            }

            return new Devmasters.Batch.ActionOutputData();
        },
        !System.Diagnostics.Debugger.IsAttached);

    if (apikey == "csv")
    {
        csv.Flush();
        csv.Dispose();
        reader.Close();
    }

    Console.WriteLine();
    Console.WriteLine("Podezrela jmena:");
    foreach (var k in jmena2Check)
    {
        Console.WriteLine(k);
    }
}
/// <summary>
/// Downloads the account statement page at <paramref name="url"/> and converts every
/// transaction row of the page into a bank item.
/// </summary>
/// <param name="url">Address of the statement page to download (read as UTF-8).</param>
/// <returns>
/// The distinct parsed items, or an empty list when the page has no summary table
/// (a warning is logged in that case).
/// </returns>
/// <exception cref="StatementTooLongException">
/// The page contains the notice that some movements are not shown for the requested date range.
/// </exception>
private IEnumerable<IBankovniPolozka> ParseStatement(string url)
{
    var items = new HashSet<IBankovniPolozka>();

    using (var client = new Devmasters.Net.HttpClient.URLContent(url))
    {
        client.IgnoreHttpErrors = true;
        string page = client.GetContent(Encoding.UTF8).Text;

        if (page.Contains("Některé pohyby nemusí být zobrazeny. Zmenšete datumový rozsah."))
        {
            throw new StatementTooLongException();
        }

        var xpath = new Devmasters.XPath(page);

        // Summary table: opening/final balance and credit/debit totals.
        var summaryPath = "//div[contains(@class, 'pohybySum')]/table/tbody/tr";
        var summaryNodes = xpath.GetNodes(summaryPath);
        int summaryCount = summaryNodes == null ? 0 : summaryNodes.Count;
        if (summaryCount == 0)
        {
            TULogger.Warning($"FIO: Account statement page was not found for account {Ucet.CisloUctu}. Account has been probably canceled. Url: {url}");
            return new List<IBankovniPolozka>();
        }

        var overview = new StatementOverview
        {
            OpeningBalance = parseAmount(xpath.GetNodeText(summaryPath + "/td[1]")),
            FinalBalance = parseAmount(xpath.GetNodeText(summaryPath + "/td[2]")),
            CreditSum = parseAmount(xpath.GetNodeText(summaryPath + "/td[3]")),
            DebitSum = parseAmount(xpath.GetNodeText(summaryPath + "/td[4]"))
        };

        // Transaction table; XPath positions are 1-based.
        var txPath = "//table[@class='table' and starts-with(@id,'id')]/tbody/tr";
        var txNodes = xpath.GetNodes(txPath);
        int txCount = txNodes == null ? 0 : txNodes.Count;

        int index = 1;
        while (index <= txCount)
        {
            string cell = txPath + "[" + index + "]";

            var item = new SimpleBankovniPolozka();
            item.CisloUctu = Ucet.CisloUctu;
            item.Datum = Devmasters.DT.Util.ToDateTime(xpath.GetNodeText(cell + "/td[1]"), "dd.MM.yyyy").Value;
            item.Castka = parseAmount(System.Net.WebUtility.HtmlDecode(xpath.GetNodeText(cell + "/td[2]")));
            item.PopisTransakce = System.Net.WebUtility.HtmlDecode(xpath.GetNodeText(cell + "/td[3]"));
            item.NazevProtiuctu = System.Net.WebUtility.HtmlDecode(xpath.GetNodeText(cell + "/td[4]"));

            string messageHtml = System.Net.WebUtility.HtmlDecode(xpath.GetNodeHtml(cell + "/td[5]"));
            item.ZpravaProPrijemce = Devmasters.TextUtil.NormalizeToBlockText(messageHtml?.Replace("<br>", " \n"));

            // Column 9 is appended to the message unless it merely repeats it.
            string noteHtml = System.Net.WebUtility.HtmlDecode(xpath.GetNodeHtml(cell + "/td[9]"));
            var note = Devmasters.TextUtil.NormalizeToBlockText(noteHtml?.Replace("<br>", " \n"));
            if (note != item.ZpravaProPrijemce)
            {
                item.ZpravaProPrijemce = item.ZpravaProPrijemce + " " + note;
            }

            item.KS = xpath.GetNodeText(cell + "/td[6]");
            item.VS = xpath.GetNodeText(cell + "/td[7]");
            item.SS = xpath.GetNodeText(cell + "/td[8]");
            item.ZdrojUrl = client.Url;
            item.CisloProtiuctu = ""; // not available on this page

            // HashSet.Add is a no-op for an element that is already present.
            items.Add(item);

            index++;
        }

        ValidateParsedItems(items, overview);
    }

    return items;
}
/// <summary>
/// Downloads the agenda page for the given day and turns its table rows into
/// <c>jednani</c> records; rows that reference a resolution ("Usnesení č.") are
/// parsed separately through <c>ParseUsneseni</c> and appended to the result.
/// </summary>
/// <param name="sdatum">Agenda date in <c>yyyy-MM-dd</c> format; also substituted into <c>agendaUrl</c>.</param>
/// <returns>
/// All parsed records with ids made unique within the returned list. An empty list is
/// returned when the page contains no agenda table rows.
/// </returns>
/// <exception cref="FormatException"><paramref name="sdatum"/> is not in <c>yyyy-MM-dd</c> format.</exception>
public static IEnumerable<jednani> ParseAgenda(string sdatum)
{
    string html;
    using (var net = new Devmasters.Net.HttpClient.URLContent(string.Format(agendaUrl, sdatum)))
    {
        html = net.GetContent().Text;
    }

    DateTime datum = DateTime.ParseExact(
        sdatum,
        "yyyy-MM-dd",
        System.Globalization.CultureInfo.GetCultureInfo("en-US"),
        System.Globalization.DateTimeStyles.AssumeLocal);

    List<string> usneseni = new List<string>();
    List<jednani> js = new List<jednani>();
    object lockObj = new object();

    var xp = new XPath(html);
    var rows = xp.GetNodes("//table[@class='lfr-table djv-agenda-table']//tr");
    if (rows == null)
    {
        // No agenda table on the page: nothing to parse (previously a NullReferenceException).
        return js;
    }

    int ruzneCount = 0;
    foreach (var r in rows)
    {
        // Rows whose first cell spans columns are skipped.
        if (XPath.Tools.GetNodeAttributeValue(r, "td[1]", "colspan") != null)
        {
            continue;
        }

        var bodRaw = XPath.Tools.GetNodeText(r, "td[2]");
        var bod = bodRaw ?? "";
        var obsah = XPath.Tools.GetNode(r, "td[5]");

        // Fall back to the first cell when the row has neither td[2] nor td[5].
        // The test must use the raw value: `bod` is already coalesced to "" and is never null,
        // which made this fallback unreachable before.
        if (bodRaw == null && obsah == null)
        {
            obsah = XPath.Tools.GetNode(r, "td[1]");
        }

        if (bod.Contains("Usnesení č."))
        {
            usneseni.Add(bod);
        }
        else if (bod.Contains("Příloha č."))
        {
            // attachments are intentionally ignored
        }
        else
        {
            jednani j = new jednani();
            if (string.IsNullOrEmpty(bod))
            {
                // Items without a label get a running "miscellaneous" number.
                ruzneCount++;
                bod = "Různé č." + ruzneCount;
            }

            j.bod = bod.Trim();
            j.datum = datum;
            j.vec = obsah?.InnerText;

            var docs = XPath.Tools.GetNodes(r, "td[3]/a");
            if (docs != null)
            {
                j.dokumenty = docs
                    .Select(m => new jednani.dokument()
                    {
                        DocumentUrl = NormalizeUrl(m.GetAttributeValue("href", "")),
                        stahnuto = DateTime.Now,
                        jmeno = "Znění",
                    })
                    .ToArray();
            }

            if (obsah != null)
            {
                // Query the links of the content cell once and reuse the result.
                var links = XPath.Tools.GetNodes(obsah, ".//a");
                if (links != null)
                {
                    j.veklep = links
                        .Select(m => m.GetAttributeValue("href", null))
                        .Where(n => n?.StartsWith("/veklep") == true)
                        .Select(m => m.Replace("/veklep-detail?pid=", ""))
                        .ToArray();

                    // NOTE(review): this replaces any documents collected from td[3] above
                    // (behavior kept as-is) - confirm whether the two lists should be merged.
                    j.dokumenty = links
                        .Where(m => m.GetAttributeValue("href", null) != null
                                    && m.GetAttributeValue("href", null)?.StartsWith("/veklep") == false)
                        .Select(m => new jednani.dokument()
                        {
                            DocumentUrl = NormalizeUrl(m.GetAttributeValue("href", null)),
                            jmeno = m.InnerText
                        })
                        .ToArray();
                }
            }

            j.SetId();
            js.Add(j);
        }
    }

    // Parse the referenced resolutions; only the digits of the label are passed on.
    // NOTE(review): the lock wraps the whole ParseUsneseni call, so the calls run one at a time
    // despite maxDegreeOfParallelism - kept because ParseUsneseni's thread-safety is not visible here.
    Devmasters.Batch.Manager.DoActionForAll(usneseni,
        (u) =>
        {
            lock (lockObj)
            {
                js.Add(ParseUsneseni(datum, System.Text.RegularExpressions.Regex.Replace(u, "\\D", "")));
            }
            return new Devmasters.Batch.ActionOutputData();
        },
        null,
        null,
        true,
        maxDegreeOfParallelism: 5,
        prefix: "DAT " + sdatum + ":");

    // Make ids unique: a record whose id equals an earlier record's id gets "-<index>" appended.
    var dids = js.Select(m => m.Id).Distinct().ToArray();
    if (dids.Length != js.Count)
    {
        for (int i = 1; i < js.Count; i++)
        {
            for (int k = 0; k < i; k++)
            {
                if (js[i].Id == js[k].Id)
                {
                    js[i].Id = js[i].Id + "-" + i;
                }
            }
        }
    }

    return js;
}
/// <summary>
/// Processes the videos of one person: resolves the list of video URLs, makes sure each
/// video has a dataset record and a local MP3, requests a speech-to-text task when no
/// transcript file exists, and imports a finished <c>.ctm</c> transcript into the record.
/// </summary>
/// <param name="o">Person the videos belong to; its <c>NameId</c> is written to newly created records.</param>
/// <param name="playlist">Playlist passed to <c>youtube-dl</c> when <paramref name="vids"/> is empty.</param>
/// <param name="threads">Value passed as <c>maxDegreeOfParallelism</c> to the batch runner.</param>
/// <param name="max">Value for the <c>--playlist-end</c> option when listing the playlist.</param>
/// <param name="vids">Explicit video ids; when non-empty the playlist is not queried.</param>
/// <param name="mp3path">Root directory under which <c>{DataSetId}\{id}.*</c> files are looked up.</param>
public static void Process(osoba o, string playlist, int threads, int max, string[] vids, string mp3path)
{
    logger.Info($"Starting {o.Jmeno} {o.Prijmeni} {o.NameId} for {playlist} ");

    List<string> videos;
    if (vids != null && vids.Length > 0)
    {
        videos = vids.Select(id => "https://www.youtube.com/watch?v=" + id).ToList();
    }
    else
    {
        // Ask youtube-dl for the ids of the playlist entries.
        var listInfo = new System.Diagnostics.ProcessStartInfo(
            "youtube-dl",
            $"--flat-playlist --get-id --playlist-end {max} " + playlist);
        var lister = new Devmasters.ProcessExecutor(listInfo, 60 * 6 * 24);
        logger.Info($"Starting Youtube-dl playlist video list ");
        lister.Start();

        string[] ids = lister.StandardOutput.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
        videos = ids.Select(id => "https://www.youtube.com/watch?v=" + id).ToList();
    }

    Console.WriteLine();
    Console.WriteLine($"Processing {videos.Count} videos");
    for (int blank = 0; blank < 4; blank++)
    {
        Console.WriteLine();
    }

    Devmasters.Batch.Manager.DoActionForAll(videos,
        vid =>
        {
            string recId = record.UniqueID(vid);
            bool dirty = false;
            record rec;

            if (Program.api2.ItemExists(recId))
            {
                rec = Program.api2.GetItem(recId);
            }
            else
            {
                rec = YTDL.GetVideoInfo(vid);
                if (rec == null)
                {
                    return new Devmasters.Batch.ActionOutputData();
                }
                rec.osobaid = o.NameId;
                dirty = true;
            }

            string basePath = $"{mp3path}\\{DataSetId}\\{recId}";
            string audioFile = $"{basePath}.mp3";
            string rawTranscriptFile = $"{basePath}.mp3.raw_s2t";
            string ctmFile = $"{basePath}.ctm";

            // Download the audio track only when it is not on disk yet.
            if (!System.IO.File.Exists(audioFile))
            {
                var dlInfo = new System.Diagnostics.ProcessStartInfo(
                    "youtube-dl.exe",
                    $"--no-progress --extract-audio --audio-format mp3 --postprocessor-args \" -ac 1 -ar 16000\" -o \"{basePath}.%(ext)s\" " + vid);
                var downloader = new Devmasters.ProcessExecutor(dlInfo, 60 * 6 * 24);
                downloader.StandardOutputDataReceived += (sender, evt) => { logger.Debug(evt.Data); };
                logger.Info($"Starting Youtube-dl for {vid} ");
                downloader.Start();
            }

            bool transcriptOnDisk = System.IO.File.Exists(rawTranscriptFile) || System.IO.File.Exists(ctmFile);

            // No transcript file and nothing stored in the record: enqueue a speech-to-text task.
            if (!transcriptOnDisk && rec.prepisAudia == null)
            {
                using (var net = new Devmasters.Net.HttpClient.URLContent(
                    $"https://www.hlidacstatu.cz/api/v2/internalq/Voice2TextNewTask/{DataSetId}/{recId}?priority=2"))
                {
                    net.Method = Devmasters.Net.HttpClient.MethodEnum.POST;
                    net.RequestParams.Headers.Add("Authorization", System.Configuration.ConfigurationManager.AppSettings["apikey"]);
                    net.GetContent();
                }
            }

            // A transcript file exists but the record has no blocks yet: import the .ctm file.
            if (transcriptOnDisk && !(rec.prepisAudia?.Count() > 0) && System.IO.File.Exists(ctmFile))
            {
                var terms = new KaldiASR.SpeechToText.VoiceToTerms(System.IO.File.ReadAllText(ctmFile));
                var timed = new Devmasters.SpeechToText.VoiceToTextFormatter(terms.Terms)
                    .TextWithTimestamps(TimeSpan.FromSeconds(10), true);
                var blocks = timed
                    .Select(t => new record.Blok()
                    {
                        sekundOdZacatku = (long)t.Start.TotalSeconds,
                        text = t.Text
                    })
                    .ToArray();

                // TODO: fix eventually
                var fresh = YTDL.GetVideoInfo(vid);
                if (fresh != null)
                {
                    rec.text = fresh.text + "\n\n" + new Devmasters.SpeechToText.VoiceToTextFormatter(terms.Terms).Text(true);
                }

                rec.prepisAudia = blocks;
                dirty = true;
            }

            if (dirty)
            {
                api2.AddOrUpdateItem(rec, HlidacStatu.Api.V2.Dataset.Typed.ItemInsertMode.rewrite);
            }

            return new Devmasters.Batch.ActionOutputData();
        },
        Devmasters.Batch.Manager.DefaultOutputWriter,
        Devmasters.Batch.Manager.DefaultProgressWriter,
        !System.Diagnostics.Debugger.IsAttached,
        maxDegreeOfParallelism: threads);
}