/// <summary>
/// Reads the semicolon-separated file <c>StiftungsdatenUtf8.csv</c> from the application's base
/// directory and indexes one <c>Stiftung</c> document per row.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    // Highest column index read below is 10, so a usable row needs at least 11 fields.
    const int requiredFieldCount = 11;

    var client = ElasticSearchFactory.GetClient();

    var csvPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "StiftungsdatenUtf8.csv");

    // The file name indicates UTF-8 content. Decoding it as ASCII would replace every
    // non-ASCII character (umlauts, accents) with '?' before the text reaches the index.
    var allLines = File.ReadAllLines(csvPath, Encoding.UTF8);
    Console.WriteLine("Total lines: " + allLines.Length);

    var count = 0;
    foreach (var line in allLines)
    {
        count++;

        var values = line.Split(';');
        if (values.Length < requiredFieldCount)
        {
            // Blank or truncated rows would otherwise abort the whole import with an IndexOutOfRangeException.
            Console.WriteLine($"Skipped line {count}: expected at least {requiredFieldCount} fields but found {values.Length}");
            continue;
        }

        // Name fallback: column 1, then column 2, then column 3.
        string name;
        if (!string.IsNullOrWhiteSpace(values[1]))
        {
            name = values[1];
        }
        else if (!string.IsNullOrWhiteSpace(values[2]))
        {
            name = values[2];
        }
        else
        {
            name = values[3];
        }

        var stiftung = new Stiftung
        {
            sourceId = values[0],
            name = name,
            nameshort = values[4],
            adresse = values[5] + " " + values[6] + ", " + values[7] + " " + values[8],
            url = values[9],
            zweck = values[10]
        };

        var indexResponse = client.IndexDocument(stiftung);
        if (!indexResponse.IsValid)
        {
            // Report rejected documents instead of silently counting them as processed.
            Console.WriteLine($"Indexing failed for line {count}: {indexResponse.DebugInformation}");
            continue;
        }

        Console.WriteLine("Processed line: " + count);
    }
}
/// <summary>
/// Requests the first 5000 <c>Stiftung</c> documents from the search index, calls
/// <c>ProcessFoundationAsync</c> for each of them (year 2017) and, when the result reports success,
/// writes the balance sheet total and the financial statement URL back to the indexed document.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
static async Task MainAsync(string[] args)
{
    Initialize();

    var client = ElasticSearchFactory.GetClient();
    var stiftungen = client.Search<Stiftung>(s => s.From(0).Size(5000)).Documents;

    var count = 0;
    foreach (var stiftung in stiftungen)
    {
        try
        {
            count++;
            Console.WriteLine($"Processing {count}: {stiftung.name}");

            var result = await ProcessFoundationAsync(stiftung.name, stiftung.nameshort, 2017, stiftung.url);
            if (!result.Success)
            {
                continue;
            }

            // Document carrying the id of the processed foundation plus the newly found values.
            var update = new Stiftung
            {
                id = stiftung.id,
                bilanzsumme26 = result.BalanceSheetTotal,
                jahresbericht = result.FinancialStatementUrl.AbsoluteUri,
                timestamp = DateTime.Now
            };

            client.Update(new DocumentPath<Stiftung>(update.id), u => u.Doc(update));
        }
        catch (Exception ex)
        {
            // A failure for one foundation is logged and the loop moves on to the next one.
            Console.WriteLine("Error bei der Stiftung: {0} {1}", stiftung.name, ex);
        }
    }

    // Manual:
    // var rega = ProcessFoundation("Rega", 2017, "rega.ch");
    // var eichholz = ProcessFoundation("Eichholz", 2017, "stiftung-eichholz.ch");
    // var sieber = ProcessFoundation("Pfarrer Sieber", 2017, "swsieber.ch");
    // var hmsg = ProcessFoundation("HMSG", 2017, "hmsg.ch");

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Fetches one page of <c>Stiftung</c> documents, sends the purpose descriptions of those without
/// tags to the key phrase API and stores the returned key phrases as tags on each document.
/// </summary>
/// <param name="searchStart">Offset of the first document to request from the index.</param>
/// <param name="searchSize">Number of documents to request from the index.</param>
/// <param name="client">Elasticsearch client used for the search and the updates.</param>
/// <param name="textAnalyticsClient">Text analytics client used for the key phrase extraction.</param>
private static async Task UpdateFoundationsTags(int searchStart, int searchSize, Nest.ElasticClient client, TextAnalyticsAPI textAnalyticsClient)
{
    var page = client.Search<Stiftung>(s => s.From(searchStart).Size(searchSize));

    // Only foundations that have no tags yet are processed.
    var untagged = page.Documents
        .Where(s => s.tags == null || s.tags.Length < 1)
        .ToArray();
    if (untagged.Length < 1)
    {
        return;
    }

    var purposeInputs = await GetPurposeDescriptionsAsMultiLanguageInput(textAnalyticsClient, untagged);
    if (purposeInputs.Count < 1)
    {
        return;
    }

    var keyPhraseResult = await textAnalyticsClient.KeyPhrasesAsync(new MultiLanguageBatchInput(purposeInputs));

    // Printing key phrases and writing phrases to stiftung.
    foreach (var document in keyPhraseResult.Documents)
    {
        Console.WriteLine("Document ID: {0} ", document.Id);
        Console.WriteLine("\t Key phrases:");

        // The result document id is parsed as the Guid of the foundation to update.
        var stiftungsId = new Guid(document.Id);

        foreach (string keyphrase in document.KeyPhrases)
        {
            Console.WriteLine("\t\t" + keyphrase);
        }

        var stiftung = new Stiftung
        {
            id = stiftungsId,
            tags = document.KeyPhrases.ToArray(),
            timestamp = DateTime.Now
        };

        client.Update<Stiftung>(new Nest.DocumentPath<Stiftung>(stiftungsId), u => u.Doc(stiftung));
    }

    // Pause for 30 seconds after a processed batch (presumably to respect an API rate limit - TODO confirm).
    await Task.Delay(30000);
}
/// <summary>
/// Enriches every <c>Stiftung</c> in the store with commercial register data: the basic company
/// information comes from Zefix, the current board members are scraped from the cantonal
/// Handelsregister web page loaded in an off-screen Chromium browser.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    var zefix = new ZefixSrv();
    Cef.Initialize();
    var browser = new ChromiumWebBrowser();
    // Signalled by the page-loaded handler once the current page has been processed.
    var browserManualResetEvent = new ManualResetEvent(false);
    var elasticClient = ElasticSearchFactory.GetClient();

    // Get all Stiftungen from store
    var stiftungen = elasticClient.Search<Stiftung>(s => s.Size(5000));
    foreach (var stiftung in stiftungen.Documents)
    {
        // Try to find the Stiftung from Zefix (get some basic information)
        // The Delays and Sleeps are necessary to have not more than 200 requests in 10 minutes to Zefix.
        // Otherwise the account will be locked!
        var companyName = stiftung.name;
        var companyInfo = zefix.FindByName(companyName);
        if (companyInfo == null)
        {
            Console.WriteLine($"Nothing found for '{companyName}'");
            Thread.Sleep(3000);
            continue;
        }

        var newStiftung = new Stiftung
        {
            id = stiftung.id,
            handelsregisterUID = companyInfo.Uid,
            handelsregisterCHNR = companyInfo.ChId,
            handelsregisterAmt = companyInfo.RegisterOfficeId,
            kanton = companyInfo.CantonIso
        };

        var hadDelay = false;

        // Try to find data from Handelsregister -> tricky
        EventHandler<LoadingStateChangedEventArgs> loadedStateChanged = async (sender, e) =>
        {
            if (e.IsLoading)
            {
                return;
            }

            // This is an async void event handler: an exception escaping it would be unhandled and
            // terminate the process. Errors are therefore logged here, and the wait handle is always
            // signalled so the main loop can continue with the next Stiftung.
            try
            {
                // Hard to get the final HTML view due to the technologies used by the Handelsregister solution(s).
                // With the delay we give the site enough time to load additional view parts.
                // @Handelsregister: Please fix your SOAP service!
                Console.WriteLine($"Loading for company '{companyName}'");
                await Task.Delay(5000);
                hadDelay = true;

                var sourceVisitor = new TaskStringVisitor();
                browser.GetMainFrame().GetSource(sourceVisitor);
                var siteSource = await sourceVisitor.Task;

                // AngleSharp to parse HTML -> grab the current members of the Stiftungsrat
                var config = Configuration.Default.WithCss();
                var parser = new HtmlParser(config);
                var document = parser.Parse(siteSource);
                var tbody = document.QuerySelector(".personen tbody");
                if (tbody != null)
                {
                    var members = new List<Stiftungsratsmitglied>();
                    foreach (var element in tbody.Children)
                    {
                        // unexpected row content or cancelled person
                        if (element.ChildElementCount != 6 || element.Children.Any(ce => ce.ClassList.Contains("strike")))
                        {
                            continue;
                        }

                        var person = element.Children[3].TextContent?.Trim();
                        var function = element.Children[4].TextContent?.Trim();
                        var permission = element.Children[5].TextContent?.Trim();
                        Console.WriteLine($"person: {person}; function: {function}; permission: {permission}");

                        // Could be a company -> exclude
                        if (function != "auditor")
                        {
                            var member = new Stiftungsratsmitglied
                            {
                                name = person,
                                funktion = function,
                                berechtigung = permission
                            };
                            members.Add(member);
                        }
                    }

                    newStiftung.stiftungsratsmitglieder = members.ToArray();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error while reading Handelsregister data for '{companyName}': {ex}");
            }
            finally
            {
                browserManualResetEvent.Set();
            }
        };

        browser.LoadingStateChanged += loadedStateChanged;

        // The Handelsregister solutions has different URLs per canton
        var address = HrgUrlHelper.GetQueryUrl(companyInfo);
        browser.Load(address);

        // Block until the handler has finished with the page, then re-arm the event for the next iteration.
        browserManualResetEvent.WaitOne();
        browserManualResetEvent.Reset();
        browser.LoadingStateChanged -= loadedStateChanged;

        // Keep the request rate down even when the handler did not reach its own delay.
        if (!hadDelay)
        {
            Thread.Sleep(5000);
        }

        newStiftung.timestamp = DateTime.Now;
        elasticClient.Update(new DocumentPath<Stiftung>(stiftung.id), u => u.Doc(newStiftung));
    }

    Cef.Shutdown();
    Console.WriteLine();
    Console.WriteLine("FINISHED :-)");
    Console.ReadKey();
}