/// <summary>
/// Walks over all <c>Stiftung</c> documents in pages of 20 and calls
/// <c>UpdateFoundationsTags</c> for every page.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
/// <returns>
/// A task that completes once every page has been processed. The method used to be
/// <c>async void</c>, which made it impossible for the caller to wait for completion
/// or to observe exceptions; existing fire-and-forget call sites still compile.
/// </returns>
public static async System.Threading.Tasks.Task MainAsync(string[] args)
{
    Console.OutputEncoding = System.Text.Encoding.UTF8;

    // Create the Elasticsearch client and the text analytics client.
    var client = ElasticSearchFactory.GetClient();
    TextAnalyticsAPI textAnalyticsClient = new TextAnalyticsAPI(new ApiKeyServiceClientCredentials())
    {
        AzureRegion = AzureRegions.Westcentralus
    };

    // The total number of documents decides how many pages have to be requested.
    var countResponse = client.Count<Stiftung>();
    var count = countResponse.Count;

    var searchSize = 20;
    var searchStart = 0;
    do
    {
        await UpdateFoundationsTags(searchStart, searchSize, client, textAnalyticsClient);
        searchStart += searchSize;
    }
    while (searchStart < count);

    Console.WriteLine("You're finished. Great!");
}
/// <summary>
/// Imports foundations from the semicolon-separated file <c>StiftungsdatenUtf8.csv</c>
/// (located in the application's base directory) into the search index.
/// </summary>
/// <remarks>
/// Column layout used by the import: 0 = source id, 1-3 = name candidates (the first
/// non-blank of columns 1 and 2 wins, column 3 is the fallback), 4 = short name,
/// 5-8 = address parts, 9 = URL, 10 = purpose. Lines with fewer columns are reported
/// and skipped instead of aborting the whole import.
/// </remarks>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    // Highest column index read below is 10.
    const int requiredFieldCount = 11;

    var client = ElasticSearchFactory.GetClient();

    // The file is UTF-8 encoded; decoding it as ASCII would replace umlauts with '?'.
    var csvPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "StiftungsdatenUtf8.csv");
    var allLines = File.ReadAllLines(csvPath, Encoding.UTF8);
    Console.WriteLine("Total lines: " + allLines.Length);

    var count = 0;
    foreach (var line in allLines)
    {
        count++;

        var values = line.Split(';');
        if (values.Length < requiredFieldCount)
        {
            Console.WriteLine("Skipped line: " + count + " (found " + values.Length + " of " + requiredFieldCount + " fields)");
            continue;
        }

        var stiftung = new Stiftung
        {
            sourceId = values[0],
            name = string.IsNullOrWhiteSpace(values[1])
                ? (string.IsNullOrWhiteSpace(values[2]) ? values[3] : values[2])
                : values[1],
            nameshort = values[4],
            adresse = values[5] + " " + values[6] + ", " + values[7] + " " + values[8],
            url = values[9],
            zweck = values[10]
        };

        var indexResponse = client.IndexDocument(stiftung);
        if (!indexResponse.IsValid)
        {
            Console.WriteLine("Failed to index line: " + count);
            continue;
        }

        Console.WriteLine("Processed line: " + count);
    }
}
/// <summary>
/// Reads up to 10000 foundations from the search index and indexes one <c>Tag</c>
/// document for every tag of a foundation that is accepted by <c>FilterTag</c>.
/// Foundations without a tag collection are ignored.
/// </summary>
public void FillTagsByStiftungen()
{
    var elastic = ElasticSearchFactory.GetClient();
    var response = elastic.Search<Stiftung>(query => query.From(0).Size(10000));

    foreach (var foundation in response.Documents)
    {
        if (foundation.tags == null)
        {
            continue;
        }

        foreach (var tagName in foundation.tags)
        {
            if (!FilterTag(tagName))
            {
                continue;
            }

            // Each tag document carries the owning foundation's canton, id and name.
            var tagDocument = new Tag
            {
                kanton = foundation.kanton,
                name = tagName,
                stiftungId = foundation.id,
                stiftungName = foundation.name
            };
            elastic.IndexDocument(tagDocument);
        }
    }
}
/// <summary>
/// Reads up to 10000 foundations from the search index and indexes one
/// <c>Stiftungsrat</c> document per board member that has a name. The member's
/// original name is kept in <c>nameFull</c>; <c>name</c> holds the result of
/// <c>GetOnlyName</c>.
/// </summary>
public void FillSizungsraeteByStiftungen()
{
    var elastic = ElasticSearchFactory.GetClient();
    var response = elastic.Search<Stiftung>(query => query.From(0).Size(10000));

    // Flatten foundation -> board members, skipping missing collections and unnamed entries.
    var boardMembers =
        from foundation in response.Documents
        where foundation.stiftungsratsmitglieder != null
        from member in foundation.stiftungsratsmitglieder
        where member?.name != null
        select new Stiftungsrat
        {
            kanton = foundation.kanton,
            name = GetOnlyName(member.name),
            nameFull = member.name,
            stiftungId = foundation.id,
            stiftungName = foundation.name
        };

    foreach (var boardMember in boardMembers)
    {
        elastic.IndexDocument(boardMember);
    }
}
/// <summary>
/// Runs <c>ProcessFoundationAsync</c> (for the year 2017) for up to 5000 foundations
/// from the search index and, for every successful result, stores the balance sheet
/// total, the financial statement URL and a timestamp as a partial document update.
/// A failure for one foundation is written to the console and does not stop the run.
/// </summary>
/// <param name="args">Command line arguments (not used).</param>
static async Task MainAsync(string[] args)
{
    Initialize();

    var elastic = ElasticSearchFactory.GetClient();
    var response = elastic.Search<Stiftung>(query => query.From(0).Size(5000));

    var processed = 0;
    foreach (var foundation in response.Documents)
    {
        try
        {
            processed++;
            Console.WriteLine($"Processing {processed}: {foundation.name}");

            var outcome = await ProcessFoundationAsync(foundation.name, foundation.nameshort, 2017, foundation.url);
            if (!outcome.Success)
            {
                continue;
            }

            // Only the fields set here are sent as the partial document.
            var partial = new Stiftung
            {
                id = foundation.id,
                bilanzsumme26 = outcome.BalanceSheetTotal,
                jahresbericht = outcome.FinancialStatementUrl.AbsoluteUri,
                timestamp = DateTime.Now
            };
            elastic.Update(new DocumentPath<Stiftung>(partial.id), u => u.Doc(partial));
        }
        catch (Exception ex)
        {
            Console.WriteLine("Error bei der Stiftung: {0} {1}", foundation.name, ex);
        }
    }

    // Manual runs for single foundations:
    // var rega = ProcessFoundation("Rega", 2017, "rega.ch");
    // var eichholz = ProcessFoundation("Eichholz", 2017, "stiftung-eichholz.ch");
    // var sieber = ProcessFoundation("Pfarrer Sieber", 2017, "swsieber.ch");
    // var hmsg = ProcessFoundation("HMSG", 2017, "hmsg.ch");

    Console.ReadKey();
}
/// <summary>
/// Indexes every document returned by <c>GetSamples</c> and afterwards runs a match
/// query for "Brot" on the name field (first 10 hits). The query result is read but
/// not used any further.
/// </summary>
public void SeedSamples()
{
    var elastic = ElasticSearchFactory.GetClient();

    foreach (var sample in this.GetSamples())
    {
        elastic.IndexDocument(sample);
    }

    var breadSearch = elastic.Search<Stiftung>(search => search
        .From(0)
        .Size(10)
        .Query(query => query.Match(match => match.Field(doc => doc.name).Query("Brot"))));

    var hits = breadSearch.Documents;
}
/// <summary>
/// Enriches up to 5000 stored foundations with commercial register data: basic
/// company information is looked up via Zefix, the current board members are scraped
/// from the canton's Handelsregister web page using an embedded Chromium browser, and
/// the result is written back as a partial document update.
/// </summary>
/// <remarks>
/// The delays and sleeps are required to stay below 200 requests in 10 minutes to
/// Zefix. Otherwise the account will be locked!
/// </remarks>
/// <param name="args">Command line arguments (not used).</param>
static void Main(string[] args)
{
    var zefix = new ZefixSrv();
    Cef.Initialize();

    // Signalled by the page-loaded handler once the scraping of one page has ended.
    // NOTE(review): intentionally not disposed - the process ends right after the loop.
    var browserManualResetEvent = new ManualResetEvent(false);

    try
    {
        // The browser has to be disposed before Cef.Shutdown() is called.
        using (var browser = new ChromiumWebBrowser())
        {
            var elasticClient = ElasticSearchFactory.GetClient();

            // Get all Stiftungen from store
            var stiftungen = elasticClient.Search<Stiftung>(s => s.Size(5000));

            foreach (var stiftung in stiftungen.Documents)
            {
                // Try to find the Stiftung from Zefix (get some basic information)
                var companyName = stiftung.name;
                var companyInfo = zefix.FindByName(companyName);
                if (companyInfo == null)
                {
                    Console.WriteLine($"Nothing found for '{companyName}'");
                    Thread.Sleep(3000);
                    continue;
                }

                var newStiftung = new Stiftung
                {
                    id = stiftung.id,
                    handelsregisterUID = companyInfo.Uid,
                    handelsregisterCHNR = companyInfo.ChId,
                    handelsregisterAmt = companyInfo.RegisterOfficeId,
                    kanton = companyInfo.CantonIso
                };

                var hadDelay = false;

                // Try to find data from Handelsregister -> tricky
                EventHandler<LoadingStateChangedEventArgs> loadedStateChanged = async (sender, e) =>
                {
                    if (e.IsLoading)
                    {
                        return;
                    }

                    try
                    {
                        // Hard to get the final HTML view due to the technologies used by the
                        // Handelsregister solution(s). The delay gives the site enough time to
                        // load additional view parts.
                        // @Handelsregister: Please fix your SOAP service!
                        Console.WriteLine($"Loading for company '{companyName}'");
                        await Task.Delay(5000);
                        hadDelay = true;

                        var sourceVisitor = new TaskStringVisitor();
                        browser.GetMainFrame().GetSource(sourceVisitor);
                        var siteSource = await sourceVisitor.Task;

                        var members = ParseStiftungsratsmitglieder(siteSource);
                        if (members != null)
                        {
                            newStiftung.stiftungsratsmitglieder = members;
                        }
                    }
                    catch (Exception ex)
                    {
                        // This is an async void event handler: an escaping exception would be
                        // unhandled. Log it and continue with the data found so far.
                        Console.WriteLine($"Could not read Handelsregister data for '{companyName}': {ex}");
                    }
                    finally
                    {
                        // Always release the main thread, otherwise WaitOne() blocks forever.
                        browserManualResetEvent.Set();
                    }
                };

                // The Handelsregister solutions has different URLs per canton
                var address = HrgUrlHelper.GetQueryUrl(companyInfo);

                browser.LoadingStateChanged += loadedStateChanged;
                try
                {
                    browser.Load(address);
                    browserManualResetEvent.WaitOne();
                }
                finally
                {
                    browserManualResetEvent.Reset();
                    browser.LoadingStateChanged -= loadedStateChanged;
                }

                // Keep the request rate down even if the handler ended before its delay.
                if (!hadDelay)
                {
                    Thread.Sleep(5000);
                }

                newStiftung.timestamp = DateTime.Now;
                elasticClient.Update(new DocumentPath<Stiftung>(stiftung.id), u => u.Doc(newStiftung));
            }
        }
    }
    finally
    {
        Cef.Shutdown();
    }

    Console.WriteLine();
    Console.WriteLine("FINISHED :-)");
    Console.ReadKey();
}

/// <summary>
/// Parses the HTML of a Handelsregister page with AngleSharp and collects the current
/// members of the Stiftungsrat from the body of the ".personen" table.
/// </summary>
/// <param name="siteSource">HTML source of the loaded page.</param>
/// <returns>
/// The members found (possibly empty), or <c>null</c> when the page contains no
/// ".personen" table body.
/// </returns>
private static Stiftungsratsmitglied[] ParseStiftungsratsmitglieder(string siteSource)
{
    var config = Configuration.Default.WithCss();
    var parser = new HtmlParser(config);
    var document = parser.Parse(siteSource);

    var tbody = document.QuerySelector(".personen tbody");
    if (tbody == null)
    {
        return null;
    }

    var members = new List<Stiftungsratsmitglied>();
    foreach (var element in tbody.Children)
    {
        // unexpected row content or cancelled person
        if (element.ChildElementCount != 6 || element.Children.Any(ce => ce.ClassList.Contains("strike")))
        {
            continue;
        }

        var person = element.Children[3].TextContent?.Trim();
        var function = element.Children[4].TextContent?.Trim();
        var permission = element.Children[5].TextContent?.Trim();
        Console.WriteLine($"person: {person}; function: {function}; permission: {permission}");

        // Could be a company -> exclude
        if (function != "auditor")
        {
            members.Add(new Stiftungsratsmitglied
            {
                name = person,
                funktion = function,
                berechtigung = permission
            });
        }
    }

    return members.ToArray();
}