Example #1
0
        /// <summary>
        /// Counts the <c>Stiftung</c> documents in the index and calls
        /// <c>UpdateFoundationsTags</c> once per page of 20 documents until the
        /// page offset reaches the counted total.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        /// <returns>
        /// A task that completes after the last page has been processed. Returning a task
        /// (instead of <c>async void</c>) lets the caller wait for completion and observe
        /// exceptions; callers that ignore the result keep compiling unchanged.
        /// </returns>
        public static async System.Threading.Tasks.Task MainAsync(string[] args)
        {
            Console.OutputEncoding = System.Text.Encoding.UTF8;

            // Create the Elasticsearch client and the text analytics client.
            var client = ElasticSearchFactory.GetClient();

            TextAnalyticsAPI textAnalyticsClient = new TextAnalyticsAPI(new ApiKeyServiceClientCredentials())
            {
                AzureRegion = AzureRegions.Westcentralus
            };

            var countResponse = client.Count<Stiftung>();
            var count         = countResponse.Count;

            var searchSize  = 20;
            var searchStart = 0;

            // do/while on purpose: the first page is always requested, even when the count is 0.
            do
            {
                await UpdateFoundationsTags(searchStart, searchSize, client, textAnalyticsClient);

                searchStart += searchSize;
            }
            while (searchStart < count);

            Console.WriteLine("You're finished. Great!");
        }
Example #2
0
        /// <summary>
        /// Reads the semicolon-separated file <c>StiftungsdatenUtf8.csv</c> from the application
        /// base directory and indexes one <c>Stiftung</c> document per usable line.
        /// </summary>
        /// <remarks>
        /// Lines with fewer than 11 fields are reported and skipped instead of aborting the
        /// import with an <see cref="IndexOutOfRangeException"/>.
        /// </remarks>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // Columns 0..10 are read below, so a usable line needs at least 11 fields.
            const int requiredFieldCount = 11;

            var client = ElasticSearchFactory.GetClient();

            // Decode as UTF-8 (as the file name indicates). ASCII decoding would replace every
            // non-ASCII character, such as German umlauts, with '?'.
            var csvPath  = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "StiftungsdatenUtf8.csv");
            var allLines = File.ReadAllLines(csvPath, Encoding.UTF8);

            Console.WriteLine("Total lines: " + allLines.Length);
            var count = 0;

            foreach (var line in allLines)
            {
                // 1-based number of the line currently being handled.
                count++;

                var values = line.Split(';');
                if (values.Length < requiredFieldCount)
                {
                    Console.WriteLine("Skipped line (expected at least " + requiredFieldCount + " fields): " + count);
                    continue;
                }

                // Name fallback chain: column 1, then column 2, then column 3.
                var name = values[1];
                if (string.IsNullOrWhiteSpace(name))
                {
                    name = string.IsNullOrWhiteSpace(values[2]) ? values[3] : values[2];
                }

                var stiftung = new Stiftung
                {
                    sourceId  = values[0],
                    name      = name,
                    nameshort = values[4],
                    adresse   = values[5] + " " + values[6] + ", " + values[7] + " " + values[8],
                    url       = values[9],
                    zweck     = values[10]
                };

                client.IndexDocument(stiftung);

                Console.WriteLine("Processed line: " + count);
            }
        }
Example #3
0
        /// <summary>
        /// Loads the first page (up to 10000) of <c>Stiftung</c> documents and indexes one
        /// <c>Tag</c> document for every tag that passes <c>FilterTag</c>.
        /// </summary>
        public void FillTagsByStiftungen()
        {
            var elastic  = ElasticSearchFactory.GetClient();
            var response = elastic.Search<Stiftung>(search => search.From(0).Size(10000));

            foreach (var foundation in response.Documents)
            {
                // Foundations without a tag list contribute nothing.
                if (foundation.tags == null)
                {
                    continue;
                }

                foreach (var tagName in foundation.tags.Where(FilterTag))
                {
                    // Each tag document carries the owning foundation's id, name and canton.
                    var tagDocument = new Tag
                    {
                        kanton       = foundation.kanton,
                        name         = tagName,
                        stiftungId   = foundation.id,
                        stiftungName = foundation.name
                    };

                    elastic.IndexDocument(tagDocument);
                }
            }
        }
Example #4
0
        /// <summary>
        /// Loads the first page (up to 10000) of <c>Stiftung</c> documents and indexes one
        /// <c>Stiftungsrat</c> document for every board member that has a name.
        /// </summary>
        public void FillSizungsraeteByStiftungen()
        {
            var elastic  = ElasticSearchFactory.GetClient();
            var response = elastic.Search<Stiftung>(search => search.From(0).Size(10000));

            // Lazily flattens foundations into their named board members; null member
            // entries and members without a name are left out.
            var boardMembers =
                from foundation in response.Documents
                where foundation.stiftungsratsmitglieder != null
                from member in foundation.stiftungsratsmitglieder
                where member?.name != null
                select new Stiftungsrat
                {
                    kanton       = foundation.kanton,
                    name         = GetOnlyName(member.name),
                    nameFull     = member.name,
                    stiftungId   = foundation.id,
                    stiftungName = foundation.name
                };

            foreach (var boardMember in boardMembers)
            {
                elastic.IndexDocument(boardMember);
            }
        }
Example #5
0
        /// <summary>
        /// Loads up to 5000 <c>Stiftung</c> documents, runs <c>ProcessFoundationAsync</c> for each
        /// one and, when the result reports success, writes the balance-sheet total, the report
        /// URL and a timestamp back to the same document.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        /// <returns>A task that completes after all documents were handled and a key was pressed.</returns>
        static async Task MainAsync(string[] args)
        {
            Initialize();

            var elastic  = ElasticSearchFactory.GetClient();
            var response = elastic.Search<Stiftung>(search => search.From(0).Size(5000));

            var processed = 0;

            foreach (var foundation in response.Documents)
            {
                // A failure for one foundation is logged and must not stop the remaining ones.
                try
                {
                    processed += 1;
                    Console.WriteLine($"Processing {processed}: {foundation.name}");

                    var outcome = await ProcessFoundationAsync(foundation.name, foundation.nameshort, 2017, foundation.url);
                    if (!outcome.Success)
                    {
                        continue;
                    }

                    // Partial document: only the id and the freshly determined fields are set.
                    var patch = new Stiftung
                    {
                        id            = foundation.id,
                        bilanzsumme26 = outcome.BalanceSheetTotal,
                        jahresbericht = outcome.FinancialStatementUrl.AbsoluteUri,
                        timestamp     = DateTime.Now
                    };

                    elastic.Update(new DocumentPath<Stiftung>(patch.id), u => u.Doc(patch));
                }
                catch (Exception ex)
                {
                    Console.WriteLine("Error bei der Stiftung: {0} {1}", foundation.name, ex);
                }
            }

            // Manual runs for single foundations:
            // var rega = ProcessFoundation("Rega", 2017, "rega.ch");
            // var eichholz = ProcessFoundation("Eichholz", 2017, "stiftung-eichholz.ch");
            // var sieber = ProcessFoundation("Pfarrer Sieber", 2017, "swsieber.ch");
            // var hmsg = ProcessFoundation("HMSG", 2017, "hmsg.ch");

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
Example #6
0
        /// <summary>
        /// Indexes every sample returned by <c>GetSamples</c> and then runs a match query for
        /// "Brot" on the <c>name</c> field, limited to the first 10 hits.
        /// </summary>
        public void SeedSamples()
        {
            var elastic = ElasticSearchFactory.GetClient();

            foreach (var sample in GetSamples())
            {
                elastic.IndexDocument(sample);
            }

            var response = elastic.Search<Stiftung>(search => search
                .From(0)
                .Size(10)
                .Query(query => query
                    .Match(match => match
                        .Field(doc => doc.name)
                        .Query("Brot"))));

            // The hits are read but not used any further by this method.
            var hits = response.Documents;
        }
Example #7
0
        /// <summary>
        /// For each stored <c>Stiftung</c> (up to 5000): looks the foundation up by name via Zefix,
        /// loads the matching Handelsregister page in an embedded Chromium browser, extracts the
        /// current board members from the rendered HTML and writes the collected data back to
        /// the document.
        /// </summary>
        /// <remarks>
        /// The browser's load-finished handler runs asynchronously; <c>Main</c> blocks on a
        /// <see cref="ManualResetEvent"/> until the handler signals. The handler body is wrapped
        /// in try/catch/finally so that a failure while reading or parsing the page is logged
        /// and the event is still signalled.
        /// </remarks>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            var zefix = new ZefixSrv();

            Cef.Initialize();
            var browser = new ChromiumWebBrowser();
            var browserManualResetEvent = new ManualResetEvent(false);
            var elasticClient           = ElasticSearchFactory.GetClient();

            // Get all Stiftungen from store
            var stiftungen = elasticClient.Search<Stiftung>(s => s.Size(5000));

            foreach (var stiftung in stiftungen.Documents)
            {
                // Try to find the Stiftung from Zefix (get some basic information)
                // The delays and sleeps are necessary to stay below 200 requests in 10 minutes to Zefix. Otherwise the account will be locked!
                var companyName = stiftung.name;
                var companyInfo = zefix.FindByName(companyName);
                if (companyInfo == null)
                {
                    Console.WriteLine($"Nothing found for '{companyName}'");

                    Thread.Sleep(3000);
                    continue;
                }

                var newStiftung = new Stiftung
                {
                    id = stiftung.id,
                    handelsregisterUID  = companyInfo.Uid,
                    handelsregisterCHNR = companyInfo.ChId,
                    handelsregisterAmt  = companyInfo.RegisterOfficeId,
                    kanton = companyInfo.CantonIso
                };

                // Set by the handler once its 5 second delay has elapsed; otherwise Main sleeps below.
                var hadDelay = false;

                // Try to find data from Handelsregister -> tricky
                EventHandler<LoadingStateChangedEventArgs> loadedStateChanged = async(sender, e) =>
                {
                    // Only react once loading has finished.
                    if (e.IsLoading)
                    {
                        return;
                    }

                    try
                    {
                        // Hard to get the final HTML view due to the technologies used by the Handelsregister solution(s).
                        // With the delay we give the site enough time to load additional view parts.
                        // @Handelsregister: Please fix your SOAP service!
                        Console.WriteLine($"Loading for company '{companyName}'");
                        await Task.Delay(5000);

                        hadDelay = true;

                        var sourceVisitor = new TaskStringVisitor();
                        browser.GetMainFrame().GetSource(sourceVisitor);

                        var siteSource = await sourceVisitor.Task;

                        // AngleSharp to parse HTML -> grab the current members of the Stiftungsrat
                        var config   = Configuration.Default.WithCss();
                        var parser   = new HtmlParser(config);
                        var document = parser.Parse(siteSource);

                        var tbody = document.QuerySelector(".personen tbody");
                        if (tbody != null)
                        {
                            var members = new List<Stiftungsratsmitglied>();
                            foreach (var element in tbody.Children)
                            {
                                // unexpected row content or cancelled person
                                if (element.ChildElementCount != 6 ||
                                    element.Children.Any(ce => ce.ClassList.Contains("strike")))
                                {
                                    continue;
                                }

                                var person     = element.Children[3].TextContent?.Trim();
                                var function   = element.Children[4].TextContent?.Trim();
                                var permission = element.Children[5].TextContent?.Trim();

                                Console.WriteLine($"person: {person}; function: {function}; permission: {permission}");

                                // Could be a company -> exclude
                                if (function != "auditor")
                                {
                                    var member = new Stiftungsratsmitglied {
                                        name = person, funktion = function, berechtigung = permission
                                    };
                                    members.Add(member);
                                }
                            }

                            newStiftung.stiftungsratsmitglieder = members.ToArray();
                        }
                    }
                    catch (Exception ex)
                    {
                        // This lambda is an async void event handler: Main cannot catch an exception
                        // that escapes it, so the failure is logged here and the basic Zefix data
                        // is still written below.
                        Console.WriteLine($"Failed to read Handelsregister data for '{companyName}': {ex}");
                    }
                    finally
                    {
                        // Always release the waiting Main thread, also after a failure.
                        browserManualResetEvent.Set();
                    }
                };

                browser.LoadingStateChanged += loadedStateChanged;

                // The Handelsregister solutions has different URLs per canton
                var address = HrgUrlHelper.GetQueryUrl(companyInfo);
                browser.Load(address);

                // Block until the handler has signalled, then re-arm the event for the next foundation.
                browserManualResetEvent.WaitOne();
                browserManualResetEvent.Reset();

                browser.LoadingStateChanged -= loadedStateChanged;

                if (!hadDelay)
                {
                    Thread.Sleep(5000);
                }

                newStiftung.timestamp = DateTime.Now;
                elasticClient.Update(new DocumentPath<Stiftung>(stiftung.id), u => u.Doc(newStiftung));
            }

            Cef.Shutdown();

            Console.WriteLine();
            Console.WriteLine("FINISHED :-)");
            Console.ReadKey();
        }