Пример #1
0
        /// <summary>
        /// Reads <c>StiftungsdatenUtf8.csv</c> from the application's base directory and
        /// indexes one <c>Stiftung</c> document per semicolon-separated line.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Columns 0..10 are read from every record, so a usable line needs at least 11 fields.
            const int requiredColumnCount = 11;

            var client = ElasticSearchFactory.GetClient();

            // Path.Combine copes with a base directory that does or does not end with a separator.
            var csvPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "StiftungsdatenUtf8.csv");

            // The file name indicates UTF-8 content. Decoding it as ASCII would replace every
            // non-ASCII character (umlauts, accents) with '?' before it reaches the index.
            var allLines = File.ReadAllLines(csvPath, Encoding.UTF8);

            Console.WriteLine("Total lines: " + allLines.Length);
            var count = 0;

            foreach (var line in allLines)
            {
                count++;

                var values = line.Split(';');

                // Blank or truncated lines would otherwise abort the whole import with an
                // IndexOutOfRangeException; report and skip them instead.
                if (values.Length < requiredColumnCount)
                {
                    Console.WriteLine("Skipped line: " + count + " (expected at least " + requiredColumnCount + " columns, found " + values.Length + ")");
                    continue;
                }

                var stiftung = new Stiftung
                {
                    sourceId  = values[0],
                    // First non-blank of columns 1 and 2; column 3 is the last resort.
                    name      = string.IsNullOrWhiteSpace(values[1])
                                    ? (string.IsNullOrWhiteSpace(values[2]) ? values[3] : values[2])
                                    : values[1],
                    nameshort = values[4],
                    adresse   = values[5] + " " + values[6] + ", " + values[7] + " " + values[8],
                    url       = values[9],
                    zweck     = values[10]
                };

                var indexResponse = client.IndexDocument(stiftung);

                // Surface rejected documents instead of silently counting them as processed.
                if (!indexResponse.IsValid)
                {
                    Console.WriteLine("Indexing failed for line: " + count + " " + indexResponse.DebugInformation);
                    continue;
                }

                Console.WriteLine("Processed line: " + count);
            }
        }
Пример #2
0
        /// <summary>
        /// Fetches up to 5000 <c>Stiftung</c> documents from the search index, runs
        /// <c>ProcessFoundationAsync</c> for each of them (year 2017) and, when the result
        /// reports success, writes the balance sheet total, the financial statement URL and
        /// a timestamp back to the document as a partial update.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static async Task MainAsync(string[] args)
        {
            Initialize();

            var client = ElasticSearchFactory.GetClient();

            var response    = client.Search<Stiftung>(query => query.From(0).Size(5000));
            var foundations = response.Documents;

            var processed = 0;

            foreach (var foundation in foundations)
            {
                try
                {
                    processed++;
                    Console.WriteLine($"Processing {processed}: {foundation.name}");

                    var outcome = await ProcessFoundationAsync(foundation.name, foundation.nameshort, 2017, foundation.url);

                    // Nothing to store when processing did not succeed.
                    if (!outcome.Success)
                    {
                        continue;
                    }

                    // Partial document: only the fields set here are sent with the update.
                    var patch = new Stiftung
                    {
                        id            = foundation.id,
                        bilanzsumme26 = outcome.BalanceSheetTotal,
                        jahresbericht = outcome.FinancialStatementUrl.AbsoluteUri,
                        timestamp     = DateTime.Now
                    };

                    client.Update(new DocumentPath<Stiftung>(patch.id), u => u.Doc(patch));
                }
                catch (Exception ex)
                {
                    // A failure for one foundation is logged and must not stop the remaining ones.
                    Console.WriteLine("Error bei der Stiftung: {0} {1}", foundation.name, ex);
                }
            }

            // Manual:
            // var rega = ProcessFoundation("Rega", 2017, "rega.ch");
            // var eichholz = ProcessFoundation("Eichholz", 2017, "stiftung-eichholz.ch");
            // var sieber = ProcessFoundation("Pfarrer Sieber", 2017, "swsieber.ch");
            // var hmsg = ProcessFoundation("HMSG", 2017, "hmsg.ch");

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
Пример #3
0
        /// <summary>
        /// Loads one page of <c>Stiftung</c> documents, requests key phrases for those that
        /// have no tags yet and stores the returned phrases as tags on each document.
        /// </summary>
        /// <param name="searchStart">Offset of the first document of the page.</param>
        /// <param name="searchSize">Number of documents in the page.</param>
        /// <param name="client">Elasticsearch client used for the search and the updates.</param>
        /// <param name="textAnalyticsClient">Text analytics client used for key phrase extraction.</param>
        /// <returns>
        /// A task that completes once the page is handled. A 30 second delay is awaited only
        /// after a batch was actually sent to the key phrase service.
        /// </returns>
        private static async Task UpdateFoundationsTags(int searchStart, int searchSize, Nest.ElasticClient client, TextAnalyticsAPI textAnalyticsClient)
        {
            var page = client.Search<Stiftung>(query => query.From(searchStart).Size(searchSize));

            // Only documents without any tags need processing.
            var untagged = page.Documents
                               .Where(candidate => candidate.tags == null || candidate.tags.Length < 1)
                               .ToArray();

            if (untagged.Length < 1)
            {
                return;
            }

            var purposeInputs = await GetPurposeDescriptionsAsMultiLanguageInput(textAnalyticsClient, untagged);

            if (purposeInputs.Count < 1)
            {
                return;
            }

            KeyPhraseBatchResult batchResult = await textAnalyticsClient.KeyPhrasesAsync(new MultiLanguageBatchInput(purposeInputs));

            // Print the key phrases of every result document and persist them as tags.
            foreach (var analysed in batchResult.Documents)
            {
                Console.WriteLine("Document ID: {0} ", analysed.Id);
                Console.WriteLine("\t Key phrases:");

                // The result document id is parsed as the GUID of the Stiftung to update.
                Guid foundationId = new Guid(analysed.Id);
                var  phrases      = new List<string>();

                foreach (string phrase in analysed.KeyPhrases)
                {
                    Console.WriteLine("\t\t" + phrase);
                    phrases.Add(phrase);
                }

                // Partial document: only id, tags and timestamp are set for the update.
                var patch = new Stiftung
                {
                    id        = foundationId,
                    tags      = phrases.ToArray(),
                    timestamp = DateTime.Now
                };

                client.Update<Stiftung>(new Nest.DocumentPath<Stiftung>(foundationId), u => u.Doc(patch));
            }

            // Pause after each processed batch.
            // NOTE(review): presumably throttles calls to the text analytics service; confirm the intended limit.
            await Task.Delay(30000);
        }
Пример #4
0
        /// <summary>
        /// Enriches every stored <c>Stiftung</c> with commercial register data: basic company
        /// information is looked up through Zefix, then the canton-specific Handelsregister
        /// page is loaded in an embedded Chromium browser and parsed for the current board
        /// members. The collected data is written back as a partial document update.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            var zefix = new ZefixSrv();

            Cef.Initialize();
            var browser = new ChromiumWebBrowser();
            var browserManualResetEvent = new ManualResetEvent(false);
            var elasticClient           = ElasticSearchFactory.GetClient();

            // Get all Stiftungen from store
            var stiftungen = elasticClient.Search <Stiftung>(s => s.Size(5000));

            foreach (var stiftung in stiftungen.Documents)
            {
                // Try to find the Stiftung from Zefix (get some basic information)
                // The Delays and Sleeps are necessary to have not more than 200 requests in 10 minutes to Zefix. Otherwise the account will be locked!
                var companyName = stiftung.name;
                var companyInfo = zefix.FindByName(companyName);
                if (companyInfo == null)
                {
                    Console.WriteLine($"Nothing found for '{companyName}'");

                    Thread.Sleep(3000);
                    continue;
                }

                var newStiftung = new Stiftung
                {
                    id = stiftung.id,
                    handelsregisterUID  = companyInfo.Uid,
                    handelsregisterCHNR = companyInfo.ChId,
                    handelsregisterAmt  = companyInfo.RegisterOfficeId,
                    kanton = companyInfo.CantonIso
                };

                var hadDelay = false;

                // Try to find data from Handelsregister -> tricky
                EventHandler <LoadingStateChangedEventArgs> loadedStateChanged = async(sender, e) =>
                {
                    if (e.IsLoading)
                    {
                        return;
                    }

                    // This handler is effectively "async void": an exception thrown in here is
                    // not observed by the main thread. Without the try/finally a failure would
                    // skip the Set() call and leave the main thread blocked in WaitOne() forever.
                    try
                    {
                        // Hard to get the final HTML view due to the used technologies of the Handelsregister solution(s).
                        // With the delay we give the site time enough to load additional view-parts
                        // @Handelsregister: Please fix your SOAP service!
                        Console.WriteLine($"Loading for company '{companyName}'");
                        await Task.Delay(5000);

                        hadDelay = true;

                        var sourceVisitor = new TaskStringVisitor();
                        browser.GetMainFrame().GetSource(sourceVisitor);

                        var siteSource = await sourceVisitor.Task;

                        // AngleSharp to parse HTML -> grab the current members of the Stiftungsrat
                        var config   = Configuration.Default.WithCss();
                        var parser   = new HtmlParser(config);
                        var document = parser.Parse(siteSource);

                        var tbody = document.QuerySelector(".personen tbody");
                        if (tbody != null)
                        {
                            var members = new List <Stiftungsratsmitglied>();
                            foreach (var element in tbody.Children)
                            {
                                // unexpected row content or cancelled person
                                if (element.ChildElementCount != 6 ||
                                    element.Children.Any(ce => ce.ClassList.Contains("strike")))
                                {
                                    continue;
                                }

                                var person     = element.Children[3].TextContent?.Trim();
                                var function   = element.Children[4].TextContent?.Trim();
                                var permission = element.Children[5].TextContent?.Trim();

                                Console.WriteLine($"person: {person}; function: {function}; permission: {permission}");

                                // Could be a company -> exclude
                                if (function != "auditor")
                                {
                                    var member = new Stiftungsratsmitglied {
                                        name = person, funktion = function, berechtigung = permission
                                    };
                                    members.Add(member);
                                }
                            }

                            newStiftung.stiftungsratsmitglieder = members.ToArray();
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: the Zefix data collected above is still written; only the
                        // board members stay unset for this Stiftung.
                        Console.WriteLine($"Error while reading Handelsregister data for '{companyName}': {ex}");
                    }
                    finally
                    {
                        // Always release the main thread, whether or not the page could be parsed.
                        browserManualResetEvent.Set();
                    }
                };

                browser.LoadingStateChanged += loadedStateChanged;

                // The Handelsregister solutions has different URLs per canton
                var address = HrgUrlHelper.GetQueryUrl(companyInfo);
                browser.Load(address);

                browserManualResetEvent.WaitOne();
                browserManualResetEvent.Reset();

                browser.LoadingStateChanged -= loadedStateChanged;

                // Only reached with hadDelay == false when the handler failed before its own
                // delay completed; sleeping here keeps the pause between Zefix requests intact.
                if (!hadDelay)
                {
                    Thread.Sleep(5000);
                }

                newStiftung.timestamp = DateTime.Now;
                elasticClient.Update(new DocumentPath <Stiftung>(stiftung.id), u => u.Doc(newStiftung));
            }

            Cef.Shutdown();

            Console.WriteLine();
            Console.WriteLine("FINISHED :-)");
            Console.ReadKey();
        }