Exemple #1
        /// <summary>
        /// Pages through the synonym results for <paramref name="word"/>: on each pass the related
        /// words are parsed from <paramref name="documentNode"/> and written to the database, then
        /// NavigateToNextPageIfExist is asked for a following page.
        /// </summary>
        /// <param name="word">Word whose related words are being collected.</param>
        /// <param name="driver">Web driver handed to NavigateToNextPageIfExist.</param>
        /// <param name="db">Database context the related words are written to.</param>
        /// <param name="adminUser">User passed on to the related-word parser.</param>
        /// <param name="page">Current page counter; logged as page + 1.</param>
        /// <param name="documentNode">Parsed HTML of the current result page.</param>
        /// <param name="url">Current page URL; overwritten when a next page is found.</param>
        // NOTE(review): the braces and the loop-exit branch are not visible in this excerpt, so
        // how the while (true) loop ends when no next page exists cannot be confirmed from here.
        private void ProcessSynonymsUntilEmpty(Word word, IWebDriver driver, WordHintDbContext db, User adminUser, int page, HtmlNode documentNode, string url)
            while (true)
                // the same message goes to both the structured log and the status writer
                Log.Information("Processing synonym search for '{0}' on page {1}", word.Value, page + 1);
                writer.WriteLine("Processing synonym search for '{0}' on page {1}", word.Value, page + 1);

                // parse all related words
                var relatedWords = ReadRelatedWordsAgilityPack(documentNode, adminUser);

                // and add to database
                // don't update state (the trailing 'false' argument)
                WordDatabaseService.AddToDatabase(db, this.source, word, relatedWords, writer, false);

                // go to the next page if one exists
                // Note! this only works if we are logged in
                var(hasFoundNextPage, pageNumber, pageUrl, pageNode) = NavigateToNextPageIfExist(driver, documentNode);
                if (hasFoundNextPage)
                    // carry the next page's position and document into the following iteration
                    url          = pageUrl;
                    page         = pageNumber;
                    documentNode = pageNode;
        /// <summary>
        /// Reads the alphabetical word listing for <paramref name="wordPrefix"/> from the driver's
        /// current document, records each word as the current processing state and fetches its
        /// synonyms through GetWordSynonyms.
        /// </summary>
        /// <param name="wordPrefix">Prefix being processed; stored as the Comment of the state word.</param>
        /// <param name="url">Address of the word page (used in the timeout message).</param>
        /// <param name="driver">Web driver whose document is parsed.</param>
        /// <param name="db">Database context used for state updates and synonym storage.</param>
        /// <param name="adminUser">User passed on to the parser and to GetWordSynonyms.</param>
        /// <param name="lastWord">Word to resume from, or null to process every word.</param>
        // NOTE(review): the try block that performs the navigation is not visible in this excerpt;
        // only its catch handler survives. Braces are missing throughout.
        private void ReadWordsByWordUrl(string wordPrefix, string url, IWebDriver driver, WordHintDbContext db, User adminUser, string lastWord)
            // go to word page
            // NOTE(review): catches every exception type and reports it as a timeout
            catch (System.Exception)
                // Log.Error("Timeout navigating to '{0}'", url);
                writer.WriteLine("Timeout navigating to '{0}'", url);

            Log.Information("Processing word search for '{0}'", wordPrefix);
            writer.WriteLine("Processing word search for '{0}'", wordPrefix);

            // read the whole document into a HtmlNode
            HtmlNode doc = driver.GetDocumentNode();

            // and parse using agility pack
            var words = ParseWordsAgilityPack(doc, adminUser);

            // stays true until the first word that is not skipped has been reached
            bool doSkip = true;

            // each entry pairs a word (Item1) with the link to its page (Item2)
            foreach (var wordAndHref in words)
                var word     = wordAndHref.Item1;
                var href     = wordAndHref.Item2;
                var wordText = word.Value;

                // skip until we get to the last word
                // NOTE(review): presumably a 'continue' followed the two log lines — not visible here
                if (doSkip && lastWord != null && lastWord != wordText)
                    Log.Information("Skipping alphabetic word '{0}' until we find '{1}'", wordText, lastWord);
                    writer.WriteLine("Skipping alphabetic word '{0}' until we find '{1}'", wordText, lastWord);
                doSkip = false; // make sure we don't skip on the next word after we have skipped

                // update that we are processing this word
                WordDatabaseService.UpdateState(db, source, new Word()
                    Value = wordText, Comment = wordPrefix, CreatedDate = DateTime.Now
                }, writer, true);

                GetWordSynonyms(word, driver, db, adminUser, href);
Exemple #3
        /// <summary>
        /// Opens a database context, resolves the admin user, switches the change tracker to its
        /// fast (no tracking, no auto-detect) settings and starts ReadWordsFromUrl.
        /// </summary>
        /// <param name="source">Source name used to look up the last processed word.</param>
        /// <param name="doContinueWithLastWord">When true, resume from the last word stored for the source.</param>
        // NOTE(review): braces are missing in this excerpt; the object initializer below is not closed.
        private void DoScrape(string source, bool doContinueWithLastWord)
            var dbContextFactory = new DesignTimeDbContextFactory();

            using (var db = dbContextFactory.CreateDbContext(connectionString, Log.Logger))
                // stays null unless resuming was requested
                string lastWordString = null;
                if (doContinueWithLastWord)
                    lastWordString = WordDatabaseService.GetLastWordFromSource(db, source);

                // Note!
                // the user needs to be added before we disable tracking and disable AutoDetectChanges
                // otherwise this will crash
                // NOTE(review): no statement that adds the user is visible in this excerpt

                // set admin user
                var adminUser = new User()
                    FirstName = "",
                    LastName  = "Admin",
                    UserName  = "******"

                // check if user already exists
                // NOTE(review): the lookup matches on FirstName only, which is the empty string for
                // this admin user, so any user with an empty first name matches — confirm intended
                var existingUser = db.DictionaryUsers.Where(u => u.FirstName == adminUser.FirstName).FirstOrDefault();
                if (existingUser != null)
                    adminUser = existingUser;

                // disable tracking to speed things up
                // note that this doesn't load the virtual properties, but loads the object ids after a save
                db.ChangeTracker.QueryTrackingBehavior = QueryTrackingBehavior.NoTracking;

                // this doesn't seem to work when adding new users all the time
                db.ChangeTracker.AutoDetectChangesEnabled = false;

                ReadWordsFromUrl(db, adminUser, lastWordString);
        /// <summary>
        /// Scrapes words of a given length: determines the word (or fallback pattern) to resume
        /// from, resolves the admin user, configures the change tracker and runs
        /// ReadWordsByWordPermutations with a Chrome driver.
        /// </summary>
        /// <param name="letterCount">Number of letters of the words to scrape.</param>
        /// <param name="source">Source name used to look up the last processed word.</param>
        /// <param name="doContinueWithLastWord">When true, resume from the last stored word of this length.</param>
        // NOTE(review): braces are missing in this excerpt, and the switch below has lost its
        // 'break' statements and (presumably) its 'default:' label.
        private void DoScrape(int letterCount, string source, bool doContinueWithLastWord)
            var dbContextFactory = new DesignTimeDbContextFactory();

            using (var db = dbContextFactory.CreateDbContext(connectionString, Log.Logger))
                string lastWordString = null;
                if (doContinueWithLastWord)
                    lastWordString = WordDatabaseService.GetLastWordFromLetterCount(db, source, letterCount);

                // if we didn't get back a word, use a pattern instead
                if (lastWordString == null)
                    switch (letterCount)
                    case 1:
                        lastWordString = "a";

                    case 2:
                        lastWordString = "aa";

                        // presumably the default case: "aa" padded with '?' up to letterCount characters
                        lastWordString = "aa" + new string('?', letterCount - 2);

                    Log.Information("Could not find any words having '{0}' letters. Therefore using last word pattern '{1}'.", letterCount, lastWordString);

                // Note!
                // the user needs to be added before we disable tracking and disable AutoDetectChanges
                // otherwise this will crash
                // NOTE(review): no statement that adds the user is visible in this excerpt

                // set admin user
                var adminUser = new User()
                    FirstName = "",
                    LastName  = "Admin",
                    UserName  = "******"

                // check if user already exists
                // NOTE(review): matches on FirstName only, which is the empty string here — confirm intended
                var existingUser = db.DictionaryUsers.Where(u => u.FirstName == adminUser.FirstName).FirstOrDefault();
                if (existingUser != null)
                    adminUser = existingUser;

                // disable tracking to speed things up
                // note that this doesn't load the virtual properties, but loads the object ids after a save
                db.ChangeTracker.QueryTrackingBehavior = QueryTrackingBehavior.NoTracking;

                // this doesn't seem to work when adding new users all the time
                db.ChangeTracker.AutoDetectChangesEnabled = false;

                // the driver is disposed when the using block ends
                using (var driver = ChromeDriverUtils.GetChromeDriver(true))
                    // read all words with the letter count
                    ReadWordsByWordPermutations(letterCount, driver, db, adminUser, lastWordString);
        /// <summary>
        /// Collects the synonyms listed for <paramref name="word"/> by reading the list items under
        /// the 'wordlist' element with Selenium and stores them as related words in the database.
        /// </summary>
        /// <param name="word">Word whose synonyms are collected.</param>
        /// <param name="driver">Web driver; cast to ChromeDriver inside the method.</param>
        /// <param name="db">Database context the related words are written to.</param>
        /// <param name="adminUser">User assigned to every created hint word.</param>
        /// <param name="url">Address of the synonym page for the word.</param>
        // NOTE(review): braces and several statements (early return, opening/switching/closing
        // the tab, the try around FindElement, adding the hint to the list) are not visible in
        // this excerpt; only their describing comments remain.
        private void GetWordSynonyms(Word word, IWebDriver driver, WordHintDbContext db, User adminUser, string url)
            // there is a bug in the website that makes a  query with "0" fail
            if (word.Value == "0")

            // open a new tab and set the context
            // NOTE(review): direct cast — throws InvalidCastException if the driver is not a ChromeDriver
            var chromeDriver = (ChromeDriver)driver;

            // save a reference to our original tab's window handle
            var originalTabInstance = chromeDriver.CurrentWindowHandle;

            // execute some JavaScript to open a new window

            // save a reference to our new tab's window handle, this would be the last entry in the WindowHandles collection
            var newTabInstance = chromeDriver.WindowHandles[driver.WindowHandles.Count - 1];

            // switch our WebDriver to the new tab's window handle

            // lets navigate to a web site in our new tab

            Log.Information("Processing synonym search for '{0}'", word.Value);
            writer.WriteLine("Processing synonym search for '{0}'", word.Value);

            // parse all synonyms
            IList <IWebElement> listElements = driver.FindElements(By.XPath("//div[@id='wordlist']/ul[@class='word']/li"));
            IWebElement         ahref        = null;

            var relatedWords = new List <Word>();

            foreach (IWebElement listElement in listElements)
                    // each list item is expected to contain a link to the synonym
                    ahref = listElement.FindElement(By.TagName("a"));
                catch (NoSuchElementException)

                var hintText = ahref.Text;
                var href     = ahref.GetAttribute("href");

                var hint = new Word
                    Language        = "no",
                    Value           = hintText,
                    // letters are counted without spaces
                    NumberOfLetters = hintText.Count(c => c != ' '),
                    NumberOfWords   = ScraperUtils.CountNumberOfWords(hintText),
                    User            = adminUser,
                    CreatedDate     = DateTime.Now,
                    Source          = this.source


            relatedWords = relatedWords.Distinct().ToList(); // Note that this requires the object to implement IEquatable<Word>

            // and add to database
            WordDatabaseService.AddToDatabase(db, this.source, word, relatedWords, writer);

            // now lets close our new tab

            // and switch our WebDriver back to the original tab's window handle

            // and have our WebDriver focus on the main document in the page to send commands to
        /// <summary>
        /// Builds every two-letter combination of the alphabet below, selects the slice of that list
        /// assigned to this run, and reads the alphabetical word page of each pattern through
        /// ReadWordsByWordUrl.
        /// </summary>
        /// <param name="startLetterCount">One-based number of the slice to process.</param>
        /// <param name="endLetterCount">Total number of slices the pattern list is divided into.</param>
        /// <param name="driver">Web driver passed on to ReadWordsByWordUrl.</param>
        /// <param name="db">Database context.</param>
        /// <param name="adminUser">User passed on to ReadWordsByWordUrl.</param>
        /// <param name="doContinueWithLastWord">When true, look up the last stored word for each pattern.</param>
        // NOTE(review): braces, the 'continue'/'break' statements of the skip/quit branches and
        // any increment of curIndex are not visible in this excerpt.
        private void ReadWordsByWordPermutations(int startLetterCount, int endLetterCount, IWebDriver driver, WordHintDbContext db, User adminUser, bool doContinueWithLastWord)
            var alphabet        = "abcdefghijklmnopqrstuvwxyzåæøö";
            var permutations    = alphabet.Select(x => x.ToString());
            int permutationSize = 2;

            // each pass appends one more letter to every existing pattern
            for (int i = 0; i < permutationSize - 1; i++)
                permutations = permutations.SelectMany(x => alphabet, (x, y) => x + y);

            // materialize once so the list can be indexed and counted
            var wordPermutationList = permutations.ToList();


            // use the letter count a little bit different when it comes to the alphabetic index:
            // letterCount is the index to start with divided out on the total alphabetic index
            // e.g.
            // if letter count is between 1 - 4 of a total index length of 1000:
            // 1 is 1
            // 2 is 250
            // 3 is 500
            // 4 is 750
            // NOTE(review): with the formula below the first slice actually starts at index 0
            int length      = wordPermutationList.Count;
            int startIndex  = (int)(((double)length / (double)endLetterCount) * (startLetterCount - 1));
            int endIndex    = (int)((((double)length / (double)endLetterCount) * startLetterCount) - 1);
            var startString = wordPermutationList[startIndex];
            var endString   = wordPermutationList[endIndex];

            Log.Information("Processing alphabetic permutation search using {0}-{1} = {2}-{3} ({4} - {5}) ", startLetterCount, endLetterCount, startIndex, endIndex, startString, endString);
            writer.WriteLine("Processing alphabetic permutation search using {0}-{1} = {2}-{3} ({4} - {5}) ", startLetterCount, endLetterCount, startIndex, endIndex, startString, endString);

            // add some extra status information to the writer
            if (this.writer is SignalRClientWriter)
                (this.writer as SignalRClientWriter).ExtraStatusInformation = string.Format("Processing alphabetic permutation search using {0}-{1} = {2}-{3} ({4} - {5}) ", startLetterCount, endLetterCount, startIndex, endIndex, startString, endString);

            int curIndex = 0;

            foreach (var wordPermutation in wordPermutationList)
                // a single character with a code below 45 is written as '%' followed by its hex code
                string wordPattern = wordPermutation.Length == 1 && wordPermutation[0] < 45 ? string.Format("%{0:X}", (int)wordPermutation[0]) : wordPermutation;

                if (curIndex < startIndex + 1)
                    Log.Information("Skipping pattern '{0}' until we reach index {1}: '{2}'. [{3}/{4}]", wordPattern, startIndex, startString, curIndex, length);
                    writer.WriteLine("Skipping pattern '{0}' until we reach index {1}: '{2}'. [{3}/{4}]", wordPattern, startIndex, startString, curIndex, length);
                else if (length != curIndex && curIndex == endIndex + 1) // stop at last index except very last character
                    // reached the end - quit
                    Log.Information("Quitting because we have reached the last index to process: {0} at index {1}.", wordPattern, curIndex);
                    writer.WriteLine("Quitting because we have reached the last index to process: {0} at index {1}.", wordPattern, curIndex);

                string lastWordString = null;
                if (doContinueWithLastWord)
                    lastWordString = WordDatabaseService.GetLastWordFromComment(db, source, wordPattern);

                // var href = $"https://www.gratiskryssord.no/kryssordbok/?kart={wordPattern}#oppslag";
                var href = $"https://www.gratiskryssord.no/kryssordbok/alfabetisk/{wordPattern}/";
                // if (wordPermutation == "xå")
                // {
                //     wordPattern = "kå";
                //     href = $"https://www.gratiskryssord.no/kryssordbok/?kart={wordPattern}#oppslag";
                //     lastWordString = WordDatabaseService.GetLastWordFromComment(db, source, wordPattern);
                // }
                // else if (wordPermutation == "&")
                // {
                //     // debugging - break here
                // }

                // NOTE(review): the next three assignments overwrite the values computed above with
                // the fixed pattern "na" and last word "NAVN" on every iteration as shown — this
                // looks like leftover debugging code; confirm before relying on this method.
                wordPattern    = "na";
                href           = $"https://www.gratiskryssord.no/kryssordbok/alfabetisk/{wordPattern}/";
                lastWordString = "NAVN";
                ReadWordsByWordUrl(wordPattern, href, driver, db, adminUser, lastWordString);
        /// <summary>
        /// Collects the synonyms for <paramref name="word"/> page by page: each page's document is
        /// parsed with Agility Pack, the related words are stored, and the link to the following
        /// page is looked up with FindNextPageOrNull.
        /// </summary>
        /// <param name="word">Word whose synonyms are collected.</param>
        /// <param name="driver">Web driver; cast to ChromeDriver inside the method.</param>
        /// <param name="db">Database context the related words are written to.</param>
        /// <param name="adminUser">User passed on to the synonym parser.</param>
        /// <param name="url">Base address of the word page; "side/{page}/" is appended to it.</param>
        // NOTE(review): braces and several statements (early return, tab handling, navigation to
        // the page URL, advancing 'page' and leaving the loop) are not visible in this excerpt.
        private void GetWordSynonyms(Word word, IWebDriver driver, WordHintDbContext db, User adminUser, string url)
            // there is a bug in the website that makes a  query with "0" fail
            if (word.Value == "0")

            // open a new tab and set the context
            // NOTE(review): direct cast — throws InvalidCastException if the driver is not a ChromeDriver
            var chromeDriver = (ChromeDriver)driver;

            // save a reference to our original tab's window handle
            var originalTabInstance = chromeDriver.CurrentWindowHandle;

            // execute some JavaScript to open a new window

            // save a reference to our new tab's window handle, this would be the last entry in the WindowHandles collection
            var newTabInstance = chromeDriver.WindowHandles[driver.WindowHandles.Count - 1];

            // switch our WebDriver to the new tab's window handle

            // lets navigate to a web site in our new tab
            // https://www.gratiskryssord.no/kryssordbok/navn/side/1/
            var page    = 1;
            var pageUrl = $"{url}side/{page}/";

            while (true)
                Log.Information("Processing synonym search for '{0}' on page {1}", word.Value, page);
                writer.WriteLine("Processing synonym search for '{0}' on page {1}", word.Value, page);

                // read the whole document into a HtmlNode
                HtmlNode doc = driver.GetDocumentNode();

                // and parse synonyms using Agility Pack
                var relatedWords = ParseSynonymsAgilityPack(word, doc, adminUser);

                // and add to database (the trailing 'false' argument differs from the single-page variant)
                WordDatabaseService.AddToDatabase(db, this.source, word, relatedWords, writer, false);

                // go to the next page if one exists
                var nextPageElement = FindNextPageOrNull(doc, word.Value.ToLower(), page + 1);
                if (nextPageElement != null)
                    var hintText = nextPageElement.InnerText.Trim().ToUpper();
                    hintText = HttpUtility.HtmlDecode(hintText); // ensure that text like &amp; gets converted to &
                    // the href attribute is site-relative, so the host is prepended
                    var    href        = nextPageElement.Attributes["href"].Value;
                    string nextPageUrl = $"https://www.gratiskryssord.no{href}";


            // now lets close our new tab

            // and switch our WebDriver back to the original tab's window handle

            // and have our WebDriver focus on the main document in the page to send commands to
Exemple #8
        /// <summary>
        /// Command-line entry point: parses the arguments, loads the crossword board (from a URL or
        /// a file) and the dictionary (from the database or a file), then either generates
        /// crosswords for the SignalR hub, imports a JSON synonym file into the database, or
        /// generates a single crossword and saves it to the output file.
        /// </summary>
        /// <param name="args">Command-line arguments handed to ParseInput.</param>
        /// <returns>Process exit code; the return statements are not visible in this excerpt.</returns>
        // NOTE(review): braces, 'try'/'else' keywords and 'return' statements were stripped from
        // this excerpt, so the exact branch structure has to be inferred from the indentation.
        public static int Main(string[] args)
            Console.WriteLine("CrossWord ver. {0} ", "1.0");

            string inputFile, outputFile, puzzle, dictionaryFile;

            if (!ParseInput(args, out inputFile, out outputFile, out puzzle, out dictionaryFile))
            ICrossBoard board;

                // an input starting with "http" is loaded from the web, anything else from disk
                if (inputFile.StartsWith("http"))
                    board = CrossBoardCreator.CreateFromUrl(inputFile);
                    board = CrossBoardCreator.CreateFromFile(inputFile);
            catch (Exception e)
                // NOTE(review): 'e' is passed as a format argument to an already formatted string
                // that has no placeholder for it, so the exception details are never printed
                Console.WriteLine(string.Format("Cannot load crossword layout from file {0}.", inputFile), e);

            ICrossDictionary dictionary;

                // the literal "database" selects the database-backed dictionary
                // NOTE(review): connection string with credentials is hard-coded here
                if (dictionaryFile.Equals("database"))
                    dictionary = new DatabaseDictionary("server=localhost;port=3306;database=dictionary;user=user;password=password;charset=utf8;", board.MaxWordLength);
                    dictionary = new Dictionary(dictionaryFile, board.MaxWordLength);
            catch (Exception e)
                // NOTE(review): same as above — 'e' is not printed by this call
                Console.WriteLine(string.Format("Cannot load dictionary from file {0}.", dictionaryFile), e);

            if (outputFile.Equals("signalr"))
                // generate and send to signalr hub
                // var tokenSource = new CancellationTokenSource(TimeSpan.FromSeconds(20));
                var  tokenSource = new CancellationTokenSource();
                Task workerTask  = Task.Run(
                    async() =>
                    CancellationToken token = tokenSource.Token;
                        await Generator.GenerateCrosswordsAsync(board, dictionary, puzzle, token);
                    catch (OperationCanceledException)
                        Console.WriteLine("Cancelled @ {0}", DateTime.Now);

                // wait until the task is done
                // NOTE(review): the statement that waits on workerTask is not visible in this excerpt

                // or wait until the user presses a key
                // Console.WriteLine("Press Enter to Exit ...");
                // Console.ReadLine();
                // tokenSource.Cancel();
            else if (outputFile.Equals("database"))
                var dbContextFactory = new DesignTimeDbContextFactory();
                using (var db = dbContextFactory.CreateDbContext("server=localhost;database=dictionary;user=user;password=password;charset=utf8;", Log.Logger)) // null instead of Log.Logger enables debugging
                    // setup database
                    // You would either call EnsureCreated() or Migrate().
                    // EnsureCreated() is an alternative that completely skips the migrations pipeline and just creates a database that matches you current model.
                    // It's good for unit testing or very early prototyping, when you are happy just to delete and re-create the database when the model changes.
                    // db.Database.EnsureDeleted();
                    // db.Database.EnsureCreated();

                    // Note! Therefore don't use EnsureDeleted() and EnsureCreated() but Migrate();

                    // set admin user
                    var user = new User()
                        FirstName = "",
                        LastName  = "Norwegian Synonyms json",
                        UserName  = "******"

                    // check if user already exists
                    // NOTE(review): matches on FirstName only, which is the empty string here — confirm intended
                    var existingUser = db.DictionaryUsers.Where(u => u.FirstName == user.FirstName).FirstOrDefault();
                    if (existingUser != null)
                        user = existingUser;

                    // disable tracking to speed things up
                    // note that this doesn't load the virtual properties, but loads the object ids after a save
                    db.ChangeTracker.QueryTrackingBehavior = QueryTrackingBehavior.NoTracking;

                    // this works when using the same user for all words.
                    db.ChangeTracker.AutoDetectChangesEnabled = false;

                    // NOTE(review): the flag is forced to true immediately after being initialized to false
                    bool isDebugging = false;
                    isDebugging = true;

                    var source = "norwegian-synonyms.json";
                    if (Path.GetExtension(dictionaryFile).ToLower().Equals(".json"))
                        // read json files
                        using (StreamReader r = new StreamReader(dictionaryFile))
                            // the whole file is read into memory and parsed as one JSON object
                            var json = r.ReadToEnd();
                            var jobj = JObject.Parse(json);

                            var totalCount = jobj.Properties().Count();
                            int count      = 0;
                            // each property name is a word; its values are the related words
                            foreach (var item in jobj.Properties())

                                var wordText     = item.Name;
                                var relatedArray = item.Values().Select(a => a.Value <string>());

                                WordDatabaseService.AddToDatabase(db, source, user, wordText, relatedArray);

                                // NOTE(review): presumably WriteLine is the debug branch and Write the
                                // else branch; the 'else' and any increment of count are not visible
                                if (isDebugging)
                                    // in debug mode the Console.Write \r isn't shown in the output console
                                    Console.WriteLine("[{0}] / [{1}]", count, totalCount);
                                    Console.Write("\r[{0}] / [{1}]", count, totalCount);
                ICrossBoard resultBoard;
                    resultBoard = puzzle != null
                        ? GenerateFirstCrossWord(board, dictionary, puzzle)
                        : GenerateFirstCrossWord(board, dictionary);
                catch (Exception e)
                    // NOTE(review): the format string has no placeholder, so 'e' is not printed
                    Console.WriteLine("Generating crossword has failed.", e);
                if (resultBoard == null)
                    Console.WriteLine(string.Format("No solution has been found."));
                    SaveResultToFile(outputFile, resultBoard, dictionary);
                catch (Exception e)
                    // NOTE(review): 'e' is not printed by this call either
                    Console.WriteLine(string.Format("Saving result crossword to file {0} has failed.", outputFile), e);
Exemple #9
        /// <summary>
        /// Logs on to the site and scrapes words for every letter count from
        /// <paramref name="startLetterCount"/> up to, but not including,
        /// <paramref name="endLetterCount"/>, resetting the resume flags before each letter count.
        /// </summary>
        /// <param name="siteUsername">User name passed to DoLogon.</param>
        /// <param name="sitePassword">Password passed to DoLogon.</param>
        /// <param name="startLetterCount">First letter count to process.</param>
        /// <param name="endLetterCount">Exclusive upper bound of the letter counts.</param>
        /// <param name="source">Source name used to look up the last processed word.</param>
        /// <param name="doContinueWithLastWord">When true, resume from the last stored word per letter count.</param>
        /// <param name="isScraperSwarm">When true, only the first letter count is meant to be processed by this instance.</param>
        // NOTE(review): braces and the 'break' of the swarm check are not visible in this excerpt.
        private void DoScrape(string siteUsername, string sitePassword, int startLetterCount, int endLetterCount, string source, bool doContinueWithLastWord, bool isScraperSwarm)
            var dbContextFactory = new DesignTimeDbContextFactory();

            using (var db = dbContextFactory.CreateDbContext(connectionString, Log.Logger))
                // Note!
                // the user needs to be added before we disable tracking and disable AutoDetectChanges
                // otherwise this will crash
                // NOTE(review): no statement that adds the user is visible in this excerpt

                // set admin user
                var adminUser = new User()
                    FirstName = "",
                    LastName  = "Admin",
                    UserName  = "******"

                // check if user already exists
                // NOTE(review): matches on FirstName only, which is the empty string here — confirm intended
                var existingUser = db.DictionaryUsers.Where(u => u.FirstName == adminUser.FirstName).FirstOrDefault();
                if (existingUser != null)
                    adminUser = existingUser;

                // disable tracking to speed things up
                // note that this doesn't load the virtual properties, but loads the object ids after a save
                db.ChangeTracker.QueryTrackingBehavior = QueryTrackingBehavior.NoTracking;

                // this doesn't seem to work when adding new users all the time
                db.ChangeTracker.AutoDetectChangesEnabled = false;

                // some patterns give back a word with one less character than asked for - it seems the Ø is messing their system up
                // UTF8 two byte problem?
                // TROND?K?????         gives TROND KJØLL
                // VEBJØRN?B????        gives VEBJØRN BERG
                // WILLY?R????????      gives WILLY RØGEBERG
                // THORBJØRN?H???????   gives THORBJØRN HÅRSTAD

                // lastWordString = "TRONSMOS VEG"; // word before TROND KJØLL
                // letterCount = 12;

                // lastWordString = "ÅSTED FOR DRAMAET ROMEO OG JULIE";
                // letterCount = 32;

                // lastWordString = "GUTTENAVN PÅ \"A\"";
                // letterCount = 16;
                // endLetterCount = 17;

                // lastWordString = "ÅPNINGSKONSERTSTYKKE";
                // letterCount = lastWordString.Length;
                // endLetterCount = 300;

                using (var driver = ChromeDriverUtils.GetChromeDriver(true))
                    DoLogon(driver, siteUsername, sitePassword);

                    // endLetterCount itself is not processed (strict '<')
                    for (int i = startLetterCount; i < endLetterCount; i++)
                        // reset global variables
                        hasFoundPattern   = false; // this is the first stage, we match the pattern
                        hasFoundLastWord  = false; // this is the second stage, we not only match the pattern but the word as well
                        hasMissedLastWord = false;

                        string lastWordString = null;
                        if (doContinueWithLastWord)
                            lastWordString = WordDatabaseService.GetLastWordFromLetterCount(db, source, i);

                        // don't skip any words when the last word is empty
                        if (lastWordString == null)
                            hasFoundLastWord = true;

                        // added break to support several docker instances scraping in swarms
                        // NOTE(review): logged at Error level although the message is worded as a warning
                        if (isScraperSwarm && (i > startLetterCount))
                            Log.Error("Warning! Quitting since the current letter length > letter count: {0} / {1}", i, startLetterCount);

                        ReadWordsByWordPermutations(i, driver, db, adminUser, lastWordString);
Exemple #10
        /// <summary>
        /// Pages through the results of a pattern search: parses the words on each page, uses the
        /// hasFoundPattern / hasFoundLastWord / hasMissedLastWord fields to find the resume point,
        /// and for every word past that point records the processing state and fetches its synonyms.
        /// </summary>
        /// <param name="wordPattern">Search pattern together with the last word to resume from.</param>
        /// <param name="driver">Web driver used for synonym lookups and page navigation.</param>
        /// <param name="db">Database context.</param>
        /// <param name="adminUser">User passed on to the parser and to GetWordSynonyms.</param>
        /// <param name="page">Current page counter; logged as page + 1.</param>
        /// <param name="documentNode">Parsed HTML of the current result page.</param>
        /// <param name="url">Current page URL; overwritten when a next page is found.</param>
        // NOTE(review): braces, 'else' keywords and the loop-exit branch are not visible in this
        // excerpt, so the nesting of the conditions below is inferred from the indentation only.
        private void ProcessWordsUntilEmpty(WordPattern wordPattern, IWebDriver driver, WordHintDbContext db, User adminUser, int page, HtmlNode documentNode, string url)
            while (true)
                Log.Information("Processing pattern search for '{0}' on page {1}", wordPattern.Pattern, page + 1);
                writer.WriteLine("Processing pattern search for '{0}' on page {1}", wordPattern.Pattern, page + 1);

                // parse all words
                var words = ReadWordsAgilityPack(documentNode, adminUser);

                foreach (var word in words)
                    if (wordPattern.IsMatchLastWord)
                        Log.Information("The current pattern matches the last-word: {0} = {1}. Current word: {2}", wordPattern.Pattern, wordPattern.LastWord, word.Value);
                        hasFoundPattern = true;

                        // compare on the normalized form of the scraped word
                        var wordRemoveDiacriticsToNorwegian = word.Value.RemoveDiacriticsToNorwegian();

                        // we might have had to add question marks at the end of the string to fix the length bug at the site
                        if (wordRemoveDiacriticsToNorwegian == wordPattern.LastWord.TrimEnd('?'))
                            Log.Information("The current word matches the last-word: {0} = {1}", word.Value, wordPattern.LastWord);
                            hasFoundLastWord = true;
                        if (!hasFoundLastWord && hasFoundPattern)
                            // if the pattern no longer matches, we never found the word - has it been deleted?
                            Log.Error("Warning! The current pattern does not any longer match the last-word: {0} = {1}. Current word: {2}", wordPattern.Pattern, wordPattern.LastWord, word.Value);
                            writer.WriteLine("Warning! The current pattern does not any longer match the last-word: {0} = {1}. Current word: {2}", wordPattern.Pattern, wordPattern.LastWord, word.Value);
                            hasMissedLastWord = true;

                    if (hasFoundLastWord)
                        string currentValue = word.Value;

                        // check if this is one of the buggy words from their site where the words found don't have the same length as the pattern says it should have
                        if (wordPattern.Length != word.Value.Length)
                            Log.Error("Warning! The current word doesn't match the length of the query pattern: {0} = {1}", word.Value, wordPattern.Pattern);
                            writer.WriteLine("Warning! The current word doesn't match the length of the query pattern: {0} = {1}", word.Value, wordPattern.Pattern);

                            // too short: pad with '?'; presumably the Substring line is the else
                            // branch that truncates a word that is too long — 'else' not visible
                            if (wordPattern.Length > word.Value.Length)
                                currentValue = currentValue + new string('?', wordPattern.Length - word.Value.Length);
                                currentValue = currentValue.Substring(0, wordPattern.Length);
                            // everything is OK

                        // update that we are processing this word, ignore length and comment
                        WordDatabaseService.UpdateState(db, source, new Word()
                            Value = currentValue.ToUpper(), Source = source, CreatedDate = DateTime.Now
                        }, writer);

                        GetWordSynonyms(word, driver, db, adminUser);

                // go to the next page if one exists
                var(hasFoundNextPage, pageNumber, pageUrl, pageNode) = NavigateToNextPageIfExist(driver, documentNode);
                if (hasFoundNextPage)
                    // carry the next page's position and document into the following iteration
                    url          = pageUrl;
                    page         = pageNumber;
                    documentNode = pageNode;
        /// <summary>
        /// Queries the single-word entries of at most _maxWordLength letters from the database,
        /// excluding the word ids returned for the listed category words, iterates over those that
        /// consist of letters only, and reports the elapsed time.
        /// </summary>
        /// <param name="db">Database context the words are read from.</param>
        // NOTE(review): braces, the stopwatch Start/Stop calls, the end of the query statement and
        // the action taken for each accepted word (AddWord in the commented-out variant below) are
        // not visible in this excerpt.
        private void ReadWordsIntoDatabase(WordHintDbContext db)
            // Create new stopwatch.
            Stopwatch stopwatch = new Stopwatch();

            // Begin timing.

            // var wordIdsToExclude = WordDatabaseService.GetWordIdList(db, new List<string> { "BY", "NAVN", "ELV", "FJELL", "FORKORTELSE", "IATA-FLYPLASSKODE", "ISO-KODE" });
            var wordIdsToExclude = WordDatabaseService.GetWordIdList(db, new List <string> {
                "BY", "NAVN"

            // search for all words
            // (single words only, limited by _maxWordLength, minus the excluded ids, ordered by value)
            var words = db.Words
                        .Where((w => (w.NumberOfWords == 1) && (w.NumberOfLetters <= _maxWordLength) && !wordIdsToExclude.Contains(w.WordId)))
                        .OrderBy(w => w.Value)
                        .Select(w => w.Value)

            // search for all words
            // var words = _db.Words
            //     .Where((w => (w.NumberOfWords == 1) && (w.NumberOfLetters <= _maxWordLength)))
            //     .OrderBy(w => w.Value)
            //     .Select(w => w.Value)
            //     .AsNoTracking();

            // in order to sort with Collation we need to use raw SQL
            // var words = _db.Words.FromSql(
            //     $"SELECT w.Value FROM Words AS w WHERE w.NumberOfWords = 1 AND w.NumberOfLetters <= {_maxWordLength} ORDER BY w.Value COLLATE utf8mb4_da_0900_as_cs")
            //     .Select(w => w.Value)
            //     .AsNoTracking();

            foreach (var word in words)
                string wordText = word;
                // only words made up entirely of letters pass this check
                if (wordText.All(char.IsLetter))
                // if (wordText.All(x => char.IsLetter(x) || x == '-' || x == ' '))

            // using ADO.NET seems faster than ef core for raw SQLs
            // using (var command = _db.Database.GetDbConnection().CreateCommand())
            // {
            //     command.CommandText = $"SELECT w.Value FROM Words AS w WHERE w.NumberOfWords = 1 AND w.NumberOfLetters <= {_maxWordLength} ORDER BY w.Value COLLATE utf8mb4_da_0900_as_cs";
            //     db.Database.OpenConnection();
            //     using (var reader = command.ExecuteReader())
            //     {
            //         while (reader.Read())
            //         {
            //             string wordText = reader[0].ToString();
            //             if (wordText.All(char.IsLetter))
            //             // if (wordText.All(x => char.IsLetter(x) || x == '-' || x == ' '))
            //             {
            //                 AddWord(wordText);
            //             }
            //         }
            //     }
            // }

            // Stop timing.

            // Write result.
            // presumably the logger is used when available and the console otherwise — 'else' not visible
            if (_logger != null)
                _logger.LogDebug("ReadWordsIntoDatabase - Time elapsed: {0}", stopwatch.Elapsed);
                Console.WriteLine("ReadWordsIntoDatabase - Time elapsed: {0}", stopwatch.Elapsed);
Exemple #12
        /// <summary>
        /// Streams the JSON document at <c>JSON_URL</c> token by token and, at the end of
        /// each JSON array, records the most recently read property name and the list
        /// created for that array through <c>WordDatabaseService</c>.
        /// </summary>
        /// <param name="db">Database context handed to <c>WordDatabaseService.UpdateState</c> and <c>WordDatabaseService.AddToDatabase</c>.</param>
        /// <param name="adminUser">User handed to <c>WordDatabaseService.AddToDatabase</c>.</param>
        /// <param name="lastWord">
        /// When not null, each property name is upper-cased (invariant culture) and compared
        /// with this value to decide when storing may start. Presumably callers pass it
        /// already upper-cased - TODO confirm.
        /// </param>
        // NOTE(review): this listing appears to have lost its brace-only lines (see the orphan
        // "}" closing the Word initializer below) and possibly its `else` / `break;` lines.
        // The comments in this method describe only what is visible here.
        private void ReadWordsFromUrl(WordHintDbContext db, User adminUser, string lastWord)
            // open the remote document as a stream so it is never held in memory as a whole
            using (WebClient client = new WebClient())
                using (Stream stream = client.OpenRead(JSON_URL))
                    using (StreamReader streamReader = new StreamReader(stream))

                        using (JsonTextReader reader = new JsonTextReader(streamReader))
                            reader.SupportMultipleContent = true;

                            // currentValue = last property name read, currentList = list created at the last "["
                            // totalCount is a hard-coded figure that is only used in the progress message below
                            // NOTE(review): count is never incremented in the visible code - confirm against the full file
                            string        currentValue = null;
                            List <string> currentList  = null;
                            int           totalCount   = 25000;
                            int           count        = 0;

                            // becomes true once storing to the database may start (see the EndArray case)
                            bool hasFound = false;

                            // NOTE(review): serializer is not used anywhere in the visible code
                            var serializer = new JsonSerializer();
                            while (reader.Read())
                                // output the stream one chunk at a time
                                // Log.Information(string.Format("{0,-12}  {1}",
                                //         reader.TokenType.ToString(),
                                //         reader.Value != null ? reader.Value.ToString() : "(null)"));

                                switch (reader.TokenType)
                                // JsonToken.StartObject = deserialize only when there's "{" character in the stream
                                case JsonToken.StartObject:

                                // JsonToken.PropertyName = deserialize only when there's a "text": in the stream
                                case JsonToken.PropertyName:
                                    // remember the property name; it is the value stored at the next "]"
                                    currentValue = reader.Value.ToString();

                                // JsonToken.String = deserialize only when there's a "text" in the stream
                                // NOTE(review): no visible statement adds the string token to currentList,
                                // so as listed the list stays empty - presumably a line is missing; confirm
                                case JsonToken.String:

                                // JsonToken.StartArray = deserialize only when there's "[" character in the stream
                                case JsonToken.StartArray:
                                    currentList = new List <string>();

                                // JsonToken.EndArray = deserialize only when there's "]" character in the stream
                                case JsonToken.EndArray:

                                    // skip until we reach last word beginning
                                    // NOTE(review): as listed, the second assignment below always runs, which would
                                    // make the lastWord check pointless - presumably it belongs to a lost
                                    // `else` branch (no lastWord given => store everything); confirm
                                    if (lastWord != null)
                                        if (currentValue.ToUpperInvariant().Equals(lastWord))
                                            hasFound = true;
                                        hasFound = true;

                                    // store to database
                                    if (hasFound)
                                        // update that we are processing this word, ignore length and comment
                                        // NOTE(review): ToUpper() here is culture-sensitive while the comparison
                                        // above uses ToUpperInvariant() - the two can differ in some cultures
                                        WordDatabaseService.UpdateState(db, source, new Word()
                                            Value = currentValue.ToUpper(), Source = source, CreatedDate = DateTime.Now
                                        }, writer, true);

                                        // disable storing state since we are doing it manually above
                                        WordDatabaseService.AddToDatabase(db, source, adminUser, currentValue, currentList, writer, false);

                                        // if (writer != null) writer.WriteLine("Added '{0} => {1}'", currentValue, string.Join(",", currentList));
                                        // progress line, written when count is a multiple of 10 and a writer is set
                                        if ((count % 10) == 0)
                                            if (writer != null)
                                                writer.WriteLine("[{0}] / [{1}]", count, totalCount);

                                    //  and reset
                                    currentList  = null;
                                    currentValue = null;

                                // JsonToken.EndObject = deserialize only when there's "}" character in the stream
                                case JsonToken.EndObject:
                                    // drop any pending name/list when the enclosing object closes
                                    currentList  = null;
                                    currentValue = null;

             * // Alternative: reading the whole JSON document into memory and parsing it at once took approximately the same time as the streaming version
             * {
             *  var json = streamReader.ReadToEnd();
             *  var jobj = JObject.Parse(json);
             *  var totalCount = jobj.Properties().Count();
             *  int count = 0;
             *  foreach (var item in jobj.Properties())
             *  {
             *      count++;
             *      var currentValue = item.Name;
             *      var currentList = item.Values().Select(a => a.Value<string>());
             *      WordDatabaseService.AddToDatabase(db, source, adminUser, currentValue, currentList);
             *      // if (writer != null) writer.WriteLine("Added '{0} => {1}'", currentValue, string.Join(",", currentList));
             *      if (writer != null) writer.WriteLine("[{0}] / [{1}]", count, totalCount);
             *  }
             * }