Пример #1
0
        /// <summary>
        /// Stores the related words found on the given page and on every following page,
        /// and stops as soon as no next page can be navigated to.
        /// </summary>
        /// <param name="word">The word whose related words are being collected.</param>
        /// <param name="driver">Web driver handed to <c>NavigateToNextPageIfExist</c>.</param>
        /// <param name="db">Database context the related words are written to.</param>
        /// <param name="adminUser">User passed on to the related-word parser.</param>
        /// <param name="page">Page number belonging to <paramref name="documentNode"/>; it is logged as <c>page + 1</c>.</param>
        /// <param name="documentNode">Parsed document of the page to start with.</param>
        /// <param name="url">Url of the starting page; it is only kept in sync with the page being processed.</param>
        private void ProcessSynonymsUntilEmpty(Word word, IWebDriver driver, WordHintDbContext db, User adminUser, int page, HtmlNode documentNode, string url)
        {
            bool morePages;

            do
            {
                int displayedPage = page + 1;
                Log.Information("Processing synonym search for '{0}' on page {1}", word.Value, displayedPage);
                writer.WriteLine("Processing synonym search for '{0}' on page {1}", word.Value, displayedPage);

                // collect the related words of the current page and store them
                // (the trailing 'false' means: don't update state)
                var wordsOnPage = ReadRelatedWordsAgilityPack(documentNode, adminUser);
                WordDatabaseService.AddToDatabase(db, this.source, word, wordsOnPage, writer, false);

                // try to move on to the following page
                // Note! this only works if we are logged in
                var(found, nextNumber, nextUrl, nextNode) = NavigateToNextPageIfExist(driver, documentNode);
                morePages = found;

                if (morePages)
                {
                    documentNode = nextNode;
                    page         = nextNumber;
                    url          = nextUrl;
                }
            }
            while (morePages);
        }
Пример #2
0
        /// <summary>
        /// Opens <paramref name="url"/> in a temporary browser tab, reads the linked entries of the
        /// word list on that page and stores them as related words of <paramref name="word"/>.
        /// The temporary tab is closed and the driver is switched back to the original tab even
        /// when navigation, parsing or the database call throws.
        /// </summary>
        /// <param name="word">The word the synonyms belong to; nothing is done when its value is "0".</param>
        /// <param name="driver">Web driver; must be a <c>ChromeDriver</c> because it is cast to one.</param>
        /// <param name="db">Database context the related words are written to.</param>
        /// <param name="adminUser">User assigned to every created related word.</param>
        /// <param name="url">Address of the synonym page to open.</param>
        private void GetWordSynonyms(Word word, IWebDriver driver, WordHintDbContext db, User adminUser, string url)
        {
            // there is a bug in the website that makes a query with "0" fail
            if (word.Value == "0")
            {
                return;
            }

            // open a new tab and set the context
            var chromeDriver = (ChromeDriver)driver;

            // save a reference to our original tab's window handle
            var originalTabInstance = chromeDriver.CurrentWindowHandle;

            // execute some JavaScript to open a new window
            chromeDriver.ExecuteScript("window.open();");

            // the new tab is the last entry in the WindowHandles collection;
            // read the collection once so that index and count come from the same snapshot
            var windowHandles  = chromeDriver.WindowHandles;
            var newTabInstance = windowHandles[windowHandles.Count - 1];

            // switch our WebDriver to the new tab's window handle
            chromeDriver.SwitchTo().Window(newTabInstance);

            try
            {
                // lets navigate to a web site in our new tab
                driver.Navigate().GoToUrl(url);

                Log.Information("Processing synonym search for '{0}'", word.Value);
                writer.WriteLine("Processing synonym search for '{0}'", word.Value);

                // parse all synonyms
                var listElements = driver.FindElements(By.XPath("//div[@id='wordlist']/ul[@class='word']/li"));
                var relatedWords = new List <Word>();

                foreach (IWebElement listElement in listElements)
                {
                    IWebElement ahref;
                    try
                    {
                        ahref = listElement.FindElement(By.TagName("a"));
                    }
                    catch (NoSuchElementException)
                    {
                        // the first list item without a link ends the parsing of this page
                        break;
                    }

                    var hintText = ahref.Text;

                    relatedWords.Add(new Word
                    {
                        Language        = "no",
                        Value           = hintText,
                        NumberOfLetters = hintText.Count(c => c != ' '),
                        NumberOfWords   = ScraperUtils.CountNumberOfWords(hintText),
                        User            = adminUser,
                        CreatedDate     = DateTime.Now,
                        Source          = this.source
                    });
                }

                relatedWords = relatedWords.Distinct().ToList(); // Note that this requires the object to implement IEquatable<Word>

                // and add to database
                WordDatabaseService.AddToDatabase(db, this.source, word, relatedWords, writer);
            }
            finally
            {
                // always clean up, otherwise a failure above leaves the extra tab open
                // and every later command would be sent to the wrong tab

                // now lets close our new tab
                chromeDriver.ExecuteScript("window.close();");

                // and switch our WebDriver back to the original tab's window handle
                chromeDriver.SwitchTo().Window(originalTabInstance);

                // and have our WebDriver focus on the main document in the page to send commands to
                chromeDriver.SwitchTo().DefaultContent();
            }
        }
Пример #3
0
        /// <summary>
        /// Command line entry point. Loads a crossword layout and a dictionary and then, depending
        /// on the output argument, streams generated crosswords ("signalr"), imports a json synonym
        /// file into the database ("database") or writes the first generated crossword to a file.
        /// </summary>
        /// <param name="args">Command line arguments, interpreted by <c>ParseInput</c>.</param>
        /// <returns>
        /// 0 on success, 1 when the arguments could not be parsed, 2 when the layout could not be
        /// loaded, 3 when the dictionary could not be loaded, 4 when generating failed, 5 when no
        /// solution was found and 6 when the result could not be saved.
        /// </returns>
        public static int Main(string[] args)
        {
            Console.WriteLine("CrossWord ver. {0} ", "1.0");

            string inputFile, outputFile, puzzle, dictionaryFile;

            if (!ParseInput(args, out inputFile, out outputFile, out puzzle, out dictionaryFile))
            {
                return 1;
            }

            ICrossBoard board;

            try
            {
                if (inputFile.StartsWith("http", StringComparison.Ordinal))
                {
                    board = CrossBoardCreator.CreateFromUrl(inputFile);
                }
                else
                {
                    board = CrossBoardCreator.CreateFromFile(inputFile);
                }
            }
            catch (Exception e)
            {
                // pass the values as format arguments: the exception text is actually printed and
                // a file name containing '{' or '}' can no longer break the formatting
                Console.WriteLine("Cannot load crossword layout from file {0}. {1}", inputFile, e.Message);
                return 2;
            }

            ICrossDictionary dictionary;

            try
            {
                if (dictionaryFile.Equals("database"))
                {
                    // NOTE(review): the connection string including credentials is hard-coded; consider moving it to configuration
                    dictionary = new DatabaseDictionary("server=localhost;port=3306;database=dictionary;user=user;password=password;charset=utf8;", board.MaxWordLength);
                }
                else
                {
                    dictionary = new Dictionary(dictionaryFile, board.MaxWordLength);
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("Cannot load dictionary from file {0}. {1}", dictionaryFile, e.Message);
                return 3;
            }

            if (outputFile.Equals("signalr"))
            {
                // generate and send to signalr hub
                // note that nothing requests cancellation at the moment, so the generator runs until it is done
                using (var tokenSource = new CancellationTokenSource())
                {
                    CancellationToken token = tokenSource.Token;

                    Task workerTask = Task.Run(
                        async() =>
                    {
                        try
                        {
                            await Generator.GenerateCrosswordsAsync(board, dictionary, puzzle, token);
                        }
                        catch (OperationCanceledException)
                        {
                            Console.WriteLine("Cancelled @ {0}", DateTime.Now);
                        }
                    });

                    // wait until the task is done
                    Task.WaitAll(workerTask);
                }
            }
            else if (outputFile.Equals("database"))
            {
                var dbContextFactory = new DesignTimeDbContextFactory();

                // NOTE(review): the connection string including credentials is hard-coded; consider moving it to configuration
                using (var db = dbContextFactory.CreateDbContext("server=localhost;database=dictionary;user=user;password=password;charset=utf8;", Log.Logger)) // null instead of Log.Logger enables debugging
                {
                    // setup database
                    // EnsureCreated() skips the migrations pipeline and is only useful for tests and
                    // early prototyping, therefore Migrate() is used here
                    db.Database.Migrate();

                    // set admin user
                    var user = new User()
                    {
                        FirstName = "",
                        LastName  = "Norwegian Synonyms json",
                        UserName  = "******"
                    };

                    // check if user already exists
                    // NOTE(review): the lookup is done on FirstName, which is empty for this user - confirm that this is the intended key
                    var existingUser = db.DictionaryUsers.FirstOrDefault(u => u.FirstName == user.FirstName);
                    if (existingUser != null)
                    {
                        user = existingUser;
                    }
                    else
                    {
                        db.DictionaryUsers.Add(user);
                        db.SaveChanges();
                    }

                    // disable tracking to speed things up
                    // note that this doesn't load the virtual properties, but loads the object ids after a save
                    db.ChangeTracker.QueryTrackingBehavior = QueryTrackingBehavior.NoTracking;

                    // this works when using the same user for all words.
                    db.ChangeTracker.AutoDetectChangesEnabled = false;

                    bool isDebugging = false;
#if DEBUG
                    isDebugging = true;
#endif

                    var source = "norwegian-synonyms.json";

                    // compare the extension without depending on the current culture
                    if (string.Equals(Path.GetExtension(dictionaryFile), ".json", StringComparison.OrdinalIgnoreCase))
                    {
                        // read json files
                        using (StreamReader r = new StreamReader(dictionaryFile))
                        {
                            var json = r.ReadToEnd();
                            var jobj = JObject.Parse(json);

                            // materialize once instead of walking the properties twice
                            var properties = jobj.Properties().ToList();
                            var totalCount = properties.Count;
                            int count      = 0;
                            foreach (var item in properties)
                            {
                                count++;

                                var wordText     = item.Name;
                                var relatedArray = item.Values().Select(a => a.Value <string>());

                                WordDatabaseService.AddToDatabase(db, source, user, wordText, relatedArray);

                                if (isDebugging)
                                {
                                    // in debug mode the Console.Write \r isn't shown in the output console
                                    Console.WriteLine("[{0}] / [{1}]", count, totalCount);
                                }
                                else
                                {
                                    Console.Write("\r[{0}] / [{1}]", count, totalCount);
                                }
                            }
                            Console.WriteLine("Done!");
                        }
                    }
                }
            }
            else
            {
                ICrossBoard resultBoard;
                try
                {
                    resultBoard = puzzle != null
                        ? GenerateFirstCrossWord(board, dictionary, puzzle)
                        : GenerateFirstCrossWord(board, dictionary);
                }
                catch (Exception e)
                {
                    Console.WriteLine("Generating crossword has failed. {0}", e.Message);
                    return 4;
                }

                if (resultBoard == null)
                {
                    Console.WriteLine("No solution has been found.");
                    return 5;
                }

                try
                {
                    SaveResultToFile(outputFile, resultBoard, dictionary);
                }
                catch (Exception e)
                {
                    Console.WriteLine("Saving result crossword to file {0} has failed. {1}", outputFile, e.Message);
                    return 6;
                }
            }

            return 0;
        }
Пример #4
0
        /// <summary>
        /// Opens the paged synonym list of <paramref name="word"/> in a temporary browser tab and
        /// stores the synonyms of every page, following the "next page" links until none is found.
        /// The temporary tab is closed and the driver is switched back to the original tab even
        /// when navigation, parsing or the database call throws.
        /// </summary>
        /// <param name="word">The word the synonyms belong to; nothing is done when its value is "0".</param>
        /// <param name="driver">Web driver; must be a <c>ChromeDriver</c> because it is cast to one.</param>
        /// <param name="db">Database context the synonyms are written to.</param>
        /// <param name="adminUser">User passed on to the synonym parser.</param>
        /// <param name="url">Base address of the word; <c>side/1/</c> is appended for the first page.</param>
        private void GetWordSynonyms(Word word, IWebDriver driver, WordHintDbContext db, User adminUser, string url)
        {
            // there is a bug in the website that makes a query with "0" fail
            if (word.Value == "0")
            {
                return;
            }

            // open a new tab and set the context
            var chromeDriver = (ChromeDriver)driver;

            // save a reference to our original tab's window handle
            var originalTabInstance = chromeDriver.CurrentWindowHandle;

            // execute some JavaScript to open a new window
            chromeDriver.ExecuteScript("window.open();");

            // the new tab is the last entry in the WindowHandles collection;
            // read the collection once so that index and count come from the same snapshot
            var windowHandles  = chromeDriver.WindowHandles;
            var newTabInstance = windowHandles[windowHandles.Count - 1];

            // switch our WebDriver to the new tab's window handle
            chromeDriver.SwitchTo().Window(newTabInstance);

            try
            {
                // lets navigate to a web site in our new tab
                // https://www.gratiskryssord.no/kryssordbok/navn/side/1/
                var page    = 1;
                var pageUrl = $"{url}side/{page}/";

                driver.Navigate().GoToUrl(pageUrl);
                while (true)
                {
                    Log.Information("Processing synonym search for '{0}' on page {1}", word.Value, page);
                    writer.WriteLine("Processing synonym search for '{0}' on page {1}", word.Value, page);

                    // read the whole document into a HtmlNode
                    HtmlNode doc = driver.GetDocumentNode();

                    // and parse synonyms using Agility Pack
                    var relatedWords = ParseSynonymsAgilityPack(word, doc, adminUser);

                    // and add to database
                    WordDatabaseService.AddToDatabase(db, this.source, word, relatedWords, writer, false);

                    // go to next page if exist
                    var nextPageElement = FindNextPageOrNull(doc, word.Value.ToLower(), page + 1);
                    if (nextPageElement == null)
                    {
                        break;
                    }

                    // the link is relative to the site root
                    var    href        = nextPageElement.Attributes["href"].Value;
                    string nextPageUrl = $"https://www.gratiskryssord.no{href}";

                    page++;
                    driver.Navigate().GoToUrl(nextPageUrl);
                }
            }
            finally
            {
                // always clean up, otherwise a failure above leaves the extra tab open
                // and every later command would be sent to the wrong tab

                // now lets close our new tab
                chromeDriver.ExecuteScript("window.close();");

                // and switch our WebDriver back to the original tab's window handle
                chromeDriver.SwitchTo().Window(originalTabInstance);

                // and have our WebDriver focus on the main document in the page to send commands to
                chromeDriver.SwitchTo().DefaultContent();
            }
        }
Пример #5
0
        /// <summary>
        /// Streams the json document at <c>JSON_URL</c> token by token and stores every property
        /// name together with the strings of the array that follows it as a word and its related
        /// words. Streaming was kept although parsing the whole document with <c>JObject.Parse</c>
        /// took approximately the same time.
        /// </summary>
        /// <param name="db">Database context the words are written to.</param>
        /// <param name="adminUser">User passed on when the words are added.</param>
        /// <param name="lastWord">
        /// When not null, entries are skipped until the entry whose name, upper-cased with the
        /// invariant culture, equals this value; that entry and all following ones are stored.
        /// When null every entry is stored.
        /// </param>
        private void ReadWordsFromUrl(WordHintDbContext db, User adminUser, string lastWord)
        {
            // hard-coded total that is only shown in the progress output; it is not derived from the stream
            const int totalCount = 25000;

            using (WebClient client = new WebClient())
            using (Stream stream = client.OpenRead(JSON_URL))
            using (StreamReader streamReader = new StreamReader(stream))
            using (JsonTextReader reader = new JsonTextReader(streamReader))
            {
                reader.SupportMultipleContent = true;

                string        currentValue = null;
                List <string> currentList  = null;
                int           count        = 0;

                // without a word to resume from there is nothing to skip
                bool hasFound = lastWord == null;

                while (reader.Read())
                {
                    switch (reader.TokenType)
                    {
                        // "text": - the name of the entry that is about to follow
                        case JsonToken.PropertyName:
                            currentValue = reader.Value.ToString();
                            break;

                        // "[" - start collecting the related words of the current entry
                        case JsonToken.StartArray:
                            currentList = new List <string>();
                            break;

                        // "text" - one related word; a string outside of an array has no list to go into
                        case JsonToken.String:
                            if (currentList != null)
                            {
                                currentList.Add(reader.Value.ToString());
                            }
                            break;

                        // "]" - the entry is complete
                        case JsonToken.EndArray:
                            count++;

                            // an array that isn't preceded by a property name cannot be stored
                            if (currentValue != null)
                            {
                                // skip until we reach last word beginning
                                if (!hasFound && currentValue.ToUpperInvariant().Equals(lastWord))
                                {
                                    hasFound = true;
                                }

                                // store to database
                                if (hasFound)
                                {
                                    // update that we are processing this word, ignore length and comment
                                    WordDatabaseService.UpdateState(db, source, new Word()
                                    {
                                        Value = currentValue.ToUpper(), Source = source, CreatedDate = DateTime.Now
                                    }, writer, true);

                                    // disable storing state since we are doing it manually above
                                    WordDatabaseService.AddToDatabase(db, source, adminUser, currentValue, currentList, writer, false);

                                    // report progress for every tenth entry
                                    if ((count % 10) == 0 && writer != null)
                                    {
                                        writer.WriteLine("[{0}] / [{1}]", count, totalCount);
                                    }
                                }
                            }

                            // and reset
                            currentList  = null;
                            currentValue = null;
                            break;

                        // "}" - forget any partially read entry
                        case JsonToken.EndObject:
                            currentList  = null;
                            currentValue = null;
                            break;
                    }
                }
            }
        }