/// <summary>
/// Stores the related words found in <paramref name="documentNode"/> for <paramref name="word"/>
/// and keeps following the "next page" navigation until no further page is found.
/// </summary>
/// <param name="word">Word whose related words are being collected.</param>
/// <param name="driver">Web driver handed to the next-page navigation helper.</param>
/// <param name="db">Database context the related words are written to.</param>
/// <param name="adminUser">User passed on to the related-word parser.</param>
/// <param name="page">Page number of the initial document; it is logged as <c>page + 1</c>.</param>
/// <param name="documentNode">Document of the page to start with.</param>
/// <param name="url">Url of the current page; only reassigned locally as the pages advance.</param>
private void ProcessSynonymsUntilEmpty(Word word, IWebDriver driver, WordHintDbContext db, User adminUser, int page, HtmlNode documentNode, string url)
{
    bool hasMorePages;

    do
    {
        int pageLabel = page + 1;
        Log.Information("Processing synonym search for '{0}' on page {1}", word.Value, pageLabel);
        writer.WriteLine("Processing synonym search for '{0}' on page {1}", word.Value, pageLabel);

        // parse every related word on the current page and store it;
        // the trailing 'false' means the processing state is not updated here
        var wordsOnPage = ReadRelatedWordsAgilityPack(documentNode, adminUser);
        WordDatabaseService.AddToDatabase(db, this.source, word, wordsOnPage, writer, false);

        // move on to the next page when there is one
        // Note! this only works if we are logged in
        var (foundNext, nextNumber, nextUrl, nextNode) = NavigateToNextPageIfExist(driver, documentNode);

        hasMorePages = foundNext;
        if (hasMorePages)
        {
            url = nextUrl;
            page = nextNumber;
            documentNode = nextNode;
        }
    }
    while (hasMorePages);
}
/// <summary>
/// Opens <paramref name="url"/> in a new browser tab, reads the linked words from the
/// result list and stores them as related words for <paramref name="word"/>.
/// The tab is closed and the driver is switched back to the original tab afterwards,
/// also when processing fails.
/// </summary>
/// <param name="word">Word to look up; the value "0" is skipped.</param>
/// <param name="driver">Web driver; must be a <c>ChromeDriver</c> because it is cast to one.</param>
/// <param name="db">Database context the related words are written to.</param>
/// <param name="adminUser">User assigned to every created related word.</param>
/// <param name="url">Address of the synonym page to open.</param>
private void GetWordSynonyms(Word word, IWebDriver driver, WordHintDbContext db, User adminUser, string url)
{
    // there is a bug in the website that makes a query with "0" fail
    if (word.Value == "0")
    {
        return;
    }

    // open a new tab and set the context
    var chromeDriver = (ChromeDriver)driver;

    // save a reference to our original tab's window handle
    var originalTabInstance = chromeDriver.CurrentWindowHandle;

    // execute some JavaScript to open a new window
    chromeDriver.ExecuteScript("window.open();");

    // the new tab's window handle is the last entry in the WindowHandles collection
    // (fetched once instead of querying the driver twice)
    var windowHandles = chromeDriver.WindowHandles;
    var newTabInstance = windowHandles[windowHandles.Count - 1];

    // switch our WebDriver to the new tab's window handle
    chromeDriver.SwitchTo().Window(newTabInstance);

    try
    {
        // navigate to the web site in our new tab
        driver.Navigate().GoToUrl(url);

        Log.Information("Processing synonym search for '{0}'", word.Value);
        writer.WriteLine("Processing synonym search for '{0}'", word.Value);

        // parse all synonyms
        IList<IWebElement> listElements = driver.FindElements(By.XPath("//div[@id='wordlist']/ul[@class='word']/li"));

        var relatedWords = new List<Word>();
        foreach (IWebElement listElement in listElements)
        {
            IWebElement ahref;
            try
            {
                ahref = listElement.FindElement(By.TagName("a"));
            }
            catch (NoSuchElementException)
            {
                // the first list item without a link ends the parsing
                break;
            }

            var hintText = ahref.Text;

            var hint = new Word
            {
                Language = "no",
                Value = hintText,
                NumberOfLetters = hintText.Count(c => c != ' '),
                NumberOfWords = ScraperUtils.CountNumberOfWords(hintText),
                User = adminUser,
                CreatedDate = DateTime.Now,
                Source = this.source
            };

            relatedWords.Add(hint);
        }

        // Note that this requires the object to implement IEquatable<Word>
        relatedWords = relatedWords.Distinct().ToList();

        // and add to database
        WordDatabaseService.AddToDatabase(db, this.source, word, relatedWords, writer);
    }
    finally
    {
        // always close our new tab, even when navigation, parsing or storing failed,
        // so that a failure does not leave the driver pointing at an orphaned tab
        chromeDriver.ExecuteScript("window.close();");

        // and switch our WebDriver back to the original tab's window handle
        chromeDriver.SwitchTo().Window(originalTabInstance);

        // and have our WebDriver focus on the main document in the page to send commands to
        chromeDriver.SwitchTo().DefaultContent();
    }
}
/// <summary>
/// Command-line entry point. Loads a crossword layout and a dictionary and then, depending
/// on the output argument, either generates crosswords for the signalr hub ("signalr"),
/// imports a synonym json file into the database ("database"), or generates the first
/// crossword and saves it to the given output file.
/// </summary>
/// <param name="args">Command-line arguments, interpreted by <c>ParseInput</c>.</param>
/// <returns>
/// 0 on success; 1 when the arguments could not be parsed; 2 when the layout could not be
/// loaded; 3 when the dictionary could not be loaded; 4 when generating failed; 5 when no
/// solution was found; 6 when saving the result failed.
/// </returns>
public static int Main(string[] args)
{
    Console.WriteLine("CrossWord ver. {0} ", "1.0");

    string inputFile, outputFile, puzzle, dictionaryFile;
    if (!ParseInput(args, out inputFile, out outputFile, out puzzle, out dictionaryFile))
    {
        return 1;
    }

    ICrossBoard board;
    try
    {
        // ordinal comparison: the prefix is a protocol marker, not linguistic text
        if (inputFile.StartsWith("http", StringComparison.Ordinal))
        {
            board = CrossBoardCreator.CreateFromUrl(inputFile);
        }
        else
        {
            board = CrossBoardCreator.CreateFromFile(inputFile);
        }
    }
    catch (Exception e)
    {
        // pass the file name and the exception text as format arguments so that the
        // failure reason is actually printed and braces in the file name are harmless
        Console.WriteLine("Cannot load crossword layout from file {0}. {1}", inputFile, e.Message);
        return 2;
    }

    ICrossDictionary dictionary;
    try
    {
        if (dictionaryFile.Equals("database"))
        {
            // NOTE(review): connection string with credentials is hard-coded in source;
            // consider moving it to configuration
            dictionary = new DatabaseDictionary("server=localhost;port=3306;database=dictionary;user=user;password=password;charset=utf8;", board.MaxWordLength);
        }
        else
        {
            dictionary = new Dictionary(dictionaryFile, board.MaxWordLength);
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("Cannot load dictionary from file {0}. {1}", dictionaryFile, e.Message);
        return 3;
    }

    if (outputFile.Equals("signalr"))
    {
        // generate and send to signalr hub
        // a timeout could be added with: new CancellationTokenSource(TimeSpan.FromSeconds(20))
        using (var tokenSource = new CancellationTokenSource())
        {
            CancellationToken token = tokenSource.Token;

            Task workerTask = Task.Run(async () =>
            {
                try
                {
                    await Generator.GenerateCrosswordsAsync(board, dictionary, puzzle, token);
                }
                catch (OperationCanceledException)
                {
                    Console.WriteLine("Cancelled @ {0}", DateTime.Now);
                }
            });

            // wait until the task is done
            // (alternative: wait for a key press with Console.ReadLine() and then call tokenSource.Cancel())
            Task.WaitAll(workerTask);
        }
    }
    else if (outputFile.Equals("database"))
    {
        var dbContextFactory = new DesignTimeDbContextFactory();

        // passing null instead of Log.Logger enables debugging
        // NOTE(review): connection string with credentials is hard-coded in source;
        // consider moving it to configuration
        using (var db = dbContextFactory.CreateDbContext("server=localhost;database=dictionary;user=user;password=password;charset=utf8;", Log.Logger))
        {
            // setup database
            // You would either call EnsureCreated() or Migrate().
            // EnsureCreated() is an alternative that completely skips the migrations pipeline and just
            // creates a database that matches the current model. It's good for unit testing or very early
            // prototyping, when you are happy just to delete and re-create the database when the model changes.
            // Note! Therefore don't use EnsureDeleted() and EnsureCreated() but Migrate();
            db.Database.Migrate();

            // set admin user
            var user = new User()
            {
                FirstName = "",
                LastName = "Norwegian Synonyms json",
                UserName = "******"
            };

            // check if user already exists
            // NOTE(review): the lookup matches on FirstName, which is the empty string here,
            // so any existing user with an empty first name is reused - confirm this is intended
            var existingUser = db.DictionaryUsers.FirstOrDefault(u => u.FirstName == user.FirstName);
            if (existingUser != null)
            {
                user = existingUser;
            }
            else
            {
                db.DictionaryUsers.Add(user);
                db.SaveChanges();
            }

            // disable tracking to speed things up
            // note that this doesn't load the virtual properties, but loads the object ids after a save
            db.ChangeTracker.QueryTrackingBehavior = QueryTrackingBehavior.NoTracking;

            // this works when using the same user for all words.
            db.ChangeTracker.AutoDetectChangesEnabled = false;

            bool isDebugging = false;
#if DEBUG
            isDebugging = true;
#endif

            var source = "norwegian-synonyms.json";

            // culture-independent, case-insensitive extension check
            if (string.Equals(Path.GetExtension(dictionaryFile), ".json", StringComparison.OrdinalIgnoreCase))
            {
                // read json files
                using (StreamReader r = new StreamReader(dictionaryFile))
                {
                    var json = r.ReadToEnd();
                    var jobj = JObject.Parse(json);

                    var totalCount = jobj.Properties().Count();
                    int count = 0;
                    foreach (var item in jobj.Properties())
                    {
                        count++;

                        var wordText = item.Name;
                        var relatedArray = item.Values().Select(a => a.Value<string>());

                        WordDatabaseService.AddToDatabase(db, source, user, wordText, relatedArray);

                        if (isDebugging)
                        {
                            // in debug mode the Console.Write \r isn't shown in the output console
                            Console.WriteLine("[{0}] / [{1}]", count, totalCount);
                        }
                        else
                        {
                            Console.Write("\r[{0}] / [{1}]", count, totalCount);
                        }
                    }

                    Console.WriteLine("Done!");
                }
            }
        }
    }
    else
    {
        ICrossBoard resultBoard;
        try
        {
            resultBoard = puzzle != null
                ? GenerateFirstCrossWord(board, dictionary, puzzle)
                : GenerateFirstCrossWord(board, dictionary);
        }
        catch (Exception e)
        {
            Console.WriteLine("Generating crossword has failed. {0}", e.Message);
            return 4;
        }

        if (resultBoard == null)
        {
            Console.WriteLine("No solution has been found.");
            return 5;
        }

        try
        {
            SaveResultToFile(outputFile, resultBoard, dictionary);
        }
        catch (Exception e)
        {
            Console.WriteLine("Saving result crossword to file {0} has failed. {1}", outputFile, e.Message);
            return 6;
        }
    }

    return 0;
}
/// <summary>
/// Opens the paged synonym listing for <paramref name="word"/> in a new browser tab, stores
/// the synonyms parsed from each page and follows the next-page links until none is found.
/// The tab is closed and the driver is switched back to the original tab afterwards,
/// also when processing fails.
/// </summary>
/// <param name="word">Word to look up; the value "0" is skipped.</param>
/// <param name="driver">Web driver; must be a <c>ChromeDriver</c> because it is cast to one.</param>
/// <param name="db">Database context the synonyms are written to.</param>
/// <param name="adminUser">User passed on to the synonym parser.</param>
/// <param name="url">Base address of the word; <c>side/{page}/</c> is appended to it.</param>
private void GetWordSynonyms(Word word, IWebDriver driver, WordHintDbContext db, User adminUser, string url)
{
    // there is a bug in the website that makes a query with "0" fail
    if (word.Value == "0")
    {
        return;
    }

    // open a new tab and set the context
    var chromeDriver = (ChromeDriver)driver;

    // save a reference to our original tab's window handle
    var originalTabInstance = chromeDriver.CurrentWindowHandle;

    // execute some JavaScript to open a new window
    chromeDriver.ExecuteScript("window.open();");

    // the new tab's window handle is the last entry in the WindowHandles collection
    // (fetched once instead of querying the driver twice)
    var windowHandles = chromeDriver.WindowHandles;
    var newTabInstance = windowHandles[windowHandles.Count - 1];

    // switch our WebDriver to the new tab's window handle
    chromeDriver.SwitchTo().Window(newTabInstance);

    try
    {
        // navigate to the first page in our new tab, e.g.
        // https://www.gratiskryssord.no/kryssordbok/navn/side/1/
        var page = 1;
        var pageUrl = $"{url}side/{page}/";
        driver.Navigate().GoToUrl(pageUrl);

        while (true)
        {
            Log.Information("Processing synonym search for '{0}' on page {1}", word.Value, page);
            writer.WriteLine("Processing synonym search for '{0}' on page {1}", word.Value, page);

            // read the whole document into a HtmlNode
            HtmlNode doc = driver.GetDocumentNode();

            // and parse synonyms using Agility Pack
            var relatedWords = ParseSynonymsAgilityPack(word, doc, adminUser);

            // and add to database; the trailing 'false' means the state is not updated here
            WordDatabaseService.AddToDatabase(db, this.source, word, relatedWords, writer, false);

            // go to next page if exist
            var nextPageElement = FindNextPageOrNull(doc, word.Value.ToLower(), page + 1);
            if (nextPageElement == null)
            {
                break;
            }

            // the link is site-relative, so prefix it with the host
            var href = nextPageElement.Attributes["href"].Value;
            string nextPageUrl = $"https://www.gratiskryssord.no{href}";

            page++;
            driver.Navigate().GoToUrl(nextPageUrl);
        }
    }
    finally
    {
        // always close our new tab, even when navigation, parsing or storing failed,
        // so that a failure does not leave the driver pointing at an orphaned tab
        chromeDriver.ExecuteScript("window.close();");

        // and switch our WebDriver back to the original tab's window handle
        chromeDriver.SwitchTo().Window(originalTabInstance);

        // and have our WebDriver focus on the main document in the page to send commands to
        chromeDriver.SwitchTo().DefaultContent();
    }
}
/// <summary>
/// Streams the json document at <c>JSON_URL</c> token by token and stores every
/// "word": [ "related", ... ] entry in the database.
/// </summary>
/// <param name="db">Database context the words are written to.</param>
/// <param name="adminUser">User passed on when the words are stored.</param>
/// <param name="lastWord">
/// When not null, entries are skipped until an entry whose upper-cased (invariant) name equals
/// this value is reached; that entry and all following ones are stored.
/// When null, every entry is stored.
/// </param>
private void ReadWordsFromUrl(WordHintDbContext db, User adminUser, string lastWord)
{
    // hard-coded total, only used for the progress output
    const int totalCount = 25000;

    // Note: reading the whole document with JObject.Parse took approx the same time
    // as this streaming version.
    using (WebClient client = new WebClient())
    using (Stream stream = client.OpenRead(JSON_URL))
    using (StreamReader streamReader = new StreamReader(stream))
    using (JsonTextReader reader = new JsonTextReader(streamReader))
    {
        reader.SupportMultipleContent = true;

        string currentValue = null;
        List<string> currentList = null;
        int count = 0;
        bool hasFound = false;

        while (reader.Read())
        {
            switch (reader.TokenType)
            {
                // "{" character in the stream
                case JsonToken.StartObject:
                    break;

                // a "text": property name in the stream - the word itself
                case JsonToken.PropertyName:
                    currentValue = reader.Value.ToString();
                    break;

                // a "text" value in the stream - a related word
                case JsonToken.String:
                    // a string that is not inside an array (e.g. "word": "text") has no
                    // list to go into; ignore it instead of failing with a null reference
                    if (currentList != null)
                    {
                        currentList.Add(reader.Value.ToString());
                    }
                    break;

                // "[" character in the stream
                case JsonToken.StartArray:
                    currentList = new List<string>();
                    break;

                // "]" character in the stream - the entry is complete
                case JsonToken.EndArray:
                    count++;

                    // an array without a preceding property name (top-level or nested array)
                    // has no word to attach to; skip it instead of failing with a null reference
                    if (currentValue == null)
                    {
                        currentList = null;
                        break;
                    }

                    // skip until we reach last word beginning
                    if (lastWord == null || currentValue.ToUpperInvariant().Equals(lastWord))
                    {
                        hasFound = true;
                    }

                    // store to database
                    if (hasFound)
                    {
                        // update that we are processing this word, ignore length and comment
                        WordDatabaseService.UpdateState(db, source, new Word() { Value = currentValue.ToUpper(), Source = source, CreatedDate = DateTime.Now }, writer, true);

                        // disable storing state since we are doing it manually above
                        WordDatabaseService.AddToDatabase(db, source, adminUser, currentValue, currentList, writer, false);

                        // report progress for every tenth entry
                        if ((count % 10) == 0 && writer != null)
                        {
                            writer.WriteLine("[{0}] / [{1}]", count, totalCount);
                        }
                    }

                    // and reset
                    currentList = null;
                    currentValue = null;
                    break;

                // "}" character in the stream
                case JsonToken.EndObject:
                    currentList = null;
                    currentValue = null;
                    break;
            }
        }
    }
}