/// <summary>
/// Creates two <c>ChromeDrv</c> instances, runs <c>CreateSciencesList()</c> and then
/// shuts both drivers down again.
/// </summary>
/// <remarks>
/// The drivers are released in a <c>finally</c> block so they are also shut down when
/// the second driver fails to start or <c>CreateSciencesList()</c> throws; the exception
/// itself still propagates to the caller.
/// </remarks>
public SciencesList()
{
    try
    {
        chromeDrv = new ChromeDrv();
        chromeDrv2 = new ChromeDrv();
        CreateSciencesList();
    }
    finally
    {
        // Same shutdown order as before (first driver, then second). The nested
        // finally makes sure the second driver is released even if closing the
        // first one throws.
        try
        {
            if (chromeDrv != null)
            {
                chromeDrv.ExitDriver();
            }
        }
        finally
        {
            if (chromeDrv2 != null)
            {
                chromeDrv2.ExitDriver();
            }
        }
    }
}
/// <summary>
/// Walks article pages by year, month and running index, fills an <c>ArticleModel</c>
/// from each page, optionally downloads the linked PDF (<c>_flagPDF</c>) and optionally
/// stores the record through <c>DataAccess.WriteDBSQL</c> (<c>_flagArticles</c>).
/// </summary>
/// <param name="context">
/// A <see cref="SynchronizationContext"/> passed as <c>object</c>; every progress,
/// error and completion callback is dispatched through its <c>Send</c> method.
/// </param>
/// <remarks>
/// <para>
/// The page address is built from <c>_pathFirst</c>, the two-digit year, the two-digit
/// month and a five-digit index. A month ends when <c>PageNotFound()</c> returns true.
/// </para>
/// <para>
/// Any exception raised while reading a page is reported, followed by a 10 second
/// pause, and the same index is tried again; this repeats until <c>_cancel</c> is set.
/// A failed PDF download is retried the same way but without the pause. A failed
/// database write sets <c>_cancel</c> and stops the run.
/// </para>
/// <para>
/// On exit the position reached is written back to <c>_yearStart</c>,
/// <c>_monthStart</c> and <c>_indexStart</c> and sent to <c>OnGetSettings</c>,
/// presumably so a later run can resume from there.
/// </para>
/// </remarks>
public void GetSubjects(object context)
{
    SynchronizationContext contextSync = (SynchronizationContext)context;
    ArticleModel article = new ArticleModel();
    DataAccess dal = new DataAccess();
    int year = 0;
    int month = 0;
    int index = 0;

    try
    {
        _browser = new ChromeDrv();
        _driver = _browser.GetWebDriver();
    }
    catch (Exception e)
    {
        contextSync.Send(OnGetArticles_Exception, e.Message);

        // When the ChromeDrv constructor itself failed, _browser was never assigned
        // by this call and may still be null, so it must not be dereferenced blindly.
        if (_browser != null)
        {
            _browser.ExitDriver();
        }

        return;
    }

    // Falls back to an empty list when the XML cannot be read, which simply means
    // no category is ever translated below.
    Scien scien2 = new Scien();
    List<Science> sciences = new List<Science>();
    if (scien2.ReadXml())
    {
        sciences = scien2.Fields.Sciences;
    }

    // The patterns are constant, so build them once instead of on every iteration.
    Regex categoryCodeRegex = new Regex(@"\S+\.\S+");
    Regex submissionDateRegex = new Regex(@"\d{1,2}\s\D{3}\s\d{4}\s\d{2}\W\d{2}\W\d{2}");
    Regex pdfFolderRegex = new Regex(@"(pdf)\/\d{4}");
    Regex pdfFileRegex = new Regex(@"\d{4}\W\d{4,5}\W(pdf)");

    for (year = _yearStart; year <= _yearStop; year++)
    {
        for (month = _monthStart; month <= _monthStop; month++)
        {
            // No upper bound: the loop ends on "page not found" or on cancellation.
            for (index = _indexStart; ; index++)
            {
                try
                {
                    article.Article_Source = string.Format("{0}{1:d2}{2:d2}.{3:d5}", _pathFirst, int.Parse(year.ToString().Substring(2, 2)), month, index);
                    contextSync.Send(OnGetSource, article.Article_Source);
                    _driver.Navigate().GoToUrl(article.Article_Source);

                    if (PageNotFound())
                    {
                        contextSync.Send(OnGetSource_Exception, "Page not found : " + article.Article_Source);
                        // The next month starts counting from the first index again.
                        _indexStart = 1;
                        break;
                    }

                    article.Id = 0;
                    article.Category = _driver.FindElement(By.CssSelector("span.primary-subject")).Text;

                    // Translate the page's category text using the first configured
                    // entry whose code (the "xxx.yyy" token) occurs in it.
                    // NOTE(review): `article` is reused across iterations; when no
                    // entry matches, Subject and Science keep whatever an earlier
                    // iteration assigned. Confirm whether that is intended.
                    bool categoryResolved = false;
                    foreach (Science sc in sciences)
                    {
                        foreach (Subject sb in sc.subjects)
                        {
                            foreach (string cat in sb.Category)
                            {
                                Match matchCat = categoryCodeRegex.Match(cat);
                                if (matchCat.Success && article.Category.Contains(matchCat.Value))
                                {
                                    // Drops as many characters as the code is long plus
                                    // two more; assumes the entry starts with the code
                                    // followed by a two-character separator - TODO confirm.
                                    article.Category = cat.Substring(matchCat.Length + 2);
                                    article.Subject = sb.Name;
                                    article.Science = sc.Name;
                                    categoryResolved = true;
                                }

                                if (categoryResolved)
                                {
                                    break;
                                }
                            }

                            if (categoryResolved)
                            {
                                break;
                            }
                        }

                        if (categoryResolved)
                        {
                            break;
                        }
                    }

                    article.Title = _driver.FindElement(By.CssSelector("#abs > h1")).Text;

                    // Every author name is followed by ", ", including the last one.
                    var authorLinks = _driver.FindElements(By.CssSelector(".authors > a"));
                    article.Autors = "";
                    foreach (IWebElement elem in authorLinks)
                    {
                        article.Autors += elem.Text + ", ";
                    }

                    string strDate = _driver.FindElement(By.CssSelector(".submission-history")).Text;
                    Match dateMatch = submissionDateRegex.Match(strDate);
                    // The text comes from the web page, not from the user, so it is
                    // parsed with the invariant culture rather than the machine's.
                    // A page without a matching date makes Parse throw, which lands in
                    // the retry handler below.
                    article.Publication_Date = DateTime.Parse(dateMatch.Value, System.Globalization.CultureInfo.InvariantCulture);

                    article.Quotation = _driver.FindElement(By.CssSelector("#abs > blockquote")).Text;
                    article.Document_Source = _driver.FindElement(By.CssSelector("div.full-text > ul > li > a")).GetAttribute("href") + ".pdf";

                    Match folderMatch = pdfFolderRegex.Match(article.Document_Source);
                    if (folderMatch.Success)
                    {
                        // The match is "pdf/" followed by four digits; the first two and
                        // the last two digits name the two folder levels.
                        string y = folderMatch.Value.Substring(4, 2);
                        string m = folderMatch.Value.Substring(6, 2);

                        DirectoryInfo dir = new DirectoryInfo(_pathPDF + @"\" + y + @"\" + m);
                        if (!dir.Exists && _pathPDF != string.Empty)
                        {
                            dir.Create();
                        }

                        Match matchFile = pdfFileRegex.Match(article.Document_Source);
                        string document_Local = _pathPDF + @"\" + y + @"\" + m + @"\" + matchFile.Value;
                        // The stored path is relative to _pathPDF.
                        article.Document_Local = @"\" + y + @"\" + m + @"\" + matchFile.Value;

                        if (_flagPDF)
                        {
                            try
                            {
                                using (var client = new WebClient())
                                {
                                    client.Headers.Add("User-Agent: Other");
                                    client.DownloadFile(article.Document_Source, document_Local);
                                }
                            }
                            catch (Exception e)
                            {
                                contextSync.Send(OnGetSource_Exception, e.Message);
                                // Step back so the loop's index++ retries this article.
                                index--;
                                if (_cancel)
                                {
                                    break;
                                }

                                continue;
                            }
                        }
                    }
                    else
                    {
                        article.Document_Local = "";
                        article.Document_Source = "";
                    }

                    article.Recording_Date = DateTime.Now;
                }
                catch (Exception e)
                {
                    contextSync.Send(OnGetSource_Exception, e.Message);
                    // Pause 10 s, then retry the same index (index-- cancels the
                    // loop's index++).
                    // NOTE(review): a page that fails every time is retried
                    // indefinitely until _cancel is set.
                    Thread.Sleep(10000);
                    index--;
                    if (_cancel)
                    {
                        break;
                    }

                    continue;
                }

                if (_flagArticles)
                {
                    try
                    {
                        dal.WriteDBSQL(article);
                    }
                    catch (Exception e)
                    {
                        // A failed write ends the whole run; stepping back makes the
                        // saved position point at this unsaved article.
                        contextSync.Send(OnGetArticles_Exception, e.Message);
                        _cancel = true;
                        index--;
                        break;
                    }
                }

                if (_cancel)
                {
                    break;
                }
            }

            if (_cancel)
            {
                break;
            }

            // After December the following year starts from January.
            if (month == 12)
            {
                _monthStart = 1;
            }
        }

        if (_cancel)
        {
            break;
        }
    }

    // Publish the position that was reached.
    _yearStart = year;
    _monthStart = month;
    _indexStart = ++index;

    lstSettings = new List<int> { _yearStart, _monthStart, _indexStart };
    contextSync.Send(OnGetSettings, lstSettings);
    contextSync.Send(OnGetArticles_Cancel, true);
}