/// <summary>
/// Starts an asynchronous download of <paramref name="url"/> into <paramref name="filepath"/>.
/// </summary>
/// <remarks>
/// The target path and the row id are stored in the client's <c>QueryString</c> collection, which is
/// where the <c>Completed</c> handler reads them back from.
/// NOTE(review): WebClient documents QueryString as name/value pairs appended to the request URI, so
/// these values may be sent to the remote server. Passing them as the userToken of DownloadFileAsync
/// would avoid that, but needs a matching change in every handler that reads QueryString.
/// </remarks>
/// <param name="newSearchResultItemRow">Row whose <c>id</c> is handed to the completion handler.</param>
/// <param name="url">Absolute address of the file to download.</param>
/// <param name="filepath">Local file the response is written to.</param>
private void DownloadFile(SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow, string url, string filepath)
{
    WebClient webClient = new WebClient();
    try
    {
        webClient.DownloadFileCompleted += new AsyncCompletedEventHandler(Completed);

        // Subscribed after Completed so the client is released only once that handler has run,
        // instead of being disposed while the transfer is still in flight.
        webClient.DownloadFileCompleted += delegate(object sender, AsyncCompletedEventArgs e)
        {
            ((WebClient)sender).Dispose();
        };

        webClient.QueryString.Add("filepath", filepath);
        webClient.QueryString.Add("SearchResultItem.id", newSearchResultItemRow.id.ToString());
        webClient.DownloadProgressChanged += new DownloadProgressChangedEventHandler(ProgressChanged);
        webClient.DownloadFileAsync(new Uri(url), filepath);
    }
    catch
    {
        // The download never started (for example a malformed url), so no completion event
        // will come along to dispose the client.
        webClient.Dispose();
        throw;
    }
}
/// <summary>
/// Handles the end of a file download: logs the outcome in textBox1 and, on success, stores the text
/// of the downloaded file in the <c>endnote_content</c> column of the matching SearchResultItem row.
/// </summary>
/// <param name="sender">
/// The WebClient that ran the download; its QueryString carries the "filepath" and
/// "SearchResultItem.id" entries this handler reads.
/// </param>
/// <param name="e">Completion data; a cancelled or failed download is logged and not read back.</param>
private void Completed(object sender, AsyncCompletedEventArgs e)
{
    WebClient webClient = (WebClient)sender;
    string filepath = webClient.QueryString["filepath"];
    int id = int.Parse(webClient.QueryString["SearchResultItem.id"]);

    // A failed or cancelled transfer may not have produced a complete file; trying to read it
    // would throw from inside this event handler.
    if (e.Cancelled || e.Error != null)
    {
        string reason = e.Cancelled ? "cancelled" : e.Error.Message;
        textBox1.Text += "download failed: " + filepath + " (" + reason + ")" + System.Environment.NewLine;
        return;
    }

    textBox1.Text += "download completed: " + filepath + System.Environment.NewLine;

    // Read everything first so the file is closed again before the database round trip.
    string content = File.ReadAllText(filepath);

    SupportSLRDataSet.SearchResultItemRow searchResultItemRow = supportSLRDataSet.SearchResultItem.FindByid(id);
    if (searchResultItemRow == null)
    {
        textBox1.Text += "download ignored, no SearchResultItem with id " + id + System.Environment.NewLine;
        return;
    }

    searchResultItemRow.endnote_content = content;

    using (SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter searchResultItemTableAdapter = new SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter())
    {
        searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem);
    }
}
/// <summary>
/// Looks for the EndNote export link inside a result page and, when the marker "endnote&amp;" occurs
/// exactly once, starts downloading the linked file and records the link on the row.
/// </summary>
/// <param name="newSearchResultItemRow">Row whose id names the target file and which receives <c>endnote_url</c>.</param>
/// <param name="contentResult">HTML of the result page.</param>
/// <param name="numResult">Not used by this method.</param>
/// <param name="basePath">Prefix of the target file name; it is concatenated as-is, without adding a separator.</param>
private void FindDownloadEndnoteFile(SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow, string contentResult, int numResult, string basePath)
{
    const string endnoteMarker = "endnote&";

    // Splitting on the marker yields two pieces only when it occurs exactly once in the page.
    string[] aroundMarker = contentResult.Split(new string[] { endnoteMarker }, StringSplitOptions.None);
    if (aroundMarker.Length != 2)
    {
        return;
    }

    // The link starts after the last href=" in front of the marker ...
    string[] beforeMarker = aroundMarker[0].Split(new string[] { "href=\"" }, StringSplitOptions.None);
    string linkHead = beforeMarker[beforeMarker.Length - 1];

    // ... and ends at the first double quote behind it.
    string linkTail = aroundMarker[1].Split(new string[] { "\"" }, StringSplitOptions.None)[0];

    string targetFile = basePath + "SearchResultItemRow_id_" + newSearchResultItemRow.id.ToString("0000") + ".enw";
    string endnoteUrl = (linkHead + endnoteMarker + linkTail).Replace("=", string.Empty);

    DownloadFile(newSearchResultItemRow, endnoteUrl, targetFile);
    newSearchResultItemRow.endnote_url = endnoteUrl;
}
/// <summary>
/// Walks the SpringerLink result pages from the page number in textBox5 up to the one in textBox6 and
/// stores one SearchResultItem row per hit: the hit's page content, its EndNote file (when a link is
/// found) and the properties parsed by FindProperties.
/// </summary>
/// <remarks>
/// textBox3 holds the search address as a format string that receives the page number, textBox2 the
/// text appended to it, and textBox4 the separator the result page is split on; a piece counts as a hit
/// when it starts with <c>href=</c>. The walk stops early when a fetched page contains no hit.
/// Errors raised by FindDownloadEndnoteFile or FindProperties are appended to the row's
/// <c>errors_onload</c> column instead of being thrown.
/// </remarks>
private void SearchSpringerLink()
{
    string basicURL = textBox3.Text;
    string querystring = textBox2.Text;
    string titleLocator = textBox4.Text;
    string basicURLforResult = "http://link.springer.com/";
    int numResult = 1;
    int page = int.Parse(textBox5.Text);
    int page_end = int.Parse(textBox6.Text);
    bool hasPages = true;

    while (hasPages)
    {
        string searchUrl = String.Format(basicURL, page) + querystring;
        string resultsFromSearch = string.Empty;
        try
        {
            resultsFromSearch = GetContentFromURL(searchUrl);
        }
        catch (Exception)
        {
            textBox1.Text += "Error on: " + searchUrl + System.Environment.NewLine;
        }

        if (!string.IsNullOrEmpty(resultsFromSearch))
        {
            hasPages = false;

            foreach (string result in resultsFromSearch.Split(new string[] { titleLocator }, StringSplitOptions.None))
            {
                string resultTrim = result.Trim();
                if (!resultTrim.StartsWith("href=", StringComparison.Ordinal))
                {
                    continue;
                }

                hasPages = true;

                // The link runs from just behind href=" to the next ">; the length is end minus start.
                const string token01 = "href=\"";
                int urlStart = resultTrim.IndexOf(token01, StringComparison.Ordinal);
                int urlEnd = urlStart < 0 ? -1 : resultTrim.IndexOf("\">", urlStart + token01.Length, StringComparison.Ordinal);
                if (urlEnd < 0)
                {
                    textBox1.Text += "Error on: result without a quoted href (page " + page + ")" + System.Environment.NewLine;
                    continue;
                }

                urlStart += token01.Length;
                string urlResult = resultTrim.Substring(urlStart, urlEnd - urlStart);
                string contentResult = GetContentFromURL(basicURLforResult + urlResult).Trim();

                using (SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter searchResultItemTableAdapter = new SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter())
                {
                    SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow = supportSLRDataSet.SearchResultItem.NewSearchResultItemRow();
                    newSearchResultItemRow.fk_QueryDataSourceRound_id = 9; // (int)comboBox1.SelectedValue;
                    newSearchResultItemRow.url = urlResult;
                    newSearchResultItemRow.page_content = contentResult;
                    newSearchResultItemRow.errors_onload = string.Empty;

                    // Saved once up front to create the id that the downloaded file names refer to.
                    supportSLRDataSet.SearchResultItem.Rows.Add(newSearchResultItemRow);
                    searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem);

                    // Find and download the EndNote file (enw).
                    try
                    {
                        FindDownloadEndnoteFile(newSearchResultItemRow, contentResult, numResult, basePathSpringerLink);
                    }
                    catch (Exception ex)
                    {
                        newSearchResultItemRow.errors_onload += "FindDownloadEndnoteFile: " + ex.Message + ex.Source + System.Environment.NewLine;
                    }

                    // Find the properties of the result (title, abstract, year, pages, data source type).
                    try
                    {
                        FindProperties(newSearchResultItemRow, contentResult);
                    }
                    catch (Exception ex)
                    {
                        newSearchResultItemRow.errors_onload += "FindProperties: " + ex.Message + ex.Source + System.Environment.NewLine;
                    }

                    searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem);
                }

                numResult++;
            }
        }

        // Advance even when the page could not be fetched; otherwise a failing page would be
        // requested again and again without ever reaching page_end.
        page++;
        if (page > page_end)
        {
            hasPages = false;
        }
    }
}
/// <summary>
/// Extracts title, abstract, pages, publication year and type from a SpringerLink article page and
/// stores them on <paramref name="newSearchResultItemRow"/>.
/// </summary>
/// <remarks>
/// Pages and year come from the hidden form inputs named "pages" and "year" when present, otherwise
/// from the <c>ArticleCitation_Pages</c> span and the first <c>&lt;time&gt;</c> element. The type is
/// only written when the hidden "type" input exists. Values that cannot be located are stored as empty
/// strings. The keyword spans are not read because nothing is stored from them.
/// </remarks>
/// <param name="newSearchResultItemRow">Row that receives the extracted values.</param>
/// <param name="contentResult">HTML of the article page.</param>
/// <exception cref="InvalidOperationException">
/// The page has no complete <c>&lt;title&gt;</c> element. All other values have been stored by then.
/// </exception>
private void FindProperties(SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow, string contentResult)
{
    // Text between the first startToken and the next endToken behind it, or null when either is missing.
    Func<string, string, string> textBetween = delegate(string startToken, string endToken)
    {
        int start = contentResult.IndexOf(startToken, StringComparison.Ordinal);
        if (start < 0)
        {
            return null;
        }

        start += startToken.Length;
        int end = contentResult.IndexOf(endToken, start, StringComparison.Ordinal);
        return end < 0 ? null : contentResult.Substring(start, end - start);
    };

    string title = textBetween("<title>", "</title>");
    if (title != null)
    {
        title = title.Replace(" - Springer", string.Empty).Replace(" | SpringerLink", string.Empty);
    }

    // The abstract is whatever follows the heading up to the first closing paragraph tag.
    string valAbstract = textBetween("<h2 class=\"Heading\">Abstract</h2>", "</p>");
    valAbstract = valAbstract == null
        ? string.Empty
        : valAbstract.Replace("<p class=\"Para\">", string.Empty).Trim();

    // For the hidden inputs the value starts right behind value=" and ends at the next double quote.
    string pages = textBetween("<input type=\"hidden\" name=\"pages\" value=\"", "\"")
        ?? textBetween("span class=\"ArticleCitation_Pages\">", "</span>")
        ?? string.Empty;

    string year = textBetween("<input type=\"hidden\" name=\"year\" value=\"", "\"")
        ?? textBetween("<time>", "</time>")
        ?? string.Empty;

    string type = textBetween("<input type=\"hidden\" name=\"type\" value=\"", "\"");
    if (type != null)
    {
        newSearchResultItemRow.type = type;
    }

    newSearchResultItemRow._abstract = valAbstract;
    newSearchResultItemRow.pages = pages;
    newSearchResultItemRow.date_publication = year;

    if (title == null)
    {
        // Reported last so that the values found above are kept; callers log the message.
        throw new InvalidOperationException("No <title> element found in the page content.");
    }

    newSearchResultItemRow.title = title;
}
/// <summary>
/// Re-imports a fixed list of ScienceDirect article addresses: each page is fetched, stored as a new
/// SearchResultItem row (fk_QueryDataSourceRound_id = 2) and then parsed by FindProperties.
/// </summary>
/// <remarks>
/// Does not read any of the search text boxes. Errors raised by FindProperties are appended to the
/// row's <c>errors_onload</c> column; an error while fetching a page is not caught here.
/// </remarks>
private void ReprocessSearchScienceDirect()
{
    List<string> lstUrls = new List<string>
    {
        //"http://www.sciencedirect.com/science/article/pii/S0196064403008217",
        "http://www.sciencedirect.com/science/article/pii/S1386505606002000",
        //"http://www.sciencedirect.com/science/article/pii/S1072751501007694",
        //"http://www.sciencedirect.com/science/article/pii/S0169260700001358",
        "http://www.sciencedirect.com/science/article/pii/S1386505615000660",
        "http://www.sciencedirect.com/science/article/pii/S0146000509001104",
        "http://www.sciencedirect.com/science/article/pii/S0196064408018982",
        "http://www.sciencedirect.com/science/article/pii/S0140673613621509",
        "http://www.sciencedirect.com/science/article/pii/S0029655408001589",
        "http://www.sciencedirect.com/science/article/pii/S1072751507006953",
        "http://www.sciencedirect.com/science/article/pii/S0140673609603159",
        "http://www.sciencedirect.com/science/article/pii/S1471491414000628",
        "http://www.sciencedirect.com/science/article/pii/S0196064411015502",
        "http://www.sciencedirect.com/science/article/pii/S1072751512004474",
        "http://www.sciencedirect.com/science/article/pii/S0002817714623369",
        "http://www.sciencedirect.com/science/article/pii/S1386505605000730",
        "http://www.sciencedirect.com/science/article/pii/S1473309909701768",
        "http://www.sciencedirect.com/science/article/pii/S0196655304003645",
        "http://www.sciencedirect.com/science/article/pii/S1932227511000449",
        "http://www.sciencedirect.com/science/article/pii/S0033350616000093",
        "http://www.sciencedirect.com/science/article/pii/S0736467912010980",
        "http://www.sciencedirect.com/science/article/pii/S1067991X14000765"
    };

    foreach (string url in lstUrls)
    {
        string contentResult = GetContentFromURL(url).Trim();

        using (SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter searchResultItemTableAdapter = new SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter())
        {
            SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow = supportSLRDataSet.SearchResultItem.NewSearchResultItemRow();
            newSearchResultItemRow.fk_QueryDataSourceRound_id = 2; //(int)comboBox1.SelectedValue;
            newSearchResultItemRow.url = url;
            newSearchResultItemRow.page_content = contentResult;
            newSearchResultItemRow.errors_onload = string.Empty;

            // Saved once up front to create the row's id.
            supportSLRDataSet.SearchResultItem.Rows.Add(newSearchResultItemRow);
            searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem);

            // Find the properties of the result (title, year, pages, data source type).
            try
            {
                FindProperties(newSearchResultItemRow, contentResult);
            }
            catch (Exception ex)
            {
                newSearchResultItemRow.errors_onload += "FindProperties: " + ex.Message + ex.Source + System.Environment.NewLine;
            }

            searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem);
        }
    }
}
/// <summary>
/// Walks the ScienceDirect result pages from the page number in textBox5 up to the one in textBox6 and
/// stores one SearchResultItem row (fk_QueryDataSourceRound_id = 8) per hit, filled by FindProperties.
/// </summary>
/// <remarks>
/// The request address is textBox3 + textBox2 used as a format string that receives the page number
/// minus two. The fetched page is split on the text in textBox4; after dropping everything up to the
/// first '&gt;', a piece counts as a hit when it starts with <c>&lt;H2&gt;</c>. The walk stops early when
/// a fetched page contains no hit. EndNote files are not downloaded per hit here, because the first
/// results page can export all results in a single file.
/// </remarks>
private void SearchScienceDirect()
{
    string urlTemplate = textBox3.Text + textBox2.Text;
    string hitSeparator = textBox4.Text;
    int currentPage = int.Parse(textBox5.Text);
    int lastPage = int.Parse(textBox6.Text);

    bool keepGoing = true;
    while (keepGoing)
    {
        string pageUrl = String.Format(urlTemplate, currentPage - 2);
        string pageHtml = string.Empty;
        try
        {
            pageHtml = GetContentFromURL(pageUrl);
        }
        catch (Exception)
        {
            textBox1.Text += "Error on: " + pageUrl + System.Environment.NewLine;
        }

        if (pageHtml != string.Empty)
        {
            // Stays false unless at least one hit shows up on this page.
            keepGoing = false;

            string[] pieces = pageHtml.Split(new string[] { hitSeparator }, StringSplitOptions.None);
            for (int i = 0; i < pieces.Length; i++)
            {
                string piece = pieces[i].Trim();
                if (piece.Contains(">"))
                {
                    piece = piece.Substring(piece.IndexOf(">") + 1);
                }

                if (!piece.StartsWith("<H2>"))
                {
                    continue;
                }

                keepGoing = true;

                // The article address is the value of the first href attribute in the hit.
                const string hrefToken = "href=\"";
                string articleUrl = piece.Substring(piece.IndexOf(hrefToken) + hrefToken.Length);
                articleUrl = articleUrl.Substring(0, articleUrl.IndexOf("\""));

                string articleHtml = GetContentFromURL(articleUrl).Trim();

                SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter adapter = new SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter();
                SupportSLRDataSet.SearchResultItemRow row = supportSLRDataSet.SearchResultItem.NewSearchResultItemRow();
                row.fk_QueryDataSourceRound_id = 8; //(int)comboBox1.SelectedValue;
                row.url = articleUrl;
                row.page_content = articleHtml;
                row.errors_onload = string.Empty;

                // First save creates the row's id.
                supportSLRDataSet.SearchResultItem.Rows.Add(row);
                adapter.Update(supportSLRDataSet.SearchResultItem);

                // Parse title, year, pages and type; a failure is recorded on the row.
                try
                {
                    FindProperties(row, articleHtml);
                }
                catch (Exception ex)
                {
                    row.errors_onload += "FindProperties: " + ex.Message + ex.Source + System.Environment.NewLine;
                }

                adapter.Update(supportSLRDataSet.SearchResultItem);
                adapter.Dispose();
            }
        }

        currentPage++;
        if (currentPage > lastPage)
        {
            keepGoing = false;
        }
    }
}
/// <summary>
/// Extracts title, pages, publication year and type from a ScienceDirect article page and stores them
/// on <paramref name="newSearchResultItemRow"/>.
/// </summary>
/// <remarks>
/// Pages and year are read from the text that follows <c>&lt;p class="volIssue"&gt;</c>; when that
/// paragraph is absent the whole page is searched instead. Pages are the text from "Pages" up to the
/// next <c>&lt;/p&gt;</c>, or else the first number or number range found. The year is the first match
/// of <c>(19|20)\d\d</c>. The type is "Book Section" when the page contains "book/", "Journal" when it
/// contains "journal/", otherwise "Conference proceeding".
/// </remarks>
/// <param name="newSearchResultItemRow">Row that receives the extracted values.</param>
/// <param name="contentResult">HTML of the article page.</param>
/// <exception cref="InvalidOperationException">
/// The page has no complete <c>&lt;title&gt;</c> element. All other values have been stored by then.
/// </exception>
private void FindProperties(SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow, string contentResult)
{
    const string tokenTitle = "<title>";
    const string tokenTitleEnd = "</title>";

    // Case-insensitive search on the page itself instead of lower-casing a copy of the whole page.
    string title = null;
    int titleStart = contentResult.IndexOf(tokenTitle, StringComparison.OrdinalIgnoreCase);
    int titleEnd = titleStart < 0
        ? -1
        : contentResult.IndexOf(tokenTitleEnd, titleStart + tokenTitle.Length, StringComparison.OrdinalIgnoreCase);
    if (titleEnd >= 0)
    {
        titleStart += tokenTitle.Length;
        title = contentResult.Substring(titleStart, titleEnd - titleStart).Replace(System.Environment.NewLine, "").Trim();
    }

    const string tokenVolIssue = "<p class=\"volIssue\">";
    int volIssueStart = contentResult.IndexOf(tokenVolIssue, StringComparison.Ordinal);
    string volIssue = volIssueStart < 0
        ? contentResult
        : contentResult.Substring(volIssueStart + tokenVolIssue.Length);

    // "Pages" is looked up in the same text the value is cut from, so a mention elsewhere on the
    // page can no longer lead to a negative start index.
    string pages = string.Empty;
    int pagesStart = volIssue.IndexOf("Pages", StringComparison.Ordinal);
    int pagesEnd = pagesStart < 0 ? -1 : volIssue.IndexOf("</p>", pagesStart, StringComparison.Ordinal);
    if (pagesEnd >= 0)
    {
        pages = volIssue.Substring(pagesStart, pagesEnd - pagesStart).Replace("Pages", "").Trim();
    }
    else
    {
        Match matchPages = Regex.Match(volIssue, @"\d+(?:-\d+)?(?:,\d+(?:-\d+)?)*");
        if (matchPages.Success)
        {
            pages = matchPages.Value;
        }
    }

    if (volIssue.StartsWith("<a", StringComparison.Ordinal))
    {
        int tagEnd = volIssue.IndexOf(">", StringComparison.Ordinal);
        if (tagEnd >= 0)
        {
            volIssue = volIssue.Substring(tagEnd);
        }
    }

    string year = string.Empty;
    Match matchYear = Regex.Match(volIssue, @"(19|20)\d\d");
    if (matchYear.Success)
    {
        year = matchYear.Value;
    }

    string type;
    if (contentResult.Contains("book/"))
    {
        type = "Book Section";
    }
    else if (contentResult.Contains("journal/"))
    {
        type = "Journal";
    }
    else
    {
        type = "Conference proceeding";
    }

    newSearchResultItemRow.pages = pages;
    newSearchResultItemRow.date_publication = year;
    newSearchResultItemRow.type = type;

    if (title == null)
    {
        // Reported last so that the values found above are kept; callers log the message.
        throw new InvalidOperationException("No <title> element found in the page content.");
    }

    newSearchResultItemRow.title = title;
}
/// <summary>
/// Walks the Wiley result pages from the page number in textBox5 up to the one in textBox6 and stores
/// one SearchResultItem row (fk_QueryDataSourceRound_id = 12) per hit, filled by FindProperties.
/// </summary>
/// <remarks>
/// The request address is textBox3 + textBox2 used as a format string that receives
/// <c>(page - 1) * 20 + 1</c>. The fetched page is split on the text in textBox4 and every piece
/// behind the first separator is treated as a hit whose first href value, prefixed with the text of
/// textBox3, is the article address. The walk stops early when a fetched page contains no hit.
/// </remarks>
private void SearchWiley()
{
    string addressPrefix = textBox3.Text;
    string urlTemplate = addressPrefix + textBox2.Text;
    string hitSeparator = textBox4.Text;
    int currentPage = int.Parse(textBox5.Text);
    int lastPage = int.Parse(textBox6.Text);

    bool keepGoing = true;
    while (keepGoing)
    {
        string pageUrl = String.Format(urlTemplate, (currentPage - 1) * 20 + 1);
        // NOTE(review): as written this replaces "&" with itself; possibly an HTML entity was meant - confirm.
        pageUrl = pageUrl.Replace("&", "&");

        string pageHtml = string.Empty;
        try
        {
            pageHtml = GetContentFromURL(pageUrl);
        }
        catch (Exception)
        {
            textBox1.Text += "Error on: " + pageUrl + System.Environment.NewLine;
        }

        if (pageHtml != string.Empty)
        {
            // Stays false unless at least one hit shows up on this page.
            keepGoing = false;

            string[] pieces = pageHtml.Split(new string[] { hitSeparator }, StringSplitOptions.None);

            // Index 0 is the text in front of the first separator, not a hit.
            for (int i = 1; i < pieces.Length; i++)
            {
                keepGoing = true;
                string piece = pieces[i].Trim();

                const string hrefToken = "href=\"";
                string hrefValue = piece.Substring(piece.IndexOf(hrefToken) + hrefToken.Length);
                hrefValue = hrefValue.Substring(0, hrefValue.IndexOf("\""));
                string articleUrl = addressPrefix + hrefValue.Replace("<", "%3C").Replace(">", "%3E");

                string articleHtml = GetContentFromURL(articleUrl).Trim();

                SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter adapter = new SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter();
                SupportSLRDataSet.SearchResultItemRow row = supportSLRDataSet.SearchResultItem.NewSearchResultItemRow();
                row.fk_QueryDataSourceRound_id = 12; //(int)comboBox1.SelectedValue;
                row.url = articleUrl;
                row.page_content = articleHtml;
                row.errors_onload = string.Empty;

                // First save creates the row's id.
                supportSLRDataSet.SearchResultItem.Rows.Add(row);
                adapter.Update(supportSLRDataSet.SearchResultItem);

                // Parse title, year, pages and type; a failure is recorded on the row.
                try
                {
                    FindProperties(row, articleHtml);
                }
                catch (Exception ex)
                {
                    row.errors_onload += "FindProperties: " + ex.Message + ex.Source + System.Environment.NewLine;
                }

                adapter.Update(supportSLRDataSet.SearchResultItem);
                adapter.Dispose();
            }
        }

        currentPage++;
        if (currentPage > lastPage)
        {
            keepGoing = false;
        }
    }
}
/// <summary>
/// Extracts title, pages, publication year and type from the <c>citation_*</c> meta tags of an article
/// page and stores them on <paramref name="newSearchResultItemRow"/>.
/// </summary>
/// <remarks>
/// A meta value is the text between <c>name="X" content="</c> and the next <c>" /&gt;</c>, with line
/// breaks removed and trimmed. Pages are "first-last", or just the first page when
/// <c>citation_lastpage</c> is absent. The year is taken from <c>citation_publication_date</c>, else
/// <c>citation_online_date</c>, reduced to the first match of <c>(19|20)\d\d</c> when there is one.
/// The type is "Book Section" when the page mentions <c>citation_book_title</c>, "Journal" when it
/// mentions <c>citation_journal_title</c>, otherwise "Conference proceeding".
/// </remarks>
/// <param name="newSearchResultItemRow">Row that receives the extracted values.</param>
/// <param name="contentResult">HTML of the article page.</param>
/// <exception cref="InvalidOperationException">
/// The page has no complete <c>citation_title</c> meta tag. All other values have been stored by then.
/// </exception>
private void FindProperties(SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow, string contentResult)
{
    // Content of the named meta tag, or null when the tag or its closing " /> is missing.
    // The tag is matched case-insensitively on the page itself, without lower-casing a copy of it.
    Func<string, string> metaContent = delegate(string metaName)
    {
        string startToken = "name=\"" + metaName + "\" content=\"";
        int start = contentResult.IndexOf(startToken, StringComparison.OrdinalIgnoreCase);
        if (start < 0)
        {
            return null;
        }

        start += startToken.Length;
        int end = contentResult.IndexOf("\" />", start, StringComparison.Ordinal);
        if (end < 0)
        {
            return null;
        }

        return contentResult.Substring(start, end - start).Replace(System.Environment.NewLine, "").Trim();
    };

    string title = metaContent("citation_title");
    if (title != null)
    {
        // NOTE(review): the first Replace swaps an en dash for itself as written; possibly an HTML
        // entity was meant as the search text - confirm.
        title = title.Replace("–", "–").Replace("‐", "-");
    }

    string pages = string.Empty;
    string firstPage = metaContent("citation_firstpage");
    if (firstPage != null)
    {
        // Without a last-page tag only the first page is kept, rather than unrelated page text.
        string lastPage = metaContent("citation_lastpage");
        pages = lastPage == null ? firstPage : firstPage + "-" + lastPage;
    }

    string year = metaContent("citation_publication_date")
        ?? metaContent("citation_online_date")
        ?? string.Empty;
    Match matchYear = Regex.Match(year, @"(19|20)\d\d");
    if (matchYear.Success)
    {
        year = matchYear.Value;
    }

    string type;
    if (contentResult.Contains("citation_book_title"))
    {
        type = "Book Section";
    }
    else if (contentResult.Contains("citation_journal_title"))
    {
        type = "Journal";
    }
    else
    {
        type = "Conference proceeding";
    }

    newSearchResultItemRow.pages = pages;
    newSearchResultItemRow.date_publication = year;
    newSearchResultItemRow.type = type;

    if (title == null)
    {
        // Reported last so that the values found above are kept; callers log the message.
        throw new InvalidOperationException("No citation_title meta tag found in the page content.");
    }

    newSearchResultItemRow.title = title;
}
/// <summary>
/// Imports the records of an exported file: the file named in textBox3 is read with GetContentFromFile,
/// split on the text in textBox4, and every piece behind the first separator becomes one
/// SearchResultItem row (fk_QueryDataSourceRound_id = 11).
/// </summary>
/// <remarks>
/// Per record, the first line is the type code ("JOUR" gives "Journal", "CONF" gives "Conference
/// proceeding", anything else "Book Section"). The tags "TI - ", optionally "SP - " with "EP - ",
/// "PY - " and "UR - " are then read in that order, each value running to the next "\n", and stored as
/// title, pages ("start-end"), date_publication and url. After the row is saved, the quoted title is
/// sent to a SearchClient (the inline comment calls it a Google search), hit content is fetched with
/// GetContentFromURL and stored in page_content, and the row is saved again.
/// If reading the file fails, the error is logged in textBox1 and no record is imported.
///
/// NOTE(review): each tag is located with IndexOf without checking for -1, so a record lacking
/// "TI - ", "EP - ", "PY - " or "UR - " appears to yield a wrong value or an exception - confirm.
/// NOTE(review): the error message inside the hit loop prints <c>url</c> (the record's UR value), not
/// <c>urlResult</c>, the address that was actually requested - looks unintended.
/// NOTE(review): in this collapsed layout it cannot be told whether <c>break;</c> belongs to the
/// commented-out <c>//if (...)</c> line; that decides whether only the first hit or every hit is
/// fetched. Verify against the formatted original before restructuring this method.
/// </remarks>
private void SearchScopus() { string filepath = textBox3.Text; string typeLocator = textBox4.Text; string resultsFromFile = string.Empty; try { resultsFromFile = GetContentFromFile(filepath); } catch (Exception ex) { textBox1.Text += "Error on: " + filepath + Environment.NewLine; } List <string> listResults = resultsFromFile.Split(new string[] { typeLocator }, StringSplitOptions.None).ToList(); listResults.RemoveAt(0); foreach (string result in listResults) { string resultTrim = result.Trim(); SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter searchResultItemTableAdapter = new SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter(); SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow = supportSLRDataSet.SearchResultItem.NewSearchResultItemRow(); newSearchResultItemRow.fk_QueryDataSourceRound_id = 11; //(int)comboBox1.SelectedValue; string line_break = "\n"; string temp = resultTrim; string type = temp.Substring(0, temp.IndexOf(line_break)).Trim(); type = (type == "JOUR") ? "Journal" : (type == "CONF") ? 
"Conference proceeding" : "Book Section"; temp = temp.Substring(temp.IndexOf(line_break)); string temp2 = "TI - "; temp = temp.Substring(temp.IndexOf(temp2) + temp2.Length); string title = temp.Substring(0, temp.IndexOf(line_break)).Trim(); string pages = string.Empty; temp2 = "SP - "; if (temp.Contains(temp2)) { temp = temp.Substring(temp.IndexOf(temp2) + temp2.Length); pages = temp.Substring(0, temp.IndexOf(line_break)).Trim(); temp2 = "EP - "; temp = temp.Substring(temp.IndexOf(temp2) + temp2.Length); pages += "-" + temp.Substring(0, temp.IndexOf(line_break)).Trim(); } temp2 = "PY - "; temp = temp.Substring(temp.IndexOf(temp2) + temp2.Length); string year = temp.Substring(0, temp.IndexOf(line_break)).Trim(); temp2 = "UR - "; temp = temp.Substring(temp.IndexOf(temp2) + temp2.Length); string url = temp.Substring(0, temp.IndexOf(line_break)).Trim(); newSearchResultItemRow.type = type; newSearchResultItemRow.title = title; newSearchResultItemRow.pages = pages; newSearchResultItemRow.date_publication = year; newSearchResultItemRow.url = url; supportSLRDataSet.SearchResultItem.Rows.Add(newSearchResultItemRow); searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem); try { // Search by title in Google string query = "\"" + title + "\""; var client = new SearchClient(query); string content = string.Empty; foreach (var hit in client.Query()) { string urlResult = hit.CleanUri.ToString(); try { content = GetContentFromURL(urlResult); } catch (Exception ex) { textBox1.Text += "Error on: " + url + Environment.NewLine; } //if (!content.StartsWith("%PDF") && content.ToLower().Contains("abstract")) break; } newSearchResultItemRow.page_content = content; } catch (Exception ex) { textBox1.Text += "Error on Googling: " + ex.Message + Environment.NewLine; } newSearchResultItemRow.errors_onload = string.Empty; searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem); searchResultItemTableAdapter.Dispose(); } }
/// <summary>
/// Extracts title, pages, publication year and type from an article page of unknown origin, using a
/// chain of site-specific markers, and stores them on <paramref name="newSearchResultItemRow"/>.
/// </summary>
/// <remarks>
/// The title is the content of the first <c>&lt;title&gt;</c> element with several site suffixes
/// removed. Pages and year are searched in the text that follows <c>&lt;p class="volIssue"&gt;</c>,
/// or in the whole page when that paragraph is absent. When no pages or year pattern is found the
/// value is stored as an empty string. The type is "Book Section" when the page contains "book/",
/// "Journal" when it contains "journal/" or when "journal" appears within 300 characters after
/// "Publication title", otherwise "Conference proceeding".
/// </remarks>
/// <param name="newSearchResultItemRow">Row that receives the extracted values.</param>
/// <param name="contentResult">HTML of the article page.</param>
/// <exception cref="InvalidOperationException">
/// The page has no complete <c>&lt;title&gt;</c> element. All other values have been stored by then.
/// </exception>
private void FindProperties(SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow, string contentResult)
{
    // Text behind the first occurrence of marker, or the text unchanged when marker is absent.
    Func<string, string, string> after = delegate(string text, string marker)
    {
        int at = text.IndexOf(marker, StringComparison.Ordinal);
        return at < 0 ? text : text.Substring(at + marker.Length);
    };

    // Text in front of the first occurrence of marker, or the text unchanged when marker is absent.
    Func<string, string, string> before = delegate(string text, string marker)
    {
        int at = text.IndexOf(marker, StringComparison.Ordinal);
        return at < 0 ? text : text.Substring(0, at);
    };

    // ---- title -------------------------------------------------------------------------------
    const string tokenTitle = "<title>";
    const string tokenTitleEnd = "</title>";

    string title = null;
    int titleStart = contentResult.IndexOf(tokenTitle, StringComparison.OrdinalIgnoreCase);
    int titleEnd = titleStart < 0
        ? -1
        : contentResult.IndexOf(tokenTitleEnd, titleStart + tokenTitle.Length, StringComparison.OrdinalIgnoreCase);
    if (titleEnd >= 0)
    {
        titleStart += tokenTitle.Length;
        title = contentResult.Substring(titleStart, titleEnd - titleStart)
            .Replace(System.Environment.NewLine, "")
            .Trim()
            .Replace(" - Springer", string.Empty)
            .Replace(" | SpringerLink", string.Empty)
            .Replace(" | HTML", string.Empty)
            .Replace(" | HazNet", string.Empty);

        title = after(title, "| Free Full-Text |");

        // A further opening title tag inside the text: keep what follows it, up to the first " | ".
        if (title.Contains("<title"))
        {
            title = after(title, "<title");
            title = after(title, ">");
            title = before(title, " | ");
        }
    }

    // ---- text that pages and year are searched in --------------------------------------------
    const string tokenVolIssue = "<p class=\"volIssue\">";
    int volIssueStart = contentResult.IndexOf(tokenVolIssue, StringComparison.Ordinal);
    string volIssue = volIssueStart < 0
        ? contentResult
        : contentResult.Substring(volIssueStart + tokenVolIssue.Length);

    // ---- pages -------------------------------------------------------------------------------
    string pages = string.Empty;
    bool pagesLocated = false;
    if (contentResult.Contains("Pages"))
    {
        if (volIssue.Contains(", Pages "))
        {
            pages = before(after(volIssue, ", Pages ").Trim(), ",").Trim();
            pagesLocated = true;
        }
        else
        {
            // Both markers must exist in the searched text; otherwise fall back to the number pattern.
            int pagesStart = volIssue.IndexOf("Pages", StringComparison.Ordinal);
            int pagesEnd = pagesStart < 0 ? -1 : volIssue.IndexOf("</p>", pagesStart, StringComparison.Ordinal);
            if (pagesEnd >= 0)
            {
                pages = volIssue.Substring(pagesStart, pagesEnd - pagesStart).Replace("Pages", "").Replace("pp", "").Trim();
                pagesLocated = true;
            }
        }

        if (pagesLocated)
        {
            // Strip the markup that different sites wrap around the page range.
            pages = after(pages, "postProcessingHook\">").Trim();
            pages = after(pages, "page-ranges\">").Trim();
            pages = before(pages, "</dd>").Trim();

            const string tagClose = "\">";
            if (pages.StartsWith(tagClose, StringComparison.Ordinal))
            {
                pages = pages.Substring(tagClose.Length);
            }

            pages = before(pages, tagClose);
            pages = before(pages, "</");
        }
    }

    if (!pagesLocated)
    {
        Match matchPages = Regex.Match(volIssue, @"\d+(?:-\d+)?(?:,\d+(?:-\d+)?)*");
        if (matchPages.Success)
        {
            pages = matchPages.Value;
        }
    }

    if (volIssue.StartsWith("<a", StringComparison.Ordinal))
    {
        int tagEnd = volIssue.IndexOf(">", StringComparison.Ordinal);
        if (tagEnd >= 0)
        {
            volIssue = volIssue.Substring(tagEnd);
        }
    }

    // ---- year --------------------------------------------------------------------------------
    // Each marker that is present moves the search start forward; the year is the first match after it.
    string yearArea = volIssue;
    yearArea = after(yearArea, "copyrightyear\" value=\"");
    yearArea = after(yearArea, "publicationDate\" content=\"");
    yearArea = after(yearArea, "Publication date </div><div class=\"display_record_indexing_data\"><span class=\"subjectField-postProcessingHook\">");
    yearArea = after(yearArea, "Publication History");

    Match matchYear = Regex.Match(yearArea, @"(19|20)\d\d");
    string year = matchYear.Success ? matchYear.Value : string.Empty;

    // ---- type --------------------------------------------------------------------------------
    string type;
    if (contentResult.Contains("book/"))
    {
        type = "Book Section";
    }
    else if (contentResult.Contains("journal/"))
    {
        type = "Journal";
    }
    else
    {
        type = "Conference proceeding";
    }

    const string tokenPublicationTitle = "Publication title";
    int publicationTitleAt = volIssue.IndexOf(tokenPublicationTitle, StringComparison.Ordinal);
    if (publicationTitleAt >= 0)
    {
        // Look at no more than 300 characters, and never past the end of the text.
        int windowStart = publicationTitleAt + tokenPublicationTitle.Length;
        int windowLength = Math.Min(300, volIssue.Length - windowStart);
        string publicationTitle = volIssue.Substring(windowStart, windowLength);
        if (publicationTitle.IndexOf("journal", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            type = "Journal";
        }
    }

    newSearchResultItemRow.pages = pages;
    newSearchResultItemRow.date_publication = year;
    newSearchResultItemRow.type = type;

    if (title == null)
    {
        // Reported last so that the values found above are kept; callers log the message.
        throw new InvalidOperationException("No <title> element found in the page content.");
    }

    newSearchResultItemRow.title = title;
}