/// <summary>
/// Requests the next window of publications for a journal and appends every returned paper
/// to <paramref name="journ"/>.
/// </summary>
/// <param name="journalName">Value placed in the request's JournalQuery.</param>
/// <param name="ISSN">Not read by this method.</param>
/// <param name="keywords">Not read by this method.</param>
/// <param name="journ">Journal whose current paper count fixes the request window
/// (count + 1 through count + 100) and which receives the returned papers.</param>
/// <returns>false when fewer than 100 papers came back; true otherwise.</returns>
public bool getJournalsNext(string journalName, string ISSN, string keywords, ref SG.Journal journ)
{
    const int pageSize = 100;

    Request requestJournal = new Request
    {
        AppID = "c49b4e59-08dd-4f27-a53b-53cc72f169af",
        ResultObjects = ObjectType.Publication,
        JournalQuery = journalName,
        StartIdx = Convert.ToUInt32(journ.getNumberOfPapers() + 1),
        EndIdx = Convert.ToUInt32(journ.getNumberOfPapers() + pageSize)
    };

    List<SG.Paper> fetched = generatePaper(requestJournal, pageSize, journ.getNumberOfPapers());
    foreach (SG.Paper paper in fetched)
    {
        journ.addPaper(paper);
    }

    // A batch of at least pageSize papers is reported as true, anything smaller as false.
    return fetched.Count >= pageSize;
}
/// <summary>
/// Scrapes the next 100-entry page of an author's paper list and appends the papers to
/// <paramref name="author"/>, stopping once the configured GSMaxResults limit is reached.
/// </summary>
/// <param name="authUrl">Base author URL; the page-size and start-offset query parameters are
/// appended to it.</param>
/// <param name="author">Author whose current paper count is used as the start offset and who
/// receives the scraped papers.</param>
/// <returns>
/// true when every paper of the page was added;
/// false when the current count is negative, not a multiple of 100, or already at/over the
/// limit, when the page holds no papers, or when the limit is hit while adding;
/// null when the scraper reports failure through its status flag or yields a null list.
/// </returns>
public bool? getAuthStatisticsNextPage(string authUrl, ref SG.Author author)
{
    setts = setRecords.ReadSettings();
    int limit = setts.GSMaxResults;
    int count = author.getNumberOfPapers();

    if (count < 0 || count % 100 != 0 || count >= limit)
    {
        return false;
    }

    string pageUrl = authUrl + ("&pagesize=100&cstart=" + count);
    bool scraperOk = true;
    GSAuthScraper scraper = new GSAuthScraper(pageUrl, count, ref scraperOk);
    if (!scraperOk)
    {
        return null;
    }

    List<SG.Paper> pagePapers = scraper.getPapersOfCurrentPage();
    if (pagePapers == null)
    {
        return null;
    }
    if (pagePapers.Count == 0)
    {
        return false;
    }

    for (int i = 0; i < pagePapers.Count; i++)
    {
        // Stop as soon as the author holds exactly the configured maximum.
        if (count == limit)
        {
            return false;
        }
        author.addPaper(pagePapers[i]);
        count++;
    }
    return true;
}
/// <summary>
/// Requests the next window of publications for an author id and appends every returned paper
/// to <paramref name="auth"/>.
/// </summary>
/// <param name="authid">Author identifier; converted with Convert.ToUInt32, so a value that is
/// not a valid unsigned integer makes the conversion throw.</param>
/// <param name="auth">Author whose current paper count fixes the request window
/// (count + 1 through count + 100) and who receives the returned papers.</param>
/// <returns>false when fewer than 100 papers came back; true otherwise.</returns>
public bool getAuthStatisticsNext(string authid, ref SG.Author auth)
{
    const int pageSize = 100;

    Request requestPaper = new Request
    {
        AppID = "c49b4e59-08dd-4f27-a53b-53cc72f169af",
        ResultObjects = ObjectType.Publication,
        AuthorID = Convert.ToUInt32(authid),
        StartIdx = Convert.ToUInt32(auth.getNumberOfPapers() + 1),
        EndIdx = Convert.ToUInt32(auth.getNumberOfPapers() + pageSize)
    };

    List<SG.Paper> fetched = generatePaper(requestPaper, pageSize, auth.getNumberOfPapers());
    foreach (SG.Paper paper in fetched)
    {
        auth.addPaper(paper);
    }

    // A batch of at least pageSize papers is reported as true, anything smaller as false.
    return fetched.Count >= pageSize;
}
// SEARCH PAGE RESULTS NEXT PAGE
/// <summary>
/// Loads one Google Scholar search-results page from <paramref name="this_url"/> into the
/// <c>doc</c> field, appends each result found on it to <paramref name="author"/> as an
/// SG.Paper, and stores the link to the following page in <paramref name="next_url"/>.
/// </summary>
/// <param name="this_url">Address of the results page to load. A null value returns false.</param>
/// <param name="author">Author that receives the scraped papers; its current paper count
/// seeds both the limit check and the rank of the first new paper.</param>
/// <param name="next_url">Set to the absolute URL taken from the pager link, or to null when
/// no such link (or an empty href) is present. Left untouched on every early return.</param>
/// <returns>
/// true when every result on the page was added and <paramref name="next_url"/> was updated;
/// false when the author already holds at least GSMaxResults papers, when
/// <paramref name="this_url"/> is null, when the page has no result nodes and no captcha was
/// detected, or when the limit is reached part-way through the page;
/// null when loading the page throws, or when the page has no result nodes and
/// checkForCaptcha() reports a captcha.
/// </returns>
public bool? getAuthorsNextPage(string this_url, ref SG.Author author, ref string next_url)
{
    setts = setRecords.ReadSettings();
    int maxResults = setts.GSMaxResults;
    int num = author.getNumberOfPapers();
    if (num >= maxResults) return false;

    // CONNECTIONS
    if (this_url == null) return false;
    HtmlWeb web = new HtmlWeb();
    try
    {
        doc = web.Load(this_url);
    }
    catch (Exception)
    {
        // Deliberate best-effort: any load failure is reported to the caller as null.
        return null;
    }

    HtmlNodeCollection searchResults = doc.DocumentNode.SelectNodes("//div[@class=\"gs_ri\"]");
    if (searchResults == null)
    {
        if (checkForCaptcha())
        {
            Console.WriteLine("Captcha problem ...");
            return null;
        }
        Console.WriteLine("No results ...");
        return false;
    }

    int rank = author.getNumberOfPapers() + 1;
    foreach (HtmlNode n in searchResults)
    {
        // Checked before parsing so a result that would be discarded is never parsed.
        if (num == maxResults) return false;

        // TITLE AND TITLE LINK
        string title = "";
        string titleLink = "";
        HtmlNode titleNode = n.SelectSingleNode(".//*[@class=\"gs_rt\"]");
        if (titleNode != null) // a result without a title node used to throw NullReferenceException
        {
            title = titleNode.InnerText;
            HtmlNode linkNode = titleNode.SelectSingleNode(".//a");
            if (linkNode != null)
            {
                // Stripping "amp;" turns an HTML-encoded "&amp;" back into "&".
                titleLink = linkNode.GetAttributeValue("href", "").Replace("amp;", "");
            }
        }

        // AUTHORS AND PUBLICATION
        string authors = "";
        string publication = "";
        string publisher = "";
        int year = 1970; // fallback when no year can be parsed
        HtmlNode bylineNode = n.SelectSingleNode(".//*[@class=\"gs_a\"]");
        if (bylineNode != null)
        {
            // The byline is split on '-': segment 0 = authors, 1 = year or "publication, year",
            // 2 = publisher. Segments beyond the third are ignored.
            string[] segments = bylineNode.InnerText.Split('-');
            authors = segments[0];
            if (segments.Length >= 3) publisher = segments[2];
            if (segments.Length >= 2) parseYearAndPublication(segments[1], ref year, ref publication);
        }

        // SUMMARY
        HtmlNode summaryNode = n.SelectSingleNode(".//*[@class=\"gs_rs\"]");
        string summary = summaryNode != null ? summaryNode.InnerText : "";

        // CITATION STUFF
        int no_of_citations = 0;
        string cited_by_url = "";
        HtmlNode footerNode = n.SelectSingleNode(".//*[@class=\"gs_fl\"]");
        HtmlNode citedByNode = footerNode != null ? footerNode.FirstChild : null;
        if (citedByNode != null)
        {
            string countText = (citedByNode.InnerText ?? "").Replace("Cited by", "").Trim();
            int parsedCount;
            if (int.TryParse(countText, out parsedCount))
            {
                no_of_citations = parsedCount;
            }

            // The link is only kept when a non-zero citation count was parsed.
            string citedHref = no_of_citations != 0 ? citedByNode.GetAttributeValue("href", "") : "";
            if (!citedHref.Equals(""))
            {
                cited_by_url = ("http://scholar.google.com" + citedHref).Replace("amp;", "");
            }
        }

        // String.Trim returns a new string; the result has to be assigned to take effect.
        publisher = publisher.Trim();
        publication = publication.Trim();

        SG.Paper paper = new SG.Paper(title, titleLink, authors, summary, year, publication, publisher, no_of_citations, cited_by_url, rank);
        author.addPaper(paper);
        num++;
        rank++;
    }

    //NEXT PAGE URL
    HtmlNode pagerLink = doc.DocumentNode.SelectSingleNode(".//*[@id=\"gs_n\"]//table//td[@align=\"left\"]//a");
    string pagerHref = pagerLink != null ? pagerLink.GetAttributeValue("href", "") : "";
    if (!pagerHref.Equals(""))
    {
        next_url = ("http://scholar.google.com" + pagerHref).Replace("amp;", "");
    }
    else
    {
        next_url = null;
    }

    return true;
}

// Reads a year and/or publication name from the middle byline segment: a bare integer is taken
// as the year; otherwise the text before the first comma becomes the publication and the text
// after it is tried as the year. Values that cannot be parsed leave the arguments unchanged.
private static void parseYearAndPublication(string segment, ref int year, ref string publication)
{
    int parsed;
    if (int.TryParse(segment, out parsed))
    {
        year = parsed;
        return;
    }

    string[] parts = segment.Split(',');
    if (parts.Length > 1 && int.TryParse(parts[1], out parsed))
    {
        year = parsed;
    }
    publication = parts[0];
}