/// <summary>
/// Creates the parser: instantiates the API service client, reads the settings through
/// <c>SettingsRecord.ReadSettings()</c>, and caches the <c>MASMaxResults</c> value from them.
/// </summary>
public MicrosoftScholarParser()
{
    // API service client.
    client = new APIService();

    // Read the settings once and keep the configured result cap.
    sett = new SettingsRecord();
    set = sett.ReadSettings();
    masMaxResults = set.MASMaxResults;
}
/// <summary>
/// Loads the next page of CiteSeer citation results for <paramref name="url"/> and appends one
/// <see cref="Paper"/> per result row to <paramref name="p"/>.
/// </summary>
/// <param name="url">
/// A citations listing URL, or a document page URL containing "viewdoc"
/// (e.g. http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.3487). For a document page
/// the citations link is first resolved from the element with id "docCites".
/// </param>
/// <param name="p">Papers collected so far; its count is sent as the <c>start</c> offset.</param>
/// <returns>
/// <c>true</c> when a result page was found and its rows were appended. <c>false</c> when no
/// further page should be requested (count not a multiple of 10, or the CiteSeerMaxResults
/// setting has been reached), when a document page has no citations link, or when the result
/// list is missing from the loaded page.
/// </returns>
/// <remarks>
/// Exceptions thrown by <c>HtmlWeb.Load</c> are not caught here and propagate to the caller.
/// </remarks>
public bool getCitationsNext(string url, ref List<Paper> p)
{
    const string SiteRoot = "http://citeseer.ist.psu.edu";

    // A count that is not a multiple of 10 is treated as "the previous page was the last one"
    // (presumably CiteSeer returns 10 results per page - TODO confirm).
    const int PageSize = 10;

    // Check the paging limits first so that no HTTP request is made when the answer is
    // already known to be "no more pages".
    SettingsRecord sr = new SettingsRecord();
    Settings s = sr.ReadSettings();
    if (p.Count % PageSize != 0 || p.Count >= s.CiteSeerMaxResults)
    {
        return false;
    }

    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc;

    if (url.Contains("viewdoc"))
    {
        // Document page: follow its "cited by" link to reach the citations listing.
        doc = web.Load(url);
        HtmlNode citUrl = doc.DocumentNode.SelectSingleNode("//*[@id=\"docCites\"]/td[2]/a");
        if (citUrl == null)
        {
            return false;
        }
        url = SiteRoot + citUrl.GetAttributeValue("href", "");
    }

    url = url + "&sort=cite&start=" + p.Count;
    doc = web.Load(url);
    if (doc == null)
    {
        Console.WriteLine("Load Error!");
        return false;
    }
    Console.WriteLine("Document Loaded!");

    HtmlNodeCollection rows = doc.DocumentNode.SelectNodes("//*[@id=\"result_list\"]/div");
    if (rows == null)
    {
        Console.WriteLine("Load Error! Next page doesn't exist! Returned.");
        return false;
    }

    foreach (HtmlNode row in rows)
    {
        // Query each node once; every field falls back to an empty/zero value when its
        // node is missing or its text cannot be parsed.
        HtmlNode titleLink = row.SelectSingleNode("h3/a");
        HtmlNode authorSpan = row.SelectSingleNode("div[1]/span[1]");
        HtmlNode yearSpan = row.SelectSingleNode("div[1]/span[@class=\"pubyear\"]");
        HtmlNode abstractDiv = row.SelectSingleNode("div[2]");
        HtmlNode citedByLink = row.SelectSingleNode("div[3]/a[@title=\"number of citations\"]");

        string title = "";
        string pUrl = "";
        if (titleLink != null)
        {
            title = titleLink.InnerText.Trim();
            pUrl = SiteRoot + titleLink.GetAttributeValue("href", "");
        }

        // The first 3 characters of the author text are skipped
        // (presumably a "by " label - TODO confirm against the live markup).
        string authNames = authorSpan != null
            ? CiteSeerTextAfterPrefix(authorSpan.InnerText, 3).Trim()
            : "";

        // The first 2 characters of the year text are skipped
        // (presumably a ", " separator - TODO confirm against the live markup).
        int year = yearSpan != null
            ? ParseCiteSeerInt(CiteSeerTextAfterPrefix(yearSpan.InnerText, 2))
            : 0;

        string abs = abstractDiv != null ? abstractDiv.InnerText : "";

        int numCit = citedByLink != null
            ? ParseCiteSeerCitationCount(citedByLink.InnerText)
            : 0;

        p.Add(new Paper(title, pUrl, authNames, abs, year, "", "", numCit, pUrl, 0));
    }

    return true;
}

// Returns text without its first prefixLength characters, or "" when text is null or too short.
private static string CiteSeerTextAfterPrefix(string text, int prefixLength)
{
    if (text == null || text.Length < prefixLength)
    {
        return "";
    }
    return text.Substring(prefixLength);
}

// Parses an integer (surrounding whitespace allowed); returns 0 when text is not a valid int.
private static int ParseCiteSeerInt(string text)
{
    int value;
    bool parsed = int.TryParse(
        text,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out value);
    return parsed ? value : 0;
}

// Extracts the citation count from the "number of citations" link text; returns 0 when absent.
private static int ParseCiteSeerCitationCount(string linkText)
{
    // The first 9 characters are skipped (presumably a "Cited by " label - TODO confirm).
    string rest = CiteSeerTextAfterPrefix(linkText, 9);

    // The number ends at the first space. When there is no space the whole remainder is the
    // number; previously that case threw inside an empty catch and the count was lost.
    int spaceAt = rest.IndexOf(' ');
    string digits = spaceAt >= 0 ? rest.Substring(0, spaceAt) : rest;
    return ParseCiteSeerInt(digits);
}
/// <summary>
/// Prepares a CiteSeerX search, loads the first result page and calls <c>getNoResult()</c>.
/// </summary>
/// <param name="searchEle">Author name or journal (venue) name to search for.</param>
/// <param name="searchTy">Search type: 0 for an author search, 1 for a journal search.</param>
/// <param name="affISSN">
/// The affiliation when <paramref name="searchTy"/> is 0, the ISSN when it is 1.
/// </param>
/// <param name="key">Keyword stored (normalised) in <c>keyword</c>.</param>
/// <remarks>
/// Any other <paramref name="searchTy"/> leaves the URL empty, so the load fails and
/// <c>noResult</c> is set to 0. Null string arguments are treated as empty when normalised.
/// </remarks>
public CiteSeerJournal_FinalAuthorSearch(string searchEle, int searchTy, string affISSN, string key)
{
    SettingsRecord sett = new SettingsRecord();
    Settings set = sett.ReadSettings();
    maxResults = set.CiteSeerMaxResults;

    searchType = searchTy;
    affiliation = "";
    ISSN = "";

    // Author/Journal receive the raw search text; only the URL pieces are normalised.
    auth1 = new Author(searchEle);
    journ1 = new Journal(searchEle);

    keyword = ToCiteSeerQueryTerm(key);
    searchElement = ToCiteSeerQueryTerm(searchEle);

    if (searchType == 0)
    {
        affiliation = ToCiteSeerQueryTerm(affISSN);
        initialURL = "http://citeseerx.ist.psu.edu/search?q=author%3A%28" + searchElement + "%29&submit=Search&ic=1&sort=cite&t=doc";
    }
    else if (searchType == 1)
    {
        // Take the ISSN from the argument. Previously the field was reset to "" and then
        // trimmed, so the supplied ISSN was never stored.
        ISSN = (affISSN ?? "").Trim().Replace(" ", "");
        initialURL = "http://citeseerx.ist.psu.edu/search?q=venue%3A%28" + searchElement + "%29&submit=Search&ic=1&sort=cite&t=doc";
    }
    else
    {
        initialURL = "";
    }

    CitePage = new HtmlWeb();
    noResult = 10;
    try
    {
        CiteDoc = CitePage.Load(initialURL);
    }
    catch (Exception ex)
    {
        // Best effort: a failed load is reported and recorded as "no results" instead of
        // aborting construction.
        Console.WriteLine("Load Error! " + ex.Message);
        noResult = 0;
    }

    PageNo = 1;

    // NOTE(review): getNoResult() is not visible here and is called even after a failed
    // load - confirm it tolerates CiteDoc not having been assigned.
    getNoResult();
}

// Trims text and turns every whitespace run into a single '+', for use in the query string.
private static string ToCiteSeerQueryTerm(string text)
{
    if (text == null)
    {
        return "";
    }
    return Regex.Replace(text.Trim(), @"\s+", "+");
}