/// <summary>
/// Builds the parser: creates the API client, reads the stored settings and
/// caches the maximum-results value taken from them.
/// </summary>
public MicrosoftScholarParser()
 {
     // Service object used by this parser.
     client = new APIService();

     // Read the settings once and keep both the reader and the loaded record.
     SettingsRecord record = new SettingsRecord();
     sett = record;
     set = record.ReadSettings();

     // Result cap taken from the loaded settings.
     masMaxResults = set.MASMaxResults;
 }
Example #2
0
        // Host prefix prepended to the site-relative hrefs found in CiteSeer pages.
        private const string CiteSeerCitationsBaseUrl = "http://citeseer.ist.psu.edu";

        /// <summary>
        /// Loads the next page of a CiteSeer citation listing and appends one
        /// <see cref="Paper"/> per result row to <paramref name="p"/>.
        /// </summary>
        /// <param name="url">
        /// Either a "viewdoc" summary page (its citations link is resolved first) or a
        /// listing URL. "&amp;sort=cite&amp;start=N" is appended, where N is the current
        /// size of <paramref name="p"/>.
        /// </param>
        /// <param name="p">Papers collected so far; its Count doubles as the paging offset.</param>
        /// <returns>
        /// true when a result page was found and its rows were appended; false when
        /// <paramref name="url"/> is null or empty, when the paging check fails (Count is
        /// not a multiple of 10 or has reached the configured CiteSeer maximum), when the
        /// summary page has no citations link, or when the page has no result rows.
        /// </returns>
        /// <remarks>Exceptions thrown by HtmlWeb.Load are not caught here.</remarks>
        public bool getCitationsNext(string url,ref List<Paper> p)
        {
            if (string.IsNullOrEmpty(url))
                return false;

            // Cheap stop conditions first, so no page is fetched when nothing will be added.
            // NOTE(review): the "% 10" test presumably means a full page holds 10 rows - confirm.
            SettingsRecord sr = new SettingsRecord();
            Settings s = sr.ReadSettings();
            if (p.Count % 10 != 0 || p.Count >= s.CiteSeerMaxResults)
                return false;

            HtmlWeb web = new HtmlWeb();
            HtmlDocument doc;

            if (url.Contains("viewdoc"))//e.g. http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.3487
            {
                // A summary page was given: follow its "cited by" link to reach the listing.
                doc = web.Load(url);
                HtmlNode citUrl = doc.DocumentNode.SelectSingleNode("//*[@id=\"docCites\"]/td[2]/a");
                if (citUrl == null)
                    return false;
                url = CiteSeerCitationsBaseUrl + citUrl.GetAttributeValue("href", "");
            }

            url = url + "&sort=cite&start=" + p.Count;

            doc = web.Load(url);

            if (doc != null)
                Console.WriteLine("Document Loaded!");
            else
            {
                Console.WriteLine("Load Error!");
                return false;
            }

            HtmlNodeCollection rows = doc.DocumentNode.SelectNodes("//*[@id=\"result_list\"]/div");
            if (rows == null)
            {
                Console.WriteLine("Load Error! Next page doesn't exist! Returned.");
                return false;
            }

            // Every row is added, whatever its citation count.
            foreach (HtmlNode row in rows)
                p.Add(ParseCitationRow(row));

            return true;
        }

        // Builds a Paper from one result row; any field that is missing or malformed falls back to "" or 0.
        private static Paper ParseCitationRow(HtmlNode row)
        {
            int numCit = 0, year = 0;
            string title = "", authNames = "", abs = "", pUrl = "";

            // Citation count: skip the first 9 characters of the link text, then read up to the next space.
            HtmlNode citeNode = row.SelectSingleNode("div[3]/a[@title=\"number of citations\"]");
            if (citeNode != null && citeNode.InnerText != null && citeNode.InnerText.Length > 9)
            {
                string tail = citeNode.InnerText.Substring(9);
                int space = tail.IndexOf(' ');
                // When nothing follows the number, parse the whole tail instead of dropping the count.
                numCit = ParseIntOrZero(space >= 0 ? tail.Remove(space) : tail);
            }

            // Title link: supplies both the title text and the paper URL.
            HtmlNode titleNode = row.SelectSingleNode("h3/a");
            if (titleNode != null)
            {
                title = titleNode.InnerText.Trim();
                pUrl = CiteSeerCitationsBaseUrl + titleNode.GetAttributeValue("href", "");
            }

            // Authors: the first 3 characters of the span text are skipped.
            // Read independently of the title, so a missing title link does not discard the authors.
            HtmlNode authorNode = row.SelectSingleNode("div[1]/span[1]");
            if (authorNode != null && authorNode.InnerText != null && authorNode.InnerText.Length >= 3)
                authNames = authorNode.InnerText.Substring(3).Trim();

            // Publication year: the first 2 characters of the span text are skipped.
            HtmlNode yearNode = row.SelectSingleNode("div[1]/span[@class=\"pubyear\"]");
            if (yearNode != null && yearNode.InnerText != null && yearNode.InnerText.Length >= 2)
                year = ParseIntOrZero(yearNode.InnerText.Substring(2));

            // Abstract snippet, when the row has one.
            HtmlNode absNode = row.SelectSingleNode("div[2]");
            if (absNode != null)
                abs = absNode.InnerText;

            return new Paper(title, pUrl, authNames, abs, year, "", "", numCit, pUrl, 0);
        }

        // Parses an integer (surrounding whitespace allowed); returns 0 for null or non-numeric text.
        private static int ParseIntOrZero(string text)
        {
            int value;
            if (text != null && int.TryParse(text, System.Globalization.NumberStyles.Integer,
                                             System.Globalization.CultureInfo.InvariantCulture, out value))
                return value;
            return 0;
        }
Example #3
0
        /// <summary>
        /// Prepares a CiteSeerX search for an author or a venue, loads the first result
        /// page and then calls getNoResult().
        /// </summary>
        /// <param name="searchEle">Author name (searchTy 0) or venue name (searchTy 1).</param>
        /// <param name="searchTy">0 for author search, 1 for journal search; any other value leaves the URL empty.</param>
        /// <param name="affISSN">Affiliation when searchTy is 0, ISSN when searchTy is 1.</param>
        /// <param name="key">Keyword; stored in normalised form.</param>
        public CiteSeerJournal_FinalAuthorSearch(string searchEle, int searchTy, string affISSN, string key)  //searchType 0 for authorSearch, 1 for journalSearch
        {
            SettingsRecord sett = new SettingsRecord();
            Settings set = sett.ReadSettings();
            maxResults = set.CiteSeerMaxResults;

            searchType = searchTy;
            affiliation = "";
            ISSN = "";

            // The model objects receive the raw, un-normalised search text.
            auth1 = new Author(searchEle);
            journ1 = new Journal(searchEle);

            keyword = NormalizeQueryTerm(key);
            searchElement = NormalizeQueryTerm(searchEle);

            if (searchType == 0)
            {
                affiliation = NormalizeQueryTerm(affISSN);

                initialURL = "http://citeseerx.ist.psu.edu/search?q=author%3A%28" + searchElement + "%29&submit=Search&ic=1&sort=cite&t=doc";
            }
            else if (searchType == 1)
            {
                // Store the caller's ISSN with every space stripped; previously it stayed "".
                ISSN = (affISSN ?? "").Trim().Replace(" ", "");

                initialURL = "http://citeseerx.ist.psu.edu/search?q=venue%3A%28" + searchElement + "%29&submit=Search&ic=1&sort=cite&t=doc";
            }
            else
            {
                initialURL = "";
            }
            CitePage = new HtmlWeb();

            noResult = 10;
            if (initialURL.Length == 0)
            {
                // Unknown search type: there is nothing to load, so report no results.
                noResult = 0;
            }
            else
            {
                try
                {
                    CiteDoc = CitePage.Load(initialURL);
                }
                catch (Exception)
                {
                    // Best effort: a failed load is recorded as "no results" rather than rethrown.
                    noResult = 0;
                }
            }
            PageNo = 1;

            getNoResult();
        }

        // Trims the text and turns each run of whitespace into a single '+'; null becomes "".
        private static string NormalizeQueryTerm(string value)
        {
            if (value == null)
                return "";
            return Regex.Replace(value.Trim(), @"\s+", "+");
        }