Пример #1
0
        /// <summary>
        /// Requests the next window of publications for a journal and appends
        /// every returned paper to <paramref name="journ"/>.
        /// </summary>
        /// <param name="journalName">Value assigned to the request's journal query.</param>
        /// <param name="ISSN">Not read by this method.</param>
        /// <param name="keywords">Not read by this method.</param>
        /// <param name="journ">
        /// Journal that receives the papers; its current paper count determines
        /// the requested index window (count + 1 through count + 100).
        /// </param>
        /// <returns>
        /// <c>true</c> when at least 100 papers came back, <c>false</c> when
        /// fewer than 100 were returned.
        /// </returns>
        public bool getJournalsNext(string journalName, string ISSN, string keywords, ref SG.Journal journ)
        {
            const int batchSize = 100;

            // The index window starts right after the papers already collected.
            int alreadyLoaded = journ.getNumberOfPapers();
            uint firstIndex = Convert.ToUInt32(alreadyLoaded + 1);
            uint lastIndex = Convert.ToUInt32(alreadyLoaded + batchSize);

            Request query = new Request();
            query.AppID = "c49b4e59-08dd-4f27-a53b-53cc72f169af";
            query.ResultObjects = ObjectType.Publication;
            query.JournalQuery = journalName;
            query.StartIdx = firstIndex;
            query.EndIdx = lastIndex;

            List<SG.Paper> fetched = generatePaper(query, batchSize, alreadyLoaded);
            foreach (SG.Paper paper in fetched)
            {
                journ.addPaper(paper);
            }

            return fetched.Count >= batchSize;
        }
Пример #2
0
 /// <summary>
 /// Scrapes the next page of an author's paper list and appends the papers
 /// to <paramref name="author"/>, stopping at the configured maximum.
 /// </summary>
 /// <param name="authUrl">Base URL; page size and start offset are appended as query parameters.</param>
 /// <param name="author">Author that receives the papers; its current paper count is used as the start offset.</param>
 /// <returns>
 /// <c>true</c> when every paper of the page was added;
 /// <c>false</c> when the current count is negative, not a multiple of 100,
 /// already at/above the maximum, the page is empty, or the maximum was hit
 /// while adding; <c>null</c> when the scraper reports failure or yields no list.
 /// </returns>
 public bool? getAuthStatisticsNextPage(string authUrl, ref SG.Author author)
 {
     setts = setRecords.ReadSettings();
     int limit = setts.GSMaxResults;
     int loaded = author.getNumberOfPapers();

     // Only continue from a clean 100-paper boundary that is below the limit.
     bool canFetchMore = loaded >= 0 && loaded % 100 == 0 && loaded < limit;
     if (!canFetchMore)
     {
         return false;
     }

     string pageUrl = authUrl + "&pagesize=100&cstart=" + loaded;
     bool scraperOk = true;
     GSAuthScraper scraper = new GSAuthScraper(pageUrl, loaded, ref scraperOk);
     if (!scraperOk)
     {
         return null;
     }

     List<SG.Paper> pagePapers = scraper.getPapersOfCurrentPage();
     if (pagePapers == null)
     {
         return null;
     }
     if (pagePapers.Count == 0)
     {
         return false;
     }

     foreach (SG.Paper pagePaper in pagePapers)
     {
         // Stop as soon as the configured maximum has been reached.
         if (loaded == limit)
         {
             return false;
         }
         author.addPaper(pagePaper);
         loaded++;
     }

     return true;
 }
Пример #3
0
        /// <summary>
        /// Requests the next window of publications for an author id and
        /// appends every returned paper to <paramref name="auth"/>.
        /// </summary>
        /// <param name="authid">Author identifier; converted to an unsigned integer for the request.</param>
        /// <param name="auth">
        /// Author that receives the papers; its current paper count determines
        /// the requested index window (count + 1 through count + 100).
        /// </param>
        /// <returns>
        /// <c>true</c> when at least 100 papers came back, <c>false</c> when
        /// fewer than 100 were returned.
        /// </returns>
        public bool getAuthStatisticsNext(string authid, ref SG.Author auth)
        {
            const int batchSize = 100;

            // The index window starts right after the papers already collected.
            int alreadyLoaded = auth.getNumberOfPapers();
            int firstIndex = alreadyLoaded + 1;
            int lastIndex = alreadyLoaded + batchSize;

            Request query = new Request();
            query.AppID = "c49b4e59-08dd-4f27-a53b-53cc72f169af";
            query.ResultObjects = ObjectType.Publication;
            query.AuthorID = Convert.ToUInt32(authid);
            query.StartIdx = Convert.ToUInt32(firstIndex);
            query.EndIdx = Convert.ToUInt32(lastIndex);

            List<SG.Paper> fetched = generatePaper(query, batchSize, alreadyLoaded);
            foreach (SG.Paper paper in fetched)
            {
                auth.addPaper(paper);
            }

            return fetched.Count >= batchSize;
        }
Пример #4
0
        /// <summary>
        /// Loads one search-results page, turns every result entry into an
        /// <c>SG.Paper</c>, appends it to <paramref name="author"/>, and reports
        /// the link to the following results page.
        /// </summary>
        /// <param name="this_url">Address of the results page to load; <c>null</c> yields <c>false</c>.</param>
        /// <param name="author">Receives the parsed papers; its current paper count seeds the rank numbering.</param>
        /// <param name="next_url">
        /// Set to the absolute URL of the next page, or <c>null</c> when the page
        /// carries no such link. Left untouched on every early return.
        /// </param>
        /// <returns>
        /// <c>true</c> when the whole page was processed;
        /// <c>false</c> when the configured maximum is reached (before or while
        /// processing), the URL is <c>null</c>, or the page has no results;
        /// <c>null</c> when loading the page throws or a captcha is detected.
        /// </returns>
        public bool? getAuthorsNextPage(string this_url, ref SG.Author author, ref string next_url)
        {
            setts = setRecords.ReadSettings();
            int maxResults = setts.GSMaxResults;
            int num = author.getNumberOfPapers();
            if (num >= maxResults) return false;

            // CONNECTIONS
            if (this_url == null) return false;
            HtmlWeb web = new HtmlWeb();

            try
            {
                doc = web.Load(this_url);
            }
            catch (Exception)
            {
                // Any load failure is reported as "undetermined" instead of being thrown.
                return null;
            }

            int rank = author.getNumberOfPapers() + 1;
            HtmlNodeCollection searchResults = doc.DocumentNode.SelectNodes("//div[@class=\"gs_ri\"]");
            if (searchResults == null)
            {
                if (checkForCaptcha())
                {
                    Console.WriteLine("Captcha problem ...");
                    return null;
                }
                Console.WriteLine("No results ...");
                return false;
            }

            foreach (HtmlNode n in searchResults)
            {
                // Stop before doing any parsing work once the maximum is reached.
                if (num == maxResults) return false;

                // TITLE AND TITLE LINK
                // A result without a title node keeps empty defaults instead of
                // crashing the whole page with a NullReferenceException.
                string title = "";
                string titleLink = "";
                HtmlNode child = n.SelectSingleNode(".//*[@class=\"gs_rt\"]");
                if (child != null)
                {
                    title = child.InnerText;
                    HtmlNode url_node = child.SelectSingleNode(".//a");
                    if (url_node != null)
                    {
                        // Attribute values still hold "&amp;"; dropping "amp;" leaves a plain "&".
                        titleLink = url_node.GetAttributeValue("href", "").Replace("amp;", "");
                    }
                }

                // AUTHORS AND PUBLICATION
                // The byline is split on '-' into: authors, "publication, year" and publisher.
                string authors = "";
                string publication = "";
                string publisher = "";
                int year = 1970;
                child = n.SelectSingleNode(".//*[@class=\"gs_a\"]");
                if (child != null)
                {
                    string[] names = child.InnerText.Split('-');
                    authors = names[0];
                    if (names.Length >= 2)
                    {
                        parsePublicationSegment(names[1], ref year, ref publication);
                    }
                    if (names.Length >= 3)
                    {
                        publisher = names[2].Trim();
                    }
                }

                // SUMMARY
                string summary = "";
                child = n.SelectSingleNode(".//*[@class=\"gs_rs\"]");
                if (child != null)
                {
                    summary = child.InnerText;
                }

                // CITATION STUFF
                int no_of_citations = 0;
                string cited_by_url = "";
                child = n.SelectSingleNode(".//*[@class=\"gs_fl\"]");
                if (child != null) child = child.FirstChild;
                if (child != null)
                {
                    string text = (child.InnerText ?? "").Replace("Cited by", "").Trim();
                    int parsedCitations;
                    if (int.TryParse(text, out parsedCitations))
                    {
                        no_of_citations = parsedCitations;
                    }

                    // The link is only taken when a non-zero citation count was parsed.
                    if (no_of_citations != 0)
                    {
                        cited_by_url = child.GetAttributeValue("href", "");
                        if (cited_by_url.Length != 0)
                        {
                            cited_by_url = "http://scholar.google.com" + cited_by_url;
                            cited_by_url = cited_by_url.Replace("amp;", "");
                        }
                    }
                }

                SG.Paper paper = new SG.Paper(title, titleLink, authors, summary, year, publication, publisher, no_of_citations, cited_by_url, rank);
                author.addPaper(paper);
                num++;
                rank++;
            }

            // NEXT PAGE URL
            next_url = null;
            HtmlNode bottom = doc.DocumentNode.SelectSingleNode(".//*[@id=\"gs_n\"]//table//td[@align=\"left\"]//a");
            if (bottom != null)
            {
                string url = bottom.GetAttributeValue("href", "");
                if (url.Length != 0)
                {
                    next_url = ("http://scholar.google.com" + url).Replace("amp;", "");
                }
            }

            return true;
        }

        /// <summary>
        /// Interprets the middle byline segment: either a bare year, or
        /// "publication, year". Values that cannot be parsed are left as passed in.
        /// </summary>
        /// <param name="segment">Raw text between the first and second '-' of the byline.</param>
        /// <param name="year">Overwritten only when a year is parsed successfully.</param>
        /// <param name="publication">Overwritten with the trimmed text before the first comma when the segment is not a bare year.</param>
        private static void parsePublicationSegment(string segment, ref int year, ref string publication)
        {
            string trimmed = segment.Trim();

            // TryParse writes 0 on failure, so parse into a scratch variable
            // to keep the caller's default year intact.
            int parsed;
            if (int.TryParse(trimmed, out parsed))
            {
                year = parsed;
                return;
            }

            string[] parts = trimmed.Split(',');
            if (parts.Length > 1 && int.TryParse(parts[1], out parsed))
            {
                year = parsed;
            }
            publication = parts[0].Trim();
        }