示例#1
0
        /// <summary>
        /// Starts an asynchronous download of <paramref name="url"/> into <paramref name="filepath"/>.
        /// </summary>
        /// <param name="newSearchResultItemRow">Row whose <c>id</c> is handed to the completion handler.</param>
        /// <param name="url">Address of the file to download.</param>
        /// <param name="filepath">Local path the file is written to.</param>
        /// <remarks>
        /// The target path and the row id are stored in the client's <c>QueryString</c> collection so that
        /// <c>Completed</c> can read them back from the sender.
        /// NOTE(review): <c>WebClient.QueryString</c> is documented as the query parameters of the request,
        /// so these two values are presumably sent to the server as well - confirm this is acceptable.
        /// </remarks>
        private void DownloadFile(SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow, string url, string filepath)
        {
            WebClient webClient = new WebClient();

            webClient.QueryString.Add("filepath", filepath);
            webClient.QueryString.Add("SearchResultItem.id", newSearchResultItemRow.id.ToString());
            webClient.DownloadProgressChanged += new DownloadProgressChangedEventHandler(ProgressChanged);
            webClient.DownloadFileCompleted   += new AsyncCompletedEventHandler(Completed);

            // Subscribed after Completed so it runs last: the client must stay alive until the
            // asynchronous operation has finished instead of being disposed right after it starts.
            webClient.DownloadFileCompleted += delegate { webClient.Dispose(); };

            try
            {
                webClient.DownloadFileAsync(new Uri(url), filepath);
            }
            catch
            {
                // The download never started, so no completion event will release the client.
                webClient.Dispose();
                throw;
            }
        }
示例#2
0
        /// <summary>
        /// Completion handler for downloads started by <c>DownloadFile</c>: logs the outcome to
        /// <c>textBox1</c> and stores the downloaded file's text in the matching row's
        /// <c>endnote_content</c>.
        /// </summary>
        /// <param name="sender">The <see cref="System.Net.WebClient"/> that performed the download.</param>
        /// <param name="e">Completion details (error / cancellation state).</param>
        private void Completed(object sender, AsyncCompletedEventArgs e)
        {
            // File path and row id were attached to the client by DownloadFile.
            System.Net.WebClient webClient = (System.Net.WebClient)sender;
            string filepath = webClient.QueryString["filepath"];
            int    id       = int.Parse(webClient.QueryString["SearchResultItem.id"]);

            // A failed or cancelled download leaves no usable file; report it instead of reading it.
            if (e.Cancelled || e.Error != null)
            {
                string reason = e.Cancelled ? "cancelled" : e.Error.Message;
                textBox1.Text += "download failed: " + filepath + " (" + reason + ")" + System.Environment.NewLine;
                return;
            }

            textBox1.Text += "download completed: " + filepath + System.Environment.NewLine;

            string content;
            using (StreamReader sr = new StreamReader(filepath))
            {
                content = sr.ReadToEnd();
            }

            SupportSLRDataSet.SearchResultItemRow searchResultItemRow = supportSLRDataSet.SearchResultItem.FindByid(id);
            if (searchResultItemRow == null)
            {
                textBox1.Text += "download ignored, no SearchResultItem with id " + id + System.Environment.NewLine;
                return;
            }

            searchResultItemRow.endnote_content = content;

            using (SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter searchResultItemTableAdapter = new SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter())
            {
                searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem);
            }
        }
示例#3
0
        /// <summary>
        /// Looks for the Endnote export link inside a result page and, when found, starts its download
        /// and records the link on the row.
        /// </summary>
        /// <param name="newSearchResultItemRow">Row that receives <c>endnote_url</c>; its id names the target file.</param>
        /// <param name="contentResult">HTML of the result page.</param>
        /// <param name="numResult">Not used by this method.</param>
        /// <param name="basePath">Prefix prepended to the generated <c>.enw</c> file name.</param>
        private void FindDownloadEndnoteFile(SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow, string contentResult, int numResult, string basePath)
        {
            const string endnoteMarker = "endnote&";
            string[] aroundMarker = contentResult.Split(new string[] { endnoteMarker }, StringSplitOptions.None);

            // Nothing is done unless the marker splits the page into exactly two parts.
            if (aroundMarker.Length != 2)
            {
                return;
            }

            // Head of the link: whatever follows the last href=" in front of the marker.
            string[] hrefPieces = aroundMarker[0].Split(new string[] { "href=\"" }, StringSplitOptions.None);
            string   urlHead    = hrefPieces[hrefPieces.Length - 1];

            // Tail of the link: everything up to the first double quote behind the marker.
            string[] quotePieces = aroundMarker[1].Split(new string[] { "\"" }, StringSplitOptions.None);
            string   urlTail     = quotePieces[0];

            string targetFile  = basePath + "SearchResultItemRow_id_" + newSearchResultItemRow.id.ToString("0000") + ".enw";
            string urlDownload = (urlHead + endnoteMarker + urlTail).Replace("&#x3D", string.Empty);

            DownloadFile(newSearchResultItemRow, urlDownload, targetFile);

            newSearchResultItemRow.endnote_url = urlDownload;
        }
示例#4
0
        /// <summary>
        /// Walks the SpringerLink result pages from the page in <c>textBox5</c> to the page in
        /// <c>textBox6</c>, stores every hit as a SearchResultItem row, starts the Endnote download
        /// and fills in the properties parsed from the hit's page.
        /// </summary>
        /// <remarks>
        /// <c>textBox3</c> holds the page URL format string, <c>textBox2</c> the query string appended
        /// to it and <c>textBox4</c> the marker used to split a results page into hits.
        /// The loop stops after <c>page_end</c> or after the first fetched page without any hit.
        /// A page that cannot be fetched (or comes back empty) is logged and skipped; the page counter
        /// always advances, so a permanently failing URL can no longer loop forever.
        /// </remarks>
        private void SearchSpringerLink()
        {
            string basicURL          = textBox3.Text;
            string querystring       = textBox2.Text;
            string titleLocator      = textBox4.Text;
            string basicURLforResult = "http://link.springer.com/";

            int  page      = int.Parse(textBox5.Text);
            int  page_end  = int.Parse(textBox6.Text);
            int  numResult = 1;
            bool hasPages  = true;

            while (hasPages)
            {
                string searchUrl         = String.Format(basicURL, page) + querystring;
                string resultsFromSearch = string.Empty;
                try
                {
                    resultsFromSearch = GetContentFromURL(searchUrl);
                }
                catch (Exception)
                {
                    textBox1.Text += "Error on: " + searchUrl + System.Environment.NewLine;
                }

                if (resultsFromSearch != string.Empty)
                {
                    string[] listResults = resultsFromSearch.Split(new string[] { titleLocator }, StringSplitOptions.None);

                    // Assume this is the last page until a hit is found on it.
                    hasPages = false;
                    foreach (string result in listResults)
                    {
                        string resultTrim = result.Trim();
                        if (!resultTrim.StartsWith("href="))
                        {
                            continue;
                        }

                        hasPages = true;

                        string token01   = "href=\"";
                        string urlResult = resultTrim.Substring(resultTrim.IndexOf(token01) + token01.Length, resultTrim.IndexOf("\">") - token01.Length);

                        string contentResult = GetContentFromURL(basicURLforResult + urlResult).Trim();

                        using (SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter searchResultItemTableAdapter = new SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter())
                        {
                            SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow = supportSLRDataSet.SearchResultItem.NewSearchResultItemRow();
                            newSearchResultItemRow.fk_QueryDataSourceRound_id = 9; // (int)comboBox1.SelectedValue;
                            newSearchResultItemRow.url           = urlResult;
                            newSearchResultItemRow.page_content  = contentResult;
                            newSearchResultItemRow.errors_onload = string.Empty;

                            // Saved before the download starts: the file name is built from the row id.
                            supportSLRDataSet.SearchResultItem.Rows.Add(newSearchResultItemRow);
                            searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem);

                            // Find and download Endnote file (enw)
                            try
                            {
                                FindDownloadEndnoteFile(newSearchResultItemRow, contentResult, numResult, basePathSpringerLink);
                            }
                            catch (Exception ex)
                            {
                                newSearchResultItemRow.errors_onload += "FindDownloadEndnoteFile: " + ex.Message + ex.Source + System.Environment.NewLine;
                            }

                            // Find properties of the result (title, abstract, keywords, year, pages, data source type)
                            try
                            {
                                FindProperties(newSearchResultItemRow, contentResult);
                            }
                            catch (Exception ex)
                            {
                                newSearchResultItemRow.errors_onload += "FindProperties: " + ex.Message + ex.Source + System.Environment.NewLine;
                            }

                            searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem);
                        }

                        numResult++;
                    }
                }

                // Advance even when the fetch failed, otherwise the same page is retried endlessly.
                page++;

                if (page > page_end)
                {
                    hasPages = false;
                }
            }
        }
示例#5
0
        /// <summary>
        /// Parses title, abstract, pages, publication year and type out of a SpringerLink result page
        /// and writes them to <paramref name="newSearchResultItemRow"/>.
        /// </summary>
        /// <param name="newSearchResultItemRow">Row that receives the parsed values.</param>
        /// <param name="contentResult">HTML of the result page.</param>
        /// <remarks>
        /// When a closing marker (<c>&lt;/title&gt;</c>, <c>&lt;/p&gt;</c>, <c>&lt;/span&gt;</c>, a closing
        /// quote) is missing, <c>Substring</c> throws and no value is written to the row.
        /// </remarks>
        private void FindProperties(SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow, string contentResult)
        {
            // --- title: text between <title> and </title>, without the site suffix ---
            string token02 = "<title>";
            string token03 = "</title>";
            string title   = contentResult.Substring(contentResult.IndexOf(token02) + token02.Length);

            title = title.Substring(0, title.IndexOf(token03));
            title = title.Replace(" - Springer", string.Empty).Replace(" | SpringerLink", string.Empty);

            // --- abstract: first paragraph after the "Abstract" heading ---
            string token04     = "<h2 class=\"Heading\">Abstract</h2>";
            string valAbstract = contentResult.Substring(contentResult.IndexOf(token04) + token04.Length).Trim();

            string token05 = "<p class=\"Para\">";

            valAbstract = valAbstract.Replace(token05, string.Empty);

            string token06 = "</p>";

            valAbstract = valAbstract.Substring(0, valAbstract.IndexOf(token06));

            // --- keywords: collected here but not stored on the row ---
            string        token07      = "<span class=\"Keyword\">";
            string        token08      = "</span>";
            string[]      tempKeywords = contentResult.Split(new string[] { token07 }, StringSplitOptions.None);
            List <string> keywords     = new List <string>();

            // Element 0 is the text in front of the first keyword span.
            for (int idxKeywords = 1; idxKeywords < tempKeywords.Length; idxKeywords++)
            {
                string tempKeyword = tempKeywords[idxKeywords];
                keywords.Add(tempKeyword.Substring(0, tempKeyword.IndexOf(token08)));
            }

            // --- pages: hidden input first, citation span as fallback ---
            string tokenPages = "<input type=\"hidden\" name=\"pages\" value=\"";
            string pages      = string.Empty;
            int    pagesAt    = contentResult.IndexOf(tokenPages);

            if (pagesAt >= 0)
            {
                // Skip the token itself so the value (not "<input type=") is captured.
                pages = contentResult.Substring(pagesAt + tokenPages.Length);
                pages = pages.Substring(0, pages.IndexOf("\""));
            }
            else
            {
                tokenPages = "span class=\"ArticleCitation_Pages\">";
                pagesAt    = contentResult.IndexOf(tokenPages);
                if (pagesAt >= 0)
                {
                    pages = contentResult.Substring(pagesAt + tokenPages.Length);
                    pages = pages.Substring(0, pages.IndexOf("</span>"));
                }
            }

            // --- year: hidden input first, <time> element as fallback ---
            string tokenYear = "<input type=\"hidden\" name=\"year\" value=\"";
            string year      = string.Empty;
            int    yearAt    = contentResult.IndexOf(tokenYear);

            if (yearAt >= 0)
            {
                year = contentResult.Substring(yearAt + tokenYear.Length);
                year = year.Substring(0, year.IndexOf("\""));
            }
            else
            {
                tokenYear = "<time>";
                yearAt    = contentResult.IndexOf(tokenYear);
                if (yearAt >= 0)
                {
                    year = contentResult.Substring(yearAt + tokenYear.Length);
                    year = year.Substring(0, year.IndexOf("</time>"));
                }
            }

            // --- type: only written when the hidden input is present ---
            string tokenType = "<input type=\"hidden\" name=\"type\" value=\"";
            int    typeAt    = contentResult.IndexOf(tokenType);

            if (typeAt >= 0)
            {
                string type = contentResult.Substring(typeAt + tokenType.Length);
                type = type.Substring(0, type.IndexOf("\""));
                newSearchResultItemRow.type = type;
            }

            newSearchResultItemRow.title            = title;
            newSearchResultItemRow._abstract        = valAbstract;
            newSearchResultItemRow.pages            = pages;
            newSearchResultItemRow.date_publication = year;
        }
        /// <summary>
        /// Re-imports a fixed list of ScienceDirect article URLs: each page is fetched, stored as a new
        /// SearchResultItem row and then enriched with the properties parsed from its content.
        /// </summary>
        /// <remarks>
        /// The method does not read any of the form's text boxes. A failure in <c>FindProperties</c>
        /// is recorded in the row's <c>errors_onload</c>; a failure while fetching a page is not
        /// handled here and aborts the run.
        /// </remarks>
        private void ReprocessSearchScienceDirect()
        {
            List <string> lstUrls = new List <string>
            {
                //"http://www.sciencedirect.com/science/article/pii/S0196064403008217",
                "http://www.sciencedirect.com/science/article/pii/S1386505606002000",
                //"http://www.sciencedirect.com/science/article/pii/S1072751501007694",
                //"http://www.sciencedirect.com/science/article/pii/S0169260700001358",
                "http://www.sciencedirect.com/science/article/pii/S1386505615000660",
                "http://www.sciencedirect.com/science/article/pii/S0146000509001104",
                "http://www.sciencedirect.com/science/article/pii/S0196064408018982",
                "http://www.sciencedirect.com/science/article/pii/S0140673613621509",
                "http://www.sciencedirect.com/science/article/pii/S0029655408001589",
                "http://www.sciencedirect.com/science/article/pii/S1072751507006953",
                "http://www.sciencedirect.com/science/article/pii/S0140673609603159",
                "http://www.sciencedirect.com/science/article/pii/S1471491414000628",
                "http://www.sciencedirect.com/science/article/pii/S0196064411015502",
                "http://www.sciencedirect.com/science/article/pii/S1072751512004474",
                "http://www.sciencedirect.com/science/article/pii/S0002817714623369",
                "http://www.sciencedirect.com/science/article/pii/S1386505605000730",
                "http://www.sciencedirect.com/science/article/pii/S1473309909701768",
                "http://www.sciencedirect.com/science/article/pii/S0196655304003645",
                "http://www.sciencedirect.com/science/article/pii/S1932227511000449",
                "http://www.sciencedirect.com/science/article/pii/S0033350616000093",
                "http://www.sciencedirect.com/science/article/pii/S0736467912010980",
                "http://www.sciencedirect.com/science/article/pii/S1067991X14000765"
            };

            foreach (string url in lstUrls)
            {
                string contentResult = GetContentFromURL(url).Trim();

                using (SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter searchResultItemTableAdapter = new SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter())
                {
                    SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow = supportSLRDataSet.SearchResultItem.NewSearchResultItemRow();
                    newSearchResultItemRow.fk_QueryDataSourceRound_id = 2; //(int)comboBox1.SelectedValue;
                    newSearchResultItemRow.url           = url;
                    newSearchResultItemRow.page_content  = contentResult;
                    newSearchResultItemRow.errors_onload = string.Empty;

                    // First save: add the row to the table and persist it before parsing.
                    supportSLRDataSet.SearchResultItem.Rows.Add(newSearchResultItemRow);
                    searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem);

                    // Find properties of the result (title, abstract, keywords, year, pages, data source type)
                    try
                    {
                        FindProperties(newSearchResultItemRow, contentResult);
                    }
                    catch (Exception ex)
                    {
                        newSearchResultItemRow.errors_onload += "FindProperties: " + ex.Message + ex.Source + System.Environment.NewLine;
                    }

                    // Second save: persist the parsed properties (or the recorded error).
                    searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem);
                }
            }
        }
        /// <summary>
        /// Fetches the ScienceDirect result pages between the page numbers in <c>textBox5</c> and
        /// <c>textBox6</c> and stores every hit as a SearchResultItem row with its parsed properties.
        /// </summary>
        /// <remarks>
        /// The page URL is <c>textBox3 + textBox2</c> formatted with <c>page - 2</c>; <c>textBox4</c>
        /// holds the marker that separates hits on a results page. Paging ends after the last requested
        /// page or after a fetched page that contains no hit. The Endnote download is deliberately not
        /// performed here: the first results page allows exporting all results in a single file.
        /// </remarks>
        private void SearchScienceDirect()
        {
            string urlPrefix   = textBox3.Text;
            string urlSuffix   = textBox2.Text;
            string hitLocator  = textBox4.Text;
            int    currentPage = int.Parse(textBox5.Text);
            int    lastPage    = int.Parse(textBox6.Text);
            int    hitCounter  = 1;
            bool   keepPaging  = true;

            while (keepPaging)
            {
                string url      = String.Format(urlPrefix + urlSuffix, currentPage - 2);
                string pageHtml = string.Empty;

                try
                {
                    pageHtml = GetContentFromURL(url);
                }
                catch (Exception)
                {
                    textBox1.Text += "Error on: " + url + System.Environment.NewLine;
                }

                if (pageHtml != string.Empty)
                {
                    // Stays false unless at least one hit is found on this page.
                    keepPaging = false;

                    foreach (string chunk in pageHtml.Split(new string[] { hitLocator }, StringSplitOptions.None))
                    {
                        string hit = chunk.Trim();

                        // Drop everything up to and including the first '>'.
                        if (hit.Contains(">"))
                        {
                            hit = hit.Substring(hit.IndexOf(">") + 1);
                        }

                        if (!hit.StartsWith("<H2>"))
                        {
                            continue;
                        }

                        keepPaging = true;

                        // The hit's address is the value of the first href attribute.
                        string hrefToken = "href=\"";
                        string urlResult = hit.Substring(hit.IndexOf(hrefToken) + hrefToken.Length);
                        urlResult = urlResult.Substring(0, urlResult.IndexOf("\""));

                        string contentResult = GetContentFromURL(urlResult).Trim();

                        SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter adapter = new SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter();
                        SupportSLRDataSet.SearchResultItemRow row = supportSLRDataSet.SearchResultItem.NewSearchResultItemRow();
                        row.fk_QueryDataSourceRound_id = 8; //(int)comboBox1.SelectedValue;
                        row.url           = urlResult;
                        row.page_content  = contentResult;
                        row.errors_onload = string.Empty;

                        // First save, done before the properties are parsed.
                        supportSLRDataSet.SearchResultItem.Rows.Add(row);
                        adapter.Update(supportSLRDataSet.SearchResultItem);

                        // Parse title, pages, year and type; a failure is recorded on the row.
                        try
                        {
                            FindProperties(row, contentResult);
                        }
                        catch (Exception ex)
                        {
                            row.errors_onload += "FindProperties: " + ex.Message + ex.Source + System.Environment.NewLine;
                        }

                        adapter.Update(supportSLRDataSet.SearchResultItem);
                        adapter.Dispose();

                        hitCounter++;
                    }
                }

                currentPage++;

                if (currentPage > lastPage)
                {
                    keepPaging = false;
                }
            }
        }
        /// <summary>
        /// Parses title, pages, publication year and type out of a ScienceDirect article page and
        /// writes them to <paramref name="newSearchResultItemRow"/>.
        /// </summary>
        /// <param name="newSearchResultItemRow">Row that receives the parsed values.</param>
        /// <param name="contentResult">HTML of the article page.</param>
        /// <remarks>
        /// The abstract and keywords are not extracted by this variant. When the closing
        /// <c>&lt;/title&gt;</c> (or the <c>&lt;/p&gt;</c> after "Pages") is missing,
        /// <c>Substring</c> throws and nothing is written to the row.
        /// </remarks>
        private void FindProperties(SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow, string contentResult)
        {
            // --- title: text between <title> and </title>, matched case-insensitively ---
            string tokenTitle = "<title>";
            string token03    = "</title>";
            string title      = contentResult.Substring(contentResult.IndexOf(tokenTitle, StringComparison.OrdinalIgnoreCase) + tokenTitle.Length);

            title = title.Substring(0, title.IndexOf(token03, StringComparison.OrdinalIgnoreCase)).Replace(System.Environment.NewLine, "").Trim();

            // Everything after the volume/issue paragraph marker; pages and year are searched in here.
            string token    = "<p class=\"volIssue\">";
            string volIssue = contentResult.Substring(contentResult.IndexOf(token) + token.Length);

            // --- pages: text after "Pages" up to </p>, otherwise the first number range ---
            string tokenPages = "Pages";
            string pages      = string.Empty;

            // Test the same string that is sliced below; "Pages" occurring only earlier in the
            // document must not lead to Substring(-1).
            int pagesAt = volIssue.IndexOf(tokenPages);

            if (pagesAt >= 0)
            {
                pages = volIssue.Substring(pagesAt);
                pages = pages.Substring(0, pages.IndexOf("</p>"));
                pages = pages.Replace("Pages", "").Trim();
            }
            else
            {
                tokenPages = @"\d+(?:-\d+)?(?:,\d+(?:-\d+)?)*";

                Regex regexPages = new Regex(tokenPages);
                Match matchPages = regexPages.Match(volIssue);
                if (matchPages.Success)
                {
                    pages = matchPages.Value;
                }
            }

            // Skip the opening tag of a leading link before looking for the year.
            if (volIssue.StartsWith("<a"))
            {
                volIssue = volIssue.Substring(volIssue.IndexOf(">"));
            }

            // --- year: first 19xx/20xx number in the remaining text ---
            string tokenYear = @"(19|20)\d\d";
            string year      = string.Empty;
            Regex  regex     = new Regex(tokenYear);
            Match  match     = regex.Match(volIssue);

            if (match.Success)
            {
                year = match.Value;
            }

            // --- type: decided by which path fragment occurs in the page ---
            string tokenTypeBook    = "book/";
            string tokenTypeJournal = "journal/";
            string type             = string.Empty;

            if (contentResult.IndexOf(tokenTypeBook) > 0)
            {
                type = "Book Section";
            }
            else if (contentResult.IndexOf(tokenTypeJournal) > 0)
            {
                type = "Journal";
            }
            else
            {
                type = "Conference proceeding";
            }

            newSearchResultItemRow.title            = title;
            newSearchResultItemRow.pages            = pages;
            newSearchResultItemRow.date_publication = year;
            newSearchResultItemRow.type             = type;
        }
示例#9
0
        /// <summary>
        /// Fetches the Wiley result pages between the page numbers in <c>textBox5</c> and
        /// <c>textBox6</c> and stores every hit as a SearchResultItem row with its parsed properties.
        /// </summary>
        /// <remarks>
        /// The page URL is <c>textBox3 + textBox2</c> formatted with <c>(page - 1) * 20 + 1</c>, with
        /// <c>&amp;amp;</c> turned back into <c>&amp;</c>. <c>textBox4</c> holds the marker that
        /// separates hits; the text in front of the first marker is ignored. Paging ends after the last
        /// requested page or after a fetched page that contains no hit.
        /// </remarks>
        private void SearchWiley()
        {
            string urlPrefix   = textBox3.Text;
            string urlSuffix   = textBox2.Text;
            string hitLocator  = textBox4.Text;
            int    currentPage = int.Parse(textBox5.Text);
            int    lastPage    = int.Parse(textBox6.Text);
            int    hitCounter  = 1;
            bool   keepPaging  = true;

            while (keepPaging)
            {
                string pageHtml = string.Empty;
                string url      = String.Format(urlPrefix + urlSuffix, (currentPage - 1) * 20 + 1).Replace("&amp;", "&");

                try
                {
                    pageHtml = GetContentFromURL(url);
                }
                catch (Exception)
                {
                    textBox1.Text += "Error on: " + url + System.Environment.NewLine;
                }

                if (pageHtml != string.Empty)
                {
                    string[] chunks = pageHtml.Split(new string[] { hitLocator }, StringSplitOptions.None);

                    // Stays false unless the page yields at least one hit.
                    keepPaging = false;

                    // Index 0 is the text in front of the first marker, so hits start at 1.
                    for (int i = 1; i < chunks.Length; i++)
                    {
                        string hit = chunks[i].Trim();

                        keepPaging = true;

                        // The hit's address: first href value, prefixed with the base URL and with
                        // the escaped angle brackets percent-encoded.
                        string hrefToken = "href=\"";
                        string afterHref = hit.Substring(hit.IndexOf(hrefToken) + hrefToken.Length);
                        string hrefValue = afterHref.Substring(0, afterHref.IndexOf("\""));
                        string urlResult = urlPrefix + hrefValue.Replace("&lt;", "%3C").Replace("&gt;", "%3E");

                        string contentResult = GetContentFromURL(urlResult).Trim();

                        SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter adapter = new SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter();
                        SupportSLRDataSet.SearchResultItemRow row = supportSLRDataSet.SearchResultItem.NewSearchResultItemRow();
                        row.fk_QueryDataSourceRound_id = 12; //(int)comboBox1.SelectedValue;
                        row.url           = urlResult;
                        row.page_content  = contentResult;
                        row.errors_onload = string.Empty;

                        // First save, done before the properties are parsed.
                        supportSLRDataSet.SearchResultItem.Rows.Add(row);
                        adapter.Update(supportSLRDataSet.SearchResultItem);

                        // Parse title, pages, year and type; a failure is recorded on the row.
                        try
                        {
                            FindProperties(row, contentResult);
                        }
                        catch (Exception ex)
                        {
                            row.errors_onload += "FindProperties: " + ex.Message + ex.Source + System.Environment.NewLine;
                        }

                        adapter.Update(supportSLRDataSet.SearchResultItem);
                        adapter.Dispose();

                        hitCounter++;
                    }
                }

                currentPage++;

                if (currentPage > lastPage)
                {
                    keepPaging = false;
                }
            }
        }
示例#10
0
        /// <summary>
        /// Parses title, page range, publication year and type from the <c>citation_*</c> meta tags of
        /// an article page and writes them to <paramref name="newSearchResultItemRow"/>.
        /// </summary>
        /// <param name="newSearchResultItemRow">Row that receives the parsed values.</param>
        /// <param name="contentResult">HTML of the article page.</param>
        /// <remarks>
        /// Meta tag names are matched case-insensitively. Pages and year stay empty when their tags
        /// are absent; a first page without a last page yields the first page alone. When the closing
        /// <c>" /&gt;</c> of a tag is missing, <c>Substring</c> throws and nothing is written to the row.
        /// </remarks>
        private void FindProperties(SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow, string contentResult)
        {
            const string metaClose = "\" />";

            // --- title ---
            string temp  = "name=\"citation_title\" content=\"";
            string title = contentResult.Substring(contentResult.IndexOf(temp, StringComparison.OrdinalIgnoreCase) + temp.Length);

            title = title.Substring(0, title.IndexOf(metaClose, StringComparison.Ordinal)).Replace(System.Environment.NewLine, "").Trim().Replace("&ndash;", "–").Replace("&#x2010;", "-");

            // --- pages: "first-last", or just "first" when no last page is given ---
            string pages = string.Empty;

            temp = "name=\"citation_firstpage\" content=\"";
            int firstPageAt = contentResult.IndexOf(temp, StringComparison.OrdinalIgnoreCase);

            if (firstPageAt >= 0)
            {
                pages = contentResult.Substring(firstPageAt + temp.Length);
                pages = pages.Substring(0, pages.IndexOf(metaClose, StringComparison.Ordinal)).Replace(System.Environment.NewLine, "").Trim();

                temp = "name=\"citation_lastpage\" content=\"";
                int lastPageAt = contentResult.IndexOf(temp, StringComparison.OrdinalIgnoreCase);

                // Without this check a missing tag would append unrelated page text to the range.
                if (lastPageAt >= 0)
                {
                    string lastPage = contentResult.Substring(lastPageAt + temp.Length);
                    lastPage = lastPage.Substring(0, lastPage.IndexOf(metaClose, StringComparison.Ordinal)).Replace(System.Environment.NewLine, "").Trim();
                    pages += "-" + lastPage;
                }
            }

            // --- year: publication date preferred, online date as fallback ---
            string   year          = string.Empty;
            string[] dateMetaNames =
            {
                "name=\"citation_publication_date\" content=\"",
                "name=\"citation_online_date\" content=\""
            };

            foreach (string dateMeta in dateMetaNames)
            {
                int dateAt = contentResult.IndexOf(dateMeta, StringComparison.OrdinalIgnoreCase);
                if (dateAt < 0)
                {
                    continue;
                }

                year = contentResult.Substring(dateAt + dateMeta.Length);
                year = year.Substring(0, year.IndexOf(metaClose, StringComparison.Ordinal)).Replace(System.Environment.NewLine, "").Trim();

                // Reduce the date to its 19xx/20xx part; keep the raw text when none is found.
                Match match = Regex.Match(year, @"(19|20)\d\d");
                if (match.Success)
                {
                    year = match.Value;
                }

                break;
            }

            // --- type: decided by which citation meta tag occurs in the page ---
            string type;

            if (contentResult.IndexOf("citation_book_title") > 0)
            {
                type = "Book Section";
            }
            else if (contentResult.IndexOf("citation_journal_title") > 0)
            {
                type = "Journal";
            }
            else
            {
                type = "Conference proceeding";
            }

            newSearchResultItemRow.title            = title;
            newSearchResultItemRow.pages            = pages;
            newSearchResultItemRow.date_publication = year;
            newSearchResultItemRow.type             = type;
        }
示例#11
0
        /// <summary>
        /// Imports records from a tagged export file (RIS-style lines such as "TI  - ", "PY  - ";
        /// presumably a Scopus export - confirm against the caller) into the SearchResultItem table.
        /// </summary>
        /// <remarks>
        /// The file path is taken from <c>textBox3</c> and the record separator from <c>textBox4</c>.
        /// The text before the first separator is discarded. For every record the type, title,
        /// pages, year and URL are read in that order, the row is saved, and the content of the
        /// first hit of a title search is stored in <c>page_content</c>. Progress and errors are
        /// appended to <c>textBox1</c>.
        /// </remarks>
        private void SearchScopus()
        {
            string filepath    = textBox3.Text;
            string typeLocator = textBox4.Text;

            string resultsFromFile = string.Empty;

            try
            {
                resultsFromFile = GetContentFromFile(filepath);
            }
            catch (Exception)
            {
                // Best effort: report the failure and continue with no records.
                textBox1.Text += "Error on: " + filepath + Environment.NewLine;
            }

            List<string> listResults = resultsFromFile.Split(new string[] { typeLocator }, StringSplitOptions.None).ToList();

            // Split always yields at least one element; the first one precedes the first separator.
            listResults.RemoveAt(0);

            const string lineBreak = "\n";

            foreach (string result in listResults)
            {
                string remaining = result.Trim();
                if (remaining.Length == 0)
                {
                    // Nothing to parse (e.g. a trailing separator).
                    continue;
                }

                // 'using' guarantees the adapter is disposed even when a record fails half-way.
                using (var searchResultItemTableAdapter = new SupportSLRDataSetTableAdapters.SearchResultItemTableAdapter())
                {
                    SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow = supportSLRDataSet.SearchResultItem.NewSearchResultItemRow();
                    newSearchResultItemRow.fk_QueryDataSourceRound_id = 11; //(int)comboBox1.SelectedValue;

                    // The first line of a record holds the type code. Ordinal search: a
                    // culture-sensitive search for "\n" can miss the break in CRLF text.
                    int firstBreak = remaining.IndexOf(lineBreak, StringComparison.Ordinal);
                    string typeCode = firstBreak >= 0 ? remaining.Substring(0, firstBreak).Trim() : remaining;
                    remaining = firstBreak >= 0 ? remaining.Substring(firstBreak) : string.Empty;

                    string type = (typeCode == "JOUR") ? "Journal" : (typeCode == "CONF") ? "Conference proceeding" : "Book Section";

                    string title = ReadTaggedValue(ref remaining, "TI  - ", lineBreak);

                    // Pages are only recorded when a start page exists; the end page is optional.
                    string pages = string.Empty;
                    const string startPageTag = "SP  - ";
                    const string endPageTag   = "EP  - ";
                    if (remaining.Contains(startPageTag))
                    {
                        pages = ReadTaggedValue(ref remaining, startPageTag, lineBreak);
                        if (remaining.Contains(endPageTag))
                        {
                            pages += "-" + ReadTaggedValue(ref remaining, endPageTag, lineBreak);
                        }
                    }

                    string year = ReadTaggedValue(ref remaining, "PY  - ", lineBreak);
                    string url  = ReadTaggedValue(ref remaining, "UR  - ", lineBreak);

                    newSearchResultItemRow.type             = type;
                    newSearchResultItemRow.title            = title;
                    newSearchResultItemRow.pages            = pages;
                    newSearchResultItemRow.date_publication = year;
                    newSearchResultItemRow.url              = url;

                    supportSLRDataSet.SearchResultItem.Rows.Add(newSearchResultItemRow);
                    searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem);

                    try
                    {
                        // Search by the exact (quoted) title and keep the content of the first hit only.
                        string query   = "\"" + title + "\"";
                        var    client  = new SearchClient(query);
                        string content = string.Empty;

                        foreach (var hit in client.Query())
                        {
                            string urlResult = hit.CleanUri.ToString();
                            try
                            {
                                content = GetContentFromURL(urlResult);
                            }
                            catch (Exception)
                            {
                                // Report the address that actually failed, not the record's own URL.
                                textBox1.Text += "Error on: " + urlResult + Environment.NewLine;
                            }

                            //if (!content.StartsWith("%PDF") && content.ToLower().Contains("abstract"))
                            break;
                        }

                        newSearchResultItemRow.page_content = content;
                    }
                    catch (Exception ex)
                    {
                        textBox1.Text += "Error on Googling: " + ex.Message + Environment.NewLine;
                    }

                    newSearchResultItemRow.errors_onload = string.Empty;

                    searchResultItemTableAdapter.Update(supportSLRDataSet.SearchResultItem);
                }
            }
        }

        /// <summary>
        /// Reads the value that follows the first occurrence of <paramref name="tag"/> up to the
        /// next <paramref name="lineBreak"/> (or the end of the text) and advances
        /// <paramref name="remaining"/> to start at that value.
        /// </summary>
        /// <param name="remaining">Unparsed rest of the record; left untouched when the tag is absent.</param>
        /// <param name="tag">Tag prefix to look for, e.g. "TI  - ".</param>
        /// <param name="lineBreak">Line terminator that ends the value.</param>
        /// <returns>The trimmed value, or an empty string when the tag does not occur.</returns>
        private static string ReadTaggedValue(ref string remaining, string tag, string lineBreak)
        {
            int tagIndex = remaining.IndexOf(tag, StringComparison.Ordinal);
            if (tagIndex < 0)
            {
                return string.Empty;
            }

            remaining = remaining.Substring(tagIndex + tag.Length);

            int lineEnd = remaining.IndexOf(lineBreak, StringComparison.Ordinal);
            string value = lineEnd >= 0 ? remaining.Substring(0, lineEnd) : remaining;
            return value.Trim();
        }
示例#12
0
        /// <summary>
        /// Scrapes title, page range, publication year and publication type out of the markup of
        /// a result page and writes them to <paramref name="newSearchResultItemRow"/>.
        /// </summary>
        /// <remarks>
        /// The extraction is a chain of site-specific token searches. A token that does not occur
        /// leaves the value being refined unchanged instead of throwing, and all searches are
        /// ordinal so the result does not depend on the current culture.
        /// </remarks>
        /// <param name="newSearchResultItemRow">Row whose title, pages, date_publication and type are overwritten.</param>
        /// <param name="contentResult">Raw markup of the page to scrape.</param>
        private void FindProperties(SupportSLRDataSet.SearchResultItemRow newSearchResultItemRow, string contentResult)
        {
            // ---- Title -------------------------------------------------------------------
            const string titleOpen  = "<title>";
            const string titleClose = "</title>";

            // When no plain "<title>" exists the whole page is kept, so that the
            // "<title ...attributes>" fallback further down can still locate the element.
            int titleStart = contentResult.IndexOf(titleOpen, StringComparison.OrdinalIgnoreCase);
            string title = titleStart >= 0
                ? contentResult.Substring(titleStart + titleOpen.Length)
                : contentResult;

            // Without a closing tag there is no title element to read.
            int titleEnd = title.IndexOf(titleClose, StringComparison.OrdinalIgnoreCase);
            title = titleEnd >= 0 ? title.Substring(0, titleEnd) : string.Empty;

            // Strip line breaks and known site suffixes.
            title = title.Replace(System.Environment.NewLine, "")
                         .Trim()
                         .Replace(" - Springer", string.Empty)
                         .Replace(" | SpringerLink", string.Empty)
                         .Replace(" | HTML", string.Empty)
                         .Replace(" | HazNet", string.Empty);

            title = SliceAfterToken(title, "| Free Full-Text |");

            // A "<title" still present means the tag carried attributes: skip past its ">"
            // and drop anything after the first " | ".
            const string titleWithAttributes = "<title";
            if (title.Contains(titleWithAttributes))
            {
                title = SliceAfterToken(title, titleWithAttributes);
                title = SliceAfterToken(title, ">");
                title = SliceBeforeToken(title, " | ");
            }

            // ---- Pages -------------------------------------------------------------------
            // Text after the volume/issue paragraph marker; the whole page when the marker is absent.
            string volIssue = SliceAfterToken(contentResult, "<p class=\"volIssue\">");

            const string pagesWord = "Pages";
            string pages = volIssue;

            if (contentResult.Contains(pagesWord))
            {
                const string inlinePages = ", Pages ";
                if (pages.Contains(inlinePages))
                {
                    pages = SliceAfterToken(pages, inlinePages).Trim();
                    pages = SliceBeforeToken(pages, ",").Trim();
                }
                else
                {
                    // "Pages" may only occur before the scanned region; then no range is known.
                    int pagesStart = volIssue.IndexOf(pagesWord, StringComparison.Ordinal);
                    pages = pagesStart >= 0 ? volIssue.Substring(pagesStart) : string.Empty;
                    pages = SliceBeforeToken(pages, "</p>");
                    pages = pages.Replace("Pages", "").Replace("pp", "").Trim();
                }

                // Site-specific wrappers around the page range.
                pages = SliceAfterToken(pages, "postProcessingHook\">").Trim();
                pages = SliceAfterToken(pages, "page-ranges\">").Trim();
                pages = SliceBeforeToken(pages, "</dd>").Trim();

                const string attributeEnd = "\">";
                if (pages.StartsWith(attributeEnd, StringComparison.Ordinal))
                {
                    pages = pages.Substring(attributeEnd.Length);
                }

                pages = SliceBeforeToken(pages, attributeEnd);
                pages = SliceBeforeToken(pages, "</");
            }
            else
            {
                // No "Pages" label anywhere: take the first number, range or number list.
                Regex regexPages = new Regex(@"\d+(?:-\d+)?(?:,\d+(?:-\d+)?)*");
                Match matchPages = regexPages.Match(volIssue);
                if (matchPages.Success)
                {
                    pages = matchPages.Value;
                }
            }

            // ---- Year --------------------------------------------------------------------
            if (volIssue.StartsWith("<a", StringComparison.Ordinal))
            {
                // Skip the attributes of a leading anchor tag (its closing ">" is kept).
                int anchorEnd = volIssue.IndexOf(">", StringComparison.Ordinal);
                if (anchorEnd >= 0)
                {
                    volIssue = volIssue.Substring(anchorEnd);
                }
            }

            // Narrow the text towards the date using whichever site-specific markers occur.
            string year = volIssue;
            year = SliceAfterToken(year, "copyrightyear\" value=\"");
            year = SliceAfterToken(year, "publicationDate\" content=\"");
            year = SliceAfterToken(year, "Publication date </div><div class=\"display_record_indexing_data\"><span class=\"subjectField-postProcessingHook\">");
            year = SliceAfterToken(year, "Publication History");

            // First four-digit number starting with 19 or 20.
            Regex regex = new Regex(@"(19|20)\d\d");
            Match match = regex.Match(year);

            if (match.Success)
            {
                year = match.Value;
            }

            // ---- Type --------------------------------------------------------------------
            string type;

            if (contentResult.Contains("book/"))
            {
                type = "Book Section";
            }
            else if (contentResult.Contains("journal/"))
            {
                type = "Journal";
            }
            else
            {
                type = "Conference proceeding";
            }

            // A publication title mentioning "journal" overrides the URL-based guess. Only the
            // 300 characters after the label are inspected, fewer when the text ends earlier.
            const string publicationTitleLabel = "Publication title";
            int publicationTitleIndex = volIssue.IndexOf(publicationTitleLabel, StringComparison.Ordinal);
            if (publicationTitleIndex >= 0)
            {
                int windowStart  = publicationTitleIndex + publicationTitleLabel.Length;
                int windowLength = Math.Min(300, volIssue.Length - windowStart);
                string publicationTitleArea = volIssue.Substring(windowStart, windowLength);

                if (publicationTitleArea.IndexOf("journal", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    type = "Journal";
                }
            }

            newSearchResultItemRow.title            = title;
            newSearchResultItemRow.pages            = pages;
            newSearchResultItemRow.date_publication = year;
            newSearchResultItemRow.type             = type;
        }

        /// <summary>
        /// Returns the part of <paramref name="text"/> that follows the first ordinal occurrence
        /// of <paramref name="token"/>, or <paramref name="text"/> unchanged when the token is absent.
        /// </summary>
        private static string SliceAfterToken(string text, string token)
        {
            int index = text.IndexOf(token, StringComparison.Ordinal);
            return index >= 0 ? text.Substring(index + token.Length) : text;
        }

        /// <summary>
        /// Returns the part of <paramref name="text"/> that precedes the first ordinal occurrence
        /// of <paramref name="token"/>, or <paramref name="text"/> unchanged when the token is absent.
        /// </summary>
        private static string SliceBeforeToken(string text, string token)
        {
            int index = text.IndexOf(token, StringComparison.Ordinal);
            return index >= 0 ? text.Substring(0, index) : text;
        }