/// <summary>
/// Builds one stub <see cref="Article"/> per reference id whose <c>idtype</c> attribute equals "SGR".
/// </summary>
/// <param name="referencesElements">
/// Reference elements. Each one is searched for a <c>ref-info</c> child, then a
/// <c>refd-itemidlist</c> child, whose child elements carry an <c>idtype</c> attribute and the id as text.
/// A null sequence is treated as empty.
/// </param>
/// <returns>
/// Articles with only <c>ScopusID</c> populated, in the order the ids were encountered.
/// References missing any part of the expected structure are skipped instead of aborting the whole list.
/// </returns>
private List<Article> GetReferences(IEnumerable<XElement> referencesElements)
{
    var result = new List<Article>();
    if (referencesElements == null)
    {
        return result;
    }

    foreach (var element in referencesElements)
    {
        var refInfoElement = element.Element("ref-info");
        if (refInfoElement == null)
        {
            continue;
        }

        var refIdListElement = refInfoElement.Element("refd-itemidlist");
        if (refIdListElement == null)
        {
            continue;
        }

        foreach (var refIdElement in refIdListElement.Elements())
        {
            // An id entry without an idtype attribute cannot be an "SGR" id, so it is ignored.
            var idTypeAttribute = refIdElement.Attribute("idtype");
            if (idTypeAttribute == null || idTypeAttribute.Value != "SGR")
            {
                continue;
            }

            // Only the id is recorded here; no further article data is looked up for references.
            result.Add(new Article { ScopusID = refIdElement.Value });
        }
    }

    return result;
}
/// <summary>
/// Creates an <see cref="Article"/> for every supplied element and fills it in through
/// <c>GetArticleData</c>.
/// </summary>
/// <param name="articleElements">
/// Elements that each contain an Atom <c>identifier</c> child holding the article id,
/// optionally prefixed with "SCOPUS_ID:".
/// </param>
/// <returns>
/// The articles that were processed without an exception. An element that fails is reported on the
/// console (exception message only) and left out of the result.
/// </returns>
List<Article> CollectArticles(IEnumerable<XElement> articleElements)
{
    XName identifierName = XName.Get("{http://www.w3.org/2005/Atom}identifier");
    List<Article> collected = new List<Article>();

    foreach (XElement entry in articleElements)
    {
        try
        {
            Article current = new Article();

            // Strip the "SCOPUS_ID:" prefix so only the bare id is stored.
            string rawIdentifier = entry.Element(identifierName).Value;
            current.ScopusID = rawIdentifier.Replace("SCOPUS_ID:", "");

            GetArticleData(ref current);
            collected.Add(current);
        }
        catch (Exception failure)
        {
            // Best effort: one bad entry must not stop the rest of the batch.
            Console.WriteLine(failure.Message);
        }
    }

    return collected;
}
/// <summary>
/// Requests the abstract record for <paramref name="article"/> through <c>GetData</c> and copies
/// title, publication date, citation count, abstract, authors, DOI, keywords, subject areas and
/// references from the returned XML onto the article.
/// </summary>
/// <param name="article">
/// Article whose <c>ScopusID</c> selects the record to fetch. The instance is mutated in place; the
/// reference itself is never reassigned (the <c>ref</c> modifier is kept for caller compatibility).
/// </param>
/// <remarks>
/// <para>Returns without changing the article when its id is already in <c>processedArticles</c> or
/// when <c>GetData</c> returns null or an empty string.</para>
/// <para>When the core data, title or cover date element is missing, the id is added to
/// <c>brokenArticles</c> and the remaining fields are left untouched.</para>
/// <para>Optional elements (citation count, abstract, DOI, bibliography path) are skipped when absent.
/// A citation count that is present but not a valid integer still throws from <c>int.Parse</c>.</para>
/// <para>This method does not add the id to <c>processedArticles</c>.</para>
/// </remarks>
void GetArticleData(ref Article article)
{
    var articleId = article.ScopusID;
    if (processedArticles.Contains(articleId))
    {
        return;
    }

    Console.WriteLine("Working on article " + articleId);

    var apiUrl = "http://api.elsevier.com/content/abstract/scopus_id/{0}?httpAccept=application/xml";
    var data = GetData(string.Format(apiUrl, articleId));
    if (string.IsNullOrEmpty(data))
    {
        return;
    }

    XNamespace ns = "http://www.elsevier.com/xml/svapi/abstract/dtd";
    var element = XElement.Parse(data);
    var coreElement = element.Element(ns + "coredata");

    // Title and cover date are mandatory: without either one the article is recorded as broken.
    var titleElement = coreElement == null ? null : coreElement.Element(ns + "title");
    if (titleElement == null)
    {
        brokenArticles.Add(articleId);
        return;
    }
    article.Title = titleElement.Value;

    var coverDateElement = coreElement.Element(ns + "coverDate");
    if (coverDateElement == null)
    {
        brokenArticles.Add(articleId);
        return;
    }
    article.PublicationDate = coverDateElement.Value;

    var citedByElement = coreElement.Element(ns + "citedby-count");
    if (citedByElement != null)
    {
        // Machine-readable number: parse independently of the current thread culture.
        article.CitationsCount = int.Parse(
            citedByElement.Value,
            System.Globalization.CultureInfo.InvariantCulture);
    }

    var abstractElement = coreElement
        .Elements(ns + "description")
        .Elements(ns + "abstract")
        .FirstOrDefault();
    if (abstractElement != null)
    {
        article.Abstracts = abstractElement.Value;
    }

    var affiliations = GetAffiliations(element.Elements(ns + "affiliation"));
    article.Authors = GetAuthors(element.Element(ns + "authors"), affiliations);

    var doiElement = coreElement.Elements(ns + "doi").FirstOrDefault();
    if (doiElement != null)
    {
        article.DOI = doiElement.Value;
    }

    foreach (var keyword in element.Elements(ns + "authkeywords").Elements())
    {
        article.Keywords.Add(keyword.Value);
    }

    article.SubjectAreas = GetSubjectAreas(element.Element(ns + "subject-areas"));

    // Walk item -> bibrecord -> tail -> bibliography (looked up without a namespace) and stop at the
    // first missing level, so a record lacking any of them simply has no references.
    XElement referencesItem = element;
    foreach (var name in new[] { "item", "bibrecord", "tail", "bibliography" })
    {
        referencesItem = referencesItem.Element(name);
        if (referencesItem == null)
        {
            break;
        }
    }

    if (referencesItem != null)
    {
        article.References = GetReferences(referencesItem.Elements());
    }
}