/// <summary>
/// Parameterless constructor: leaves every scalar property at its default value,
/// starts the scrape context at <c>None</c> and gives each collection an empty list.
/// </summary>
/// <remarks>
/// NOTE(review): being <c>protected</c>, this is presumably intended for an ORM/serializer
/// or for derived types — confirm against the callers.
/// </remarks>
protected Article()
{
    ScrapeContext = ArticleScrapeContextEnum.None;

    // Collections start out empty rather than null.
    _versions = new List<Version>();
    SubjectItemArticles = new List<SubjectItemArticle>();
    AuthorArticles = new List<AuthorArticle>();
}
/// <summary>
/// Creates an article from the values captured for it, with empty author, subject and
/// version collections and a scrape context of <c>None</c>.
/// </summary>
/// <param name="arxivId">Value stored in <c>ArxivId</c>.</param>
/// <param name="htmlLink">Value stored in <c>HtmlLink</c>.</param>
/// <param name="pdfUrl">Value stored in <c>PdfUrl</c>.</param>
/// <param name="otherFormatUrl">Value stored in <c>OtherFormatUrl</c>.</param>
/// <param name="title">Value stored in <c>Title</c>.</param>
/// <param name="abstractText">Value stored in <c>AbstractText</c>.</param>
/// <param name="comments">Value stored in <c>Comments</c>.</param>
/// <param name="journalReference">Value stored in <c>JournalReference</c>.</param>
/// <param name="journalReferenceHtmlLink">Value stored in <c>JournalReferenceHtmlLink</c>.</param>
/// <param name="scrapedDate">Value stored in <c>ScrapedDate</c>.</param>
/// <remarks>No argument is validated; values are stored exactly as passed.</remarks>
public Article(
    string arxivId,
    string htmlLink,
    string pdfUrl,
    string otherFormatUrl,
    string title,
    string abstractText,
    string comments,
    string journalReference,
    string journalReferenceHtmlLink,
    DateTime scrapedDate)
{
    // Identity and links.
    ArxivId = arxivId;
    HtmlLink = htmlLink;
    PdfUrl = pdfUrl;
    OtherFormatUrl = otherFormatUrl;

    // Descriptive text.
    Title = title;
    AbstractText = abstractText;
    Comments = comments;

    // Journal reference.
    JournalReference = journalReference;
    JournalReferenceHtmlLink = journalReferenceHtmlLink;

    ScrapedDate = scrapedDate;

    // Same defaults as the parameterless constructor: empty collections, no context yet.
    AuthorArticles = new List<AuthorArticle>();
    SubjectItemArticles = new List<SubjectItemArticle>();
    _versions = new List<Version>();
    ScrapeContext = ArticleScrapeContextEnum.None;
}
/// <summary>
/// Sets <c>ScrapeContext</c> from keywords found in three pieces of page text.
/// </summary>
/// <param name="pageHeader">
/// Page header text; if it contains "catchup" the context becomes <c>CatchUp</c>.
/// </param>
/// <param name="arxivIdLabel">
/// Label text next to the arXiv id; "replaced" yields <c>Replacement</c>, otherwise
/// "cross-list" yields <c>CrossList</c>.
/// </param>
/// <param name="h3Text">
/// Section heading text; "submission" yields <c>Submission</c>, otherwise "cross" yields
/// <c>CrossList</c>, otherwise "replacement" yields <c>Replacement</c>.
/// </param>
/// <remarks>
/// The inputs are evaluated in the order above and a later match overwrites an earlier
/// one, so <paramref name="h3Text"/> wins over <paramref name="arxivIdLabel"/>, which wins
/// over <paramref name="pageHeader"/>. Null or empty inputs are ignored, and when nothing
/// matches <c>ScrapeContext</c> is left unchanged. Matching is a case-insensitive ordinal
/// substring search, so the result does not depend on the current culture.
/// </remarks>
public void AddScrapeContext(string pageHeader, string arxivIdLabel, string h3Text)
{
    if (!string.IsNullOrEmpty(pageHeader)
        && pageHeader.IndexOf("catchup", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        ScrapeContext = ArticleScrapeContextEnum.CatchUp;
    }

    if (!string.IsNullOrEmpty(arxivIdLabel))
    {
        if (arxivIdLabel.IndexOf("replaced", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            ScrapeContext = ArticleScrapeContextEnum.Replacement;
        }
        else if (arxivIdLabel.IndexOf("cross-list", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            ScrapeContext = ArticleScrapeContextEnum.CrossList;
        }
    }

    if (!string.IsNullOrEmpty(h3Text))
    {
        // Order matters: a heading containing both "submission" and another keyword
        // resolves to Submission. NOTE(review): confirm that precedence is intended.
        if (h3Text.IndexOf("submission", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            ScrapeContext = ArticleScrapeContextEnum.Submission;
        }
        else if (h3Text.IndexOf("cross", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            // "cross" also covers "cross-list", so a single check is enough.
            ScrapeContext = ArticleScrapeContextEnum.CrossList;
        }
        else if (h3Text.IndexOf("replacement", StringComparison.OrdinalIgnoreCase) >= 0)
        {
            // Previously compared lowercased text against "Replacement" (capital R),
            // which could never match.
            ScrapeContext = ArticleScrapeContextEnum.Replacement;
        }
    }
}