// Reads the publication types from the test CSV file, writes them to the unit
// test database, reads them back from the database, and checks the category
// count, selected category numbers and OverrideFirstCategory keys after each read.
// All equality assertions use the (expected, actual) argument order so that a
// failure message reports the values the right way round.
public void ReadAndWritePublicationTypes()
{
    // Read the publication types from the CSV file
    PublicationTypes ptc = new PublicationTypes(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
        "PublicationTypes.csv"
        );
    Assert.AreEqual(52, ptc.Categories.Count);
    Assert.AreEqual(0, ptc.GetCategoryNumber("Legislation"));
    Assert.AreEqual(1, ptc.GetCategoryNumber("Consensus Development Conference, NIH"));
    Assert.AreEqual(2, ptc.GetCategoryNumber("Review, Multicase"));
    Assert.AreEqual(3, ptc.GetCategoryNumber("Technical Report"));
    Assert.AreEqual(4, ptc.GetCategoryNumber("Comment"));

    // Verify OverrideFirstCategory values
    Assert.IsTrue(ptc.OverrideFirstCategory.ContainsKey("Review"));
    Assert.IsTrue(ptc.OverrideFirstCategory.ContainsKey("Review, Multicase"));
    Assert.IsFalse(ptc.OverrideFirstCategory.ContainsKey("Comment"));

    // First recreate the database, then write the publication types to it
    Database DB = new Database("Publication Harvester Unit Test");
    Harvester harvester = new Harvester(DB);
    harvester.CreateTables();
    ptc.WriteToDB(DB);

    // Read the publication types from the database
    PublicationTypes ptcFromDB = new PublicationTypes(DB);
    Assert.AreEqual(52, ptcFromDB.Categories.Count);
    Assert.AreEqual(0, ptcFromDB.GetCategoryNumber("Overall"));
    Assert.AreEqual(1, ptcFromDB.GetCategoryNumber("Clinical Trial, Phase II"));
    Assert.AreEqual(2, ptcFromDB.GetCategoryNumber("Review of Reported Cases"));
    Assert.AreEqual(3, ptcFromDB.GetCategoryNumber("Technical Report"));
    Assert.AreEqual(4, ptcFromDB.GetCategoryNumber("Letter"));
    Assert.AreEqual(4, ptcFromDB.GetCategoryNumber("Comment"));

    // Verify OverrideFirstCategory values
    Assert.IsTrue(ptcFromDB.OverrideFirstCategory.ContainsKey("Review"));
    Assert.IsTrue(ptcFromDB.OverrideFirstCategory.ContainsKey("Review, Multicase"));
    Assert.IsFalse(ptcFromDB.OverrideFirstCategory.ContainsKey("Comment"));
}
/// <summary>
/// Decide whether a publication passes every criterion configured on this filter
/// </summary>
/// <param name="publication">Publication being tested</param>
/// <param name="linkRanking">Link ranking of the publication being tested</param>
/// <param name="referencePublication">Publication that the journal and publication-year window criteria are measured against</param>
/// <param name="publicationTypes">PublicationTypes object used to look up the category number of the publication's type</param>
/// <returns>False as soon as one criterion rejects the publication, true if none of them do</returns>
public bool FilterPublication(Publication publication, int linkRanking, Publication referencePublication, PublicationTypes publicationTypes)
{
    // Journal criterion -- only enforced when SameJournal is set
    if (SameJournal)
    {
        if (publication.Journal != referencePublication.Journal)
        {
            return false;
        }
    }

    // Publication window, lower edge: reject anything published before
    // (reference year - lower bound)
    if (PubWindowLowerBound.HasValue)
    {
        if (referencePublication.Year - PubWindowLowerBound > publication.Year)
        {
            return false;
        }
    }

    // Publication window, upper edge: reject anything published after
    // (reference year + upper bound)
    if (PubWindowUpperBound.HasValue)
    {
        if (referencePublication.Year + PubWindowUpperBound < publication.Year)
        {
            return false;
        }
    }

    // Link ranking criterion
    bool rankedTooLow = linkRanking > MaximumLinkRanking;
    if (rankedTooLow)
    {
        return false;
    }

    // Category criterion -- only enforced when at least one category is listed.
    // A publication with no publication type can never satisfy it.
    bool categoriesRestricted = (IncludeCategories != null) && (IncludeCategories.Count() > 0);
    if (categoriesRestricted)
    {
        if (String.IsNullOrEmpty(publication.PubType))
        {
            return false;
        }
        if (!IncludeCategories.Contains(publicationTypes.GetCategoryNumber(publication.PubType)))
        {
            return false;
        }
    }

    // Language criterion -- only enforced when at least one language is listed.
    // A publication with no language can never satisfy it.
    bool languagesRestricted = (IncludeLanguages != null) && (IncludeLanguages.Count() > 0);
    if (languagesRestricted)
    {
        if (String.IsNullOrEmpty(publication.Language))
        {
            return false;
        }
        if (!IncludeLanguages.Contains(publication.Language))
        {
            return false;
        }
    }

    // Nothing rejected the publication
    return true;
}
/// <summary>
/// Retrieve the publications for a set of colleagues and write them to the database
/// </summary>
/// <param name="Colleagues">List of colleagues whose publications should be retrieved</param>
/// <param name="Languages">Languages passed along to the calls that write publications to the database</param>
/// <param name="AllowedPubTypeCategories">Publication type category numbers; a publication
/// whose category number is not in this list is skipped</param>
/// <exception cref="Exception">The NCBI search for a colleague failed. The error is written
/// to the database before this is thrown, and the original exception is attached as the
/// InnerException.</exception>
public void GetColleaguePublications(Person[] Colleagues, string[] Languages, List<int> AllowedPubTypeCategories)
{
    // Keep a list of written Setnbs, just to make sure we don't write the
    // same colleague twice
    ArrayList WrittenSetnbs = new ArrayList();

    // Process each colleague
    foreach (Person Colleague in Colleagues)
    {
        // Only process a colleague that hasn't yet been touched
        if (WrittenSetnbs.Contains(Colleague.Setnb))
        {
            continue;
        }
        WrittenSetnbs.Add(Colleague.Setnb);

        // Get the colleague's publications
        // Search NCBI -- if an error is thrown, write that error to the database
        string results;
        try
        {
            results = ncbi.Search(Colleague.MedlineSearch);
        }
        catch (Exception ex)
        {
            // Close the parenthesis around the Setnb and separate it from the
            // underlying error text so the message is readable
            string Message = "Error reading publications for " + Colleague.Last
                + " (" + Colleague.Setnb + "): " + ex.Message;
            ColleagueFinder.WriteErrorToDB(Message, DB, Colleague);

            // Keep the original exception as the inner exception so its type
            // and stack trace are not lost
            throw new Exception(Message, ex);
        }

        // Nothing more to do for this colleague if the search came back empty.
        // NOTE(review): the Harvested flag below is therefore only set for
        // colleagues whose search returned results -- confirm that is intended.
        if ((results == null) || (results.Trim() == ""))
        {
            continue;
        }

        // Turn the results into a set of publications for the colleague
        Publications ColleaguePublications = new Publications(results, pubTypes);

        // Write the publications to the database -- but only if they
        // actually belong to the colleague.
        if (ColleaguePublications.PublicationList != null)
        {
            foreach (Publication pub in ColleaguePublications.PublicationList)
            {
                // If the publication has no authors, it's clearly not actually
                // a publication that belongs to this colleague.
                // Also, since the publication harvester only harvests
                // English publications, we exclude any non-English ones as well.
                bool Eligible = (pub.Authors != null)
                    && (pub.Language == "eng")
                    && (AllowedPubTypeCategories.Contains(pubTypes.GetCategoryNumber(pub.PubType)));
                if (!Eligible)
                {
                    continue;
                }

                // Add a row to the ColleaguePublications table -- this will
                // return False if the publication doesn't actually belong
                // to the colleague
                bool PubBelongsToColleague = WriteColleaguePublicationsToDB(DB, Colleague, pub, pubTypes, Languages);
                if (!PubBelongsToColleague)
                {
                    continue;
                }

                // Make sure the publication doesn't already exist, then write
                // it to the database.
                if (DB.GetIntValue("SELECT Count(*) FROM Publications WHERE PMID = " + pub.PMID.ToString()) == 0)
                {
                    Publications.WriteToDB(pub, DB, pubTypes, Languages);
                }
            }
        }

        // Update the Harvested column in the Colleagues table
        ArrayList Parameters = new ArrayList();
        Parameters.Add(Database.Parameter(Colleague.Setnb));
        DB.ExecuteNonQuery("UPDATE Colleagues SET Harvested = 1 WHERE Setnb = ?", Parameters);
    }
}
// Populates the unit test database through
// TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI, then reads each person
// back and verifies the number of publications, the details of selected
// publications (citation fields, publication type and category, MeSH headings,
// author position) and finally that every row in People is flagged as harvested.
public void GetPublications()
{
    // Set up the database
    TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(false, new string[] { "eng" }, 22);
    Database DB = new Database("Publication Harvester Unit Test");
    PublicationTypes ptc = new PublicationTypes(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
        "PublicationTypes.csv"
        );

    // Verify the correct publications were written (including publication type and author position)
    People people = new People(DB);
    Assert.IsTrue(people.PersonList.Count == 4);
    int FoundPublications = 0;
    foreach (Person person in people.PersonList)
    {
        Publications pubs = new Publications(DB, person, false);
        switch (person.Setnb)
        {
            case "A6009400": // Van Eys
                // Check for null before touching Length so a missing list is
                // reported as an assertion failure
                Assert.IsNotNull(pubs.PublicationList);
                Assert.IsTrue(pubs.PublicationList.Length == 8);
                foreach (Publication pub in pubs.PublicationList)
                {
                    switch (pub.PMID)
                    {
                        case 9876482:
                            FoundPublications++;
                            Assert.IsTrue(pub.Title == "Benefits of nutritional intervention on nutritional status, quality of life and survival.");
                            Assert.IsTrue(pub.Pages == "66-8");
                            Assert.IsTrue(pub.Year == 1998);
                            Assert.IsTrue(pub.Month == null);
                            Assert.IsTrue(pub.Day == null);
                            Assert.IsTrue(pub.Journal == "Int J Cancer Suppl");
                            Assert.IsTrue(pub.Volume == "11");
                            Assert.IsTrue(pub.Issue == null);
                            Assert.IsTrue(pub.Authors.Length == 1);
                            Assert.IsTrue(pub.Authors[0] == "Van Eys J");

                            // Verify publication type
                            Assert.IsTrue(pub.PubType == "Journal Article");
                            Assert.IsTrue(ptc.GetCategoryNumber(pub.PubType) == 3);

                            // Verify MeSH headings
                            Assert.IsTrue(pub.MeSHHeadings.Count == 8);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Child"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Nutrition Disorders/*complications/*therapy"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Survival Rate"));

                            // Verify position type
                            AssertAuthorPosition(DB, person, pub, 1, Harvester.AuthorPositions.First);
                            break;

                        case 8403744:
                            FoundPublications++;
                            Assert.IsTrue(pub.Title == "Early hospital discharge and the timing of newborn metabolic screening.");
                            Assert.IsTrue(pub.Pages == "463-6");
                            Assert.IsTrue(pub.Year == 1993);
                            Assert.IsTrue(pub.Month == "Aug");
                            Assert.IsTrue(pub.Day == null);
                            Assert.IsTrue(pub.Journal == "Clin Pediatr (Phila)");
                            Assert.IsTrue(pub.Volume == "32");
                            Assert.IsTrue(pub.Issue == "8");
                            Assert.IsTrue(pub.Authors.Length == 4);
                            Assert.IsTrue(pub.Authors[0] == "Coody D");
                            Assert.IsTrue(pub.Authors[1] == "Yetman RJ");
                            Assert.IsTrue(pub.Authors[2] == "Montgomery D");
                            Assert.IsTrue(pub.Authors[3] == "van Eys J");

                            // Verify publication type
                            Assert.IsTrue(pub.PubType == "Consensus Development Conference, NIH");
                            Assert.IsTrue(ptc.GetCategoryNumber(pub.PubType) == 1);

                            // Verify MeSH headings
                            Assert.IsTrue(pub.MeSHHeadings.Count == 15);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Cesarean Section"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Hospitals, Private"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("*Insurance, Health"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("United States"));

                            // Verify position type
                            AssertAuthorPosition(DB, person, pub, 4, Harvester.AuthorPositions.Last);
                            break;
                    }
                }
                break;

            case "A5401532": // Tobian
                Assert.IsNotNull(pubs.PublicationList);
                Assert.IsTrue(pubs.PublicationList.Length == 5);
                foreach (Publication pub in pubs.PublicationList)
                {
                    switch (pub.PMID)
                    {
                        case 9931073:
                            FoundPublications++;
                            Assert.IsTrue(pub.Title == "Story of the birth of the journal called Hypertension.");
                            Assert.IsTrue(pub.Pages == "7");
                            Assert.IsTrue(pub.Year == 1999);
                            Assert.IsTrue(pub.Month == "Jan");
                            Assert.IsTrue(pub.Day == null);
                            Assert.IsTrue(pub.Volume == "33");
                            Assert.IsTrue(pub.Issue == "1");
                            Assert.IsTrue(pub.Authors.Length == 1);
                            Assert.IsTrue(pub.Authors[0] == "Tobian L");

                            // Verify publication type
                            Assert.IsTrue(pub.PubType == "Historical Article");
                            Assert.IsTrue(ptc.GetCategoryNumber(pub.PubType) == 0);

                            // Verify MeSH headings
                            Assert.IsTrue(pub.MeSHHeadings.Count == 5);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("American Heart Association/*history"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("*Hypertension"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("United States"));

                            // Verify position type
                            AssertAuthorPosition(DB, person, pub, 1, Harvester.AuthorPositions.First);
                            break;
                    }
                }
                break;

            case "A5501586": // Reemtsma
                Assert.IsNotNull(pubs.PublicationList);
                Assert.IsTrue(pubs.PublicationList.Length == 3);
                foreach (Publication pub in pubs.PublicationList)
                {
                    switch (pub.PMID)
                    {
                        case 11528018:
                            FoundPublications++;
                            Assert.IsTrue(pub.Title == "Xenotransplantation: A Historical Perspective.");
                            Assert.IsTrue(pub.Pages == "9-12");
                            Assert.IsTrue(pub.Year == 1995);
                            Assert.IsTrue(pub.Month == null);
                            Assert.IsTrue(pub.Day == null);
                            Assert.IsTrue(pub.Volume == "37");
                            Assert.IsTrue(pub.Issue == "1");
                            Assert.IsTrue(pub.Authors.Length == 1);
                            Assert.IsTrue(pub.Authors[0] == "Reemtsma K");

                            // Verify publication type
                            Assert.IsTrue(pub.PubType == "JOURNAL ARTICLE");
                            Assert.IsTrue(ptc.GetCategoryNumber(pub.PubType) == 3);

                            // Verify MeSH headings
                            Assert.IsTrue(pub.MeSHHeadings == null);

                            // Verify position type
                            AssertAuthorPosition(DB, person, pub, 1, Harvester.AuthorPositions.First);
                            break;
                    }
                }
                break;

            case "A5702471": // Guillemin
                Assert.IsNotNull(pubs.PublicationList);
                Assert.IsTrue(pubs.PublicationList.Length == 6);
                foreach (Publication pub in pubs.PublicationList)
                {
                    switch (pub.PMID)
                    {
                        case 15642779:
                            // For this publication, we're just concerned that
                            // the publication type is "Review" -- even though
                            // it's the second publication type in the citation,
                            // it's flagged as an "override first pubtype"
                            // in PublicationTypes.csv
                            // (This case does not increment FoundPublications.)
                            Assert.IsTrue(pub.PubType == "Review");
                            break;

                        // NOTE: The title has a quote (laureates') that gets stripped off
                        case 12462241:
                            FoundPublications++;
                            Assert.IsTrue(pub.Title == "Nobel laureates letter to President Bush.");
                            Assert.IsTrue(pub.Pages == "A02");
                            Assert.IsTrue(pub.Year == 2001);
                            Assert.IsTrue(pub.Month == "Feb");
                            Assert.IsTrue(pub.Day == "22");
                            Assert.IsTrue(pub.Journal == "Washington Post");
                            Assert.IsTrue(pub.Volume == null);
                            Assert.IsTrue(pub.Issue == null);
                            Assert.IsTrue(pub.Authors.Length == 82);
                            Assert.IsTrue(pub.Authors[0] == "Arrow KJ");
                            Assert.IsTrue(pub.Authors[26] == "Guillemin R");
                            Assert.IsTrue(pub.Authors[81] == "Wilson RW");

                            // Verify publication type
                            Assert.IsTrue(pub.PubType == "Newspaper Article");
                            Assert.IsTrue(ptc.GetCategoryNumber(pub.PubType) == 0);

                            // Verify MeSH headings
                            Assert.IsTrue(pub.MeSHHeadings.Count == 9);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Embryo Disposition"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("National Institutes of Health (U.S.)"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("United States"));

                            // Verify position type
                            AssertAuthorPosition(DB, person, pub, 27, Harvester.AuthorPositions.Middle);
                            break;

                        case 3086749:
                            // This publication was altered to contain six GrantIDs in order to
                            // test the GrantID column length in the database
                            FoundPublications++;
                            Assert.IsTrue(pub.Title == "Pituitary FSH is released by a heterodimer of the beta-subunits from the two forms of inhibin.");
                            Assert.IsTrue(pub.Pages == "779-82");
                            Assert.IsTrue(pub.Year == 1986);
                            Assert.IsTrue(pub.Month == "Jun");
                            Assert.IsTrue(pub.Day == "19-25");
                            Assert.IsTrue(pub.Journal == "Nature");
                            Assert.IsTrue(pub.Volume == "321");
                            Assert.IsTrue(pub.Issue == "6072");
                            Assert.IsTrue(pub.Authors.Length == 7);
                            Assert.IsTrue(pub.Authors[0] == "Ling N");
                            Assert.IsTrue(pub.Authors[4] == "Esch F");
                            Assert.IsTrue(pub.Authors[6] == "Guillemin R");

                            // Verify publication type
                            Assert.IsTrue(pub.PubType == "Journal Article");
                            Assert.IsTrue(ptc.GetCategoryNumber(pub.PubType) == 3);

                            // Verify MeSH headings
                            Assert.IsTrue(pub.MeSHHeadings.Count == 14);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Amino Acid Sequence"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Follicle Stimulating Hormone/*secretion"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Swine"));

                            // Verify position type
                            AssertAuthorPosition(DB, person, pub, 7, Harvester.AuthorPositions.Last);
                            break;
                    }
                }
                break;
        }
    }

    // Six of the publications checked above increment the counter
    Assert.IsTrue(FoundPublications == 6);

    // Verify that People.Harvested has been updated for each person
    DataTable Results = DB.ExecuteQuery("SELECT Setnb, Harvested FROM People");
    Assert.IsTrue(Results.Rows.Count == 4);
    foreach (DataRow Row in Results.Rows)
    {
        Assert.IsTrue((bool)Row["Harvested"] == true);
    }
}

/// <summary>
/// Verify the author position and position type of a person on a publication,
/// using both Publications.GetAuthorPosition (against the "PeoplePublications"
/// table name) and Person.GetAuthorPosition
/// </summary>
/// <param name="DB">Database to query</param>
/// <param name="person">Person whose position is being checked</param>
/// <param name="pub">Publication to check</param>
/// <param name="expectedPosition">Author position both lookups must return</param>
/// <param name="expectedType">Position type both lookups must return</param>
private static void AssertAuthorPosition(Database DB, Person person, Publication pub, int expectedPosition, Harvester.AuthorPositions expectedType)
{
    Harvester.AuthorPositions positionType;

    int authorPosition = Publications.GetAuthorPosition(DB, pub.PMID, person, out positionType, "PeoplePublications");
    Assert.IsTrue(authorPosition == expectedPosition);
    Assert.IsTrue(positionType == expectedType);

    authorPosition = person.GetAuthorPosition(DB, pub, out positionType);
    Assert.IsTrue(authorPosition == expectedPosition);
    Assert.IsTrue(positionType == expectedType);
}