コード例 #1
0
        public void ReadAndWritePublicationTypes()
        {
            // Round-trip test: load the publication types from the test CSV file,
            // write them to a freshly created database, read them back from the
            // database, and verify the categories and OverrideFirstCategory entries
            // on both copies.
            //
            // All equality assertions pass the expected value first and the actual
            // value second, so that a failure message reports them the right way round.

            // Read the publication types from the CSV file
            PublicationTypes ptc = new PublicationTypes(
                AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
                "PublicationTypes.csv"
                );

            Assert.AreEqual(52, ptc.Categories.Count);
            Assert.AreEqual(0, ptc.GetCategoryNumber("Legislation"));
            Assert.AreEqual(1, ptc.GetCategoryNumber("Consensus Development Conference, NIH"));
            Assert.AreEqual(2, ptc.GetCategoryNumber("Review, Multicase"));
            Assert.AreEqual(3, ptc.GetCategoryNumber("Technical Report"));
            Assert.AreEqual(4, ptc.GetCategoryNumber("Comment"));

            // Verify OverrideFirstCategory values
            Assert.IsTrue(ptc.OverrideFirstCategory.ContainsKey("Review"));
            Assert.IsTrue(ptc.OverrideFirstCategory.ContainsKey("Review, Multicase"));
            Assert.IsFalse(ptc.OverrideFirstCategory.ContainsKey("Comment"));


            // First recreate the database, then write the publication types to it
            Database  DB        = new Database("Publication Harvester Unit Test");
            Harvester harvester = new Harvester(DB);

            harvester.CreateTables();
            ptc.WriteToDB(DB);

            // Read the publication types from the database
            PublicationTypes ptcFromDB =
                new PublicationTypes(DB);

            Assert.AreEqual(52, ptcFromDB.Categories.Count);
            Assert.AreEqual(0, ptcFromDB.GetCategoryNumber("Overall"));
            Assert.AreEqual(1, ptcFromDB.GetCategoryNumber("Clinical Trial, Phase II"));
            Assert.AreEqual(2, ptcFromDB.GetCategoryNumber("Review of Reported Cases"));
            Assert.AreEqual(3, ptcFromDB.GetCategoryNumber("Technical Report"));
            Assert.AreEqual(4, ptcFromDB.GetCategoryNumber("Letter"));
            Assert.AreEqual(4, ptcFromDB.GetCategoryNumber("Comment"));

            // Verify OverrideFirstCategory values
            Assert.IsTrue(ptcFromDB.OverrideFirstCategory.ContainsKey("Review"));
            Assert.IsTrue(ptcFromDB.OverrideFirstCategory.ContainsKey("Review, Multicase"));
            Assert.IsFalse(ptcFromDB.OverrideFirstCategory.ContainsKey("Comment"));
        }
コード例 #2
0
        /// <summary>
        /// Decide whether a publication passes every criterion configured on this filter.
        /// The criteria are checked in order (journal, publication window, link ranking,
        /// categories, languages) and the first one that fails rejects the publication.
        /// </summary>
        /// <param name="publication">Publication to check</param>
        /// <param name="linkRanking">Link ranking for the publication</param>
        /// <param name="referencePublication">Reference publication to compare against</param>
        /// <param name="publicationTypes">PublicationTypes object used to look up the publication's category number</param>
        /// <returns>True if the publication matches the filter, false otherwise</returns>
        public bool FilterPublication(Publication publication, int linkRanking, Publication referencePublication, PublicationTypes publicationTypes)
        {
            // Journal: when SameJournal is set, both publications must share a journal
            if (SameJournal)
            {
                if (publication.Journal != referencePublication.Journal)
                {
                    return false;
                }
            }

            // Publication window: the year must not fall before the lower edge...
            if (PubWindowLowerBound.HasValue)
            {
                var earliestYear = referencePublication.Year - PubWindowLowerBound;
                if (earliestYear > publication.Year)
                {
                    return false;
                }
            }

            // ...or after the upper edge of the window around the reference year
            if (PubWindowUpperBound.HasValue)
            {
                var latestYear = referencePublication.Year + PubWindowUpperBound;
                if (latestYear < publication.Year)
                {
                    return false;
                }
            }

            // Link ranking: reject anything ranked above the configured maximum
            if (MaximumLinkRanking < linkRanking)
            {
                return false;
            }

            // Categories: only applied when at least one category is configured.
            // A publication without a type can never match a category.
            bool restrictCategories = (IncludeCategories != null) && (IncludeCategories.Count() > 0);
            if (restrictCategories)
            {
                if (String.IsNullOrEmpty(publication.PubType))
                {
                    return false;
                }

                var category = publicationTypes.GetCategoryNumber(publication.PubType);
                if (!IncludeCategories.Contains(category))
                {
                    return false;
                }
            }

            // Languages: only applied when at least one language is configured.
            // A publication without a language can never match.
            bool restrictLanguages = (IncludeLanguages != null) && (IncludeLanguages.Count() > 0);
            if (restrictLanguages)
            {
                if (String.IsNullOrEmpty(publication.Language))
                {
                    return false;
                }

                if (!IncludeLanguages.Contains(publication.Language))
                {
                    return false;
                }
            }

            // Every configured criterion was satisfied
            return true;
        }
コード例 #3
0
        /// <summary>
        /// Retrieve the publications for a set of colleagues and write them to the database
        /// </summary>
        /// <param name="Colleagues">List of colleagues whose publications should be retrieved</param>
        /// <param name="Languages">Languages passed through to WriteColleaguePublicationsToDB
        /// and Publications.WriteToDB</param>
        /// <param name="AllowedPubTypeCategories">Publication type category numbers; a publication
        /// is only written if its category number is in this list</param>
        /// <exception cref="Exception">Thrown when the NCBI search fails for a colleague. The error
        /// is written to the database first, and the original exception is attached as the
        /// inner exception.</exception>
        public void GetColleaguePublications(Person[] Colleagues, string[] Languages, List <int> AllowedPubTypeCategories)
        {
            // Keep a list of written Setnbs, just to make sure we don't write the
            // same colleague twice
            ArrayList WrittenSetnbs = new ArrayList();

            // Process each colleague
            foreach (Person Colleague in Colleagues)
            {
                // Only process a colleague that hasn't yet been touched
                if (WrittenSetnbs.Contains(Colleague.Setnb))
                {
                    continue;
                }
                WrittenSetnbs.Add(Colleague.Setnb);

                // Get the colleague's publications
                // Search NCBI -- if an error is thrown, write that error to the database
                string results;
                try
                {
                    results = ncbi.Search(Colleague.MedlineSearch);
                }
                catch (Exception ex)
                {
                    string Message = "Error reading publications for "
                                     + Colleague.Last + " (" + Colleague.Setnb + "): " + ex.Message;
                    ColleagueFinder.WriteErrorToDB(Message, DB, Colleague);

                    // Keep the original exception as the inner exception so its
                    // type and stack trace are not lost
                    throw new Exception(Message, ex);
                }

                // Nothing to process if the search came back empty
                // NOTE(review): in this case the colleague is not marked as Harvested --
                // confirm that this is intended
                if ((results == null) || (results.Trim() == ""))
                {
                    continue;
                }

                // Turn the results into a set of publications for the colleague
                Publications ColleaguePublications = new Publications(results, pubTypes);

                // Write the publications to the database -- but only if they
                // actually belong to the colleague.
                if (ColleaguePublications.PublicationList != null)
                {
                    foreach (Publication pub in ColleaguePublications.PublicationList)
                    {
                        // If the publication has no authors, it's clearly not actually
                        // a publication that belongs to this colleague.
                        // Non-English publications and publications whose category is
                        // not in AllowedPubTypeCategories are excluded as well.
                        // NOTE(review): the language check is hard-coded to "eng" and
                        // does not consult the Languages parameter -- confirm
                        bool Eligible = (pub.Authors != null) && (pub.Language == "eng") &&
                                        (AllowedPubTypeCategories.Contains(pubTypes.GetCategoryNumber(pub.PubType)));
                        if (!Eligible)
                        {
                            continue;
                        }

                        // Add a row to the ColleaguePublications table -- this will
                        // return False if the publication doesn't actually belong
                        // to the colleague
                        bool PubBelongsToColleague = WriteColleaguePublicationsToDB(DB, Colleague, pub, pubTypes, Languages);
                        if (!PubBelongsToColleague)
                        {
                            continue;
                        }

                        // Make sure the publication doesn't already exist, then write
                        // it to the database.
                        if (DB.GetIntValue("SELECT Count(*) FROM Publications WHERE PMID = " + pub.PMID.ToString()) == 0)
                        {
                            Publications.WriteToDB(pub, DB, pubTypes, Languages);
                        }
                    }
                }

                // Update the Harvested column in the Colleagues table
                ArrayList Parameters = new ArrayList();
                Parameters.Add(Database.Parameter(Colleague.Setnb));
                DB.ExecuteNonQuery("UPDATE Colleagues SET Harvested = 1 WHERE Setnb = ?", Parameters);
            }
        }
コード例 #4
0
        public void GetPublications()
        {
            // Harvest the publications through TestHarvester, then read them back from
            // the database and verify the details of selected publications for each
            // person, including publication type, MeSH headings and author position.

            // Set up the database
            TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(false, new string[] { "eng" }, 22);

            Database         DB  = new Database("Publication Harvester Unit Test");
            PublicationTypes ptc = new PublicationTypes(
                AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
                "PublicationTypes.csv"
                );

            // Verify the correct publications were written (including publication type and author position)
            People people = new People(DB);

            Assert.AreEqual(4, people.PersonList.Count);
            int FoundPublications = 0;

            foreach (Person person in people.PersonList)
            {
                Publications pubs = new Publications(DB, person, false);
                switch (person.Setnb)
                {
                case "A6009400":     // Van Eys
                    // Check for null before reading Length so a missing list is
                    // reported as an assertion failure
                    Assert.IsNotNull(pubs.PublicationList);
                    Assert.AreEqual(8, pubs.PublicationList.Length);
                    foreach (Publication pub in pubs.PublicationList)
                    {
                        switch (pub.PMID)
                        {
                        case 9876482:
                            FoundPublications++;
                            Assert.AreEqual("Benefits of nutritional intervention on nutritional status, quality of life and survival.", pub.Title);
                            Assert.AreEqual("66-8", pub.Pages);
                            Assert.AreEqual(1998, pub.Year);
                            Assert.IsNull(pub.Month);
                            Assert.IsNull(pub.Day);
                            Assert.AreEqual("Int J Cancer Suppl", pub.Journal);
                            Assert.AreEqual("11", pub.Volume);
                            Assert.IsNull(pub.Issue);

                            Assert.AreEqual(1, pub.Authors.Length);
                            Assert.AreEqual("Van Eys J", pub.Authors[0]);

                            // Verify publication type
                            Assert.AreEqual("Journal Article", pub.PubType);
                            Assert.AreEqual(3, ptc.GetCategoryNumber(pub.PubType));

                            // Verify MeSH headings
                            Assert.AreEqual(8, pub.MeSHHeadings.Count);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Child"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Nutrition Disorders/*complications/*therapy"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Survival Rate"));

                            // Verify position type
                            AssertAuthorPosition(DB, person, pub, 1, Harvester.AuthorPositions.First);
                            break;

                        case 8403744:
                            FoundPublications++;
                            Assert.AreEqual("Early hospital discharge and the timing of newborn metabolic screening.", pub.Title);
                            Assert.AreEqual("463-6", pub.Pages);
                            Assert.AreEqual(1993, pub.Year);
                            Assert.AreEqual("Aug", pub.Month);
                            Assert.IsNull(pub.Day);
                            Assert.AreEqual("Clin Pediatr (Phila)", pub.Journal);
                            Assert.AreEqual("32", pub.Volume);
                            Assert.AreEqual("8", pub.Issue);

                            Assert.AreEqual(4, pub.Authors.Length);
                            Assert.AreEqual("Coody D", pub.Authors[0]);
                            Assert.AreEqual("Yetman RJ", pub.Authors[1]);
                            Assert.AreEqual("Montgomery D", pub.Authors[2]);
                            Assert.AreEqual("van Eys J", pub.Authors[3]);

                            // Verify publication type
                            Assert.AreEqual("Consensus Development Conference, NIH", pub.PubType);
                            Assert.AreEqual(1, ptc.GetCategoryNumber(pub.PubType));

                            // Verify MeSH headings
                            Assert.AreEqual(15, pub.MeSHHeadings.Count);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Cesarean Section"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Hospitals, Private"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("*Insurance, Health"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("United States"));

                            // Verify position type
                            AssertAuthorPosition(DB, person, pub, 4, Harvester.AuthorPositions.Last);
                            break;
                        }
                    }
                    break;

                case "A5401532":     // Tobian
                    Assert.IsNotNull(pubs.PublicationList);
                    Assert.AreEqual(5, pubs.PublicationList.Length);
                    foreach (Publication pub in pubs.PublicationList)
                    {
                        switch (pub.PMID)
                        {
                        case 9931073:
                            FoundPublications++;
                            Assert.AreEqual("Story of the birth of the journal called Hypertension.", pub.Title);
                            Assert.AreEqual("7", pub.Pages);
                            Assert.AreEqual(1999, pub.Year);
                            Assert.AreEqual("Jan", pub.Month);
                            Assert.IsNull(pub.Day);
                            Assert.AreEqual("33", pub.Volume);
                            Assert.AreEqual("1", pub.Issue);

                            Assert.AreEqual(1, pub.Authors.Length);
                            Assert.AreEqual("Tobian L", pub.Authors[0]);

                            // Verify publication type
                            Assert.AreEqual("Historical Article", pub.PubType);
                            Assert.AreEqual(0, ptc.GetCategoryNumber(pub.PubType));

                            // Verify MeSH headings
                            Assert.AreEqual(5, pub.MeSHHeadings.Count);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("American Heart Association/*history"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("*Hypertension"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("United States"));

                            // Verify position type
                            AssertAuthorPosition(DB, person, pub, 1, Harvester.AuthorPositions.First);
                            break;
                        }
                    }
                    break;

                case "A5501586":     // Reemtsma
                    Assert.IsNotNull(pubs.PublicationList);
                    Assert.AreEqual(3, pubs.PublicationList.Length);
                    foreach (Publication pub in pubs.PublicationList)
                    {
                        switch (pub.PMID)
                        {
                        case 11528018:
                            FoundPublications++;
                            Assert.AreEqual("Xenotransplantation: A Historical Perspective.", pub.Title);
                            Assert.AreEqual("9-12", pub.Pages);
                            Assert.AreEqual(1995, pub.Year);
                            Assert.IsNull(pub.Month);
                            Assert.IsNull(pub.Day);
                            Assert.AreEqual("37", pub.Volume);
                            Assert.AreEqual("1", pub.Issue);

                            Assert.AreEqual(1, pub.Authors.Length);
                            Assert.AreEqual("Reemtsma K", pub.Authors[0]);

                            // Verify publication type
                            Assert.AreEqual("JOURNAL ARTICLE", pub.PubType);
                            Assert.AreEqual(3, ptc.GetCategoryNumber(pub.PubType));

                            // Verify MeSH headings
                            Assert.IsNull(pub.MeSHHeadings);

                            // Verify position type
                            AssertAuthorPosition(DB, person, pub, 1, Harvester.AuthorPositions.First);
                            break;
                        }
                    }
                    break;

                case "A5702471":     // Guillemin
                    Assert.IsNotNull(pubs.PublicationList);
                    Assert.AreEqual(6, pubs.PublicationList.Length);
                    foreach (Publication pub in pubs.PublicationList)
                    {
                        switch (pub.PMID)
                        {
                        case 15642779:
                            // For this publication, we're just concerned that
                            // the publication type is "Review" -- even though
                            // it's the second publication type in the citation,
                            // it's flagged as an "override first pubtype"
                            // in PublicationTypes.csv
                            Assert.AreEqual("Review", pub.PubType);
                            break;

                        // NOTE: The title has a quote (laureates') that gets stripped off
                        case 12462241:
                            FoundPublications++;
                            Assert.AreEqual("Nobel laureates letter to President Bush.", pub.Title);
                            Assert.AreEqual("A02", pub.Pages);
                            Assert.AreEqual(2001, pub.Year);
                            Assert.AreEqual("Feb", pub.Month);
                            Assert.AreEqual("22", pub.Day);
                            Assert.AreEqual("Washington Post", pub.Journal);
                            Assert.IsNull(pub.Volume);
                            Assert.IsNull(pub.Issue);

                            Assert.AreEqual(82, pub.Authors.Length);
                            Assert.AreEqual("Arrow KJ", pub.Authors[0]);
                            Assert.AreEqual("Guillemin R", pub.Authors[26]);
                            Assert.AreEqual("Wilson RW", pub.Authors[81]);

                            // Verify publication type
                            Assert.AreEqual("Newspaper Article", pub.PubType);
                            Assert.AreEqual(0, ptc.GetCategoryNumber(pub.PubType));

                            // Verify MeSH headings
                            Assert.AreEqual(9, pub.MeSHHeadings.Count);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Embryo Disposition"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("National Institutes of Health (U.S.)"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("United States"));

                            // Verify position type
                            AssertAuthorPosition(DB, person, pub, 27, Harvester.AuthorPositions.Middle);
                            break;

                        case 3086749:
                            // This publication was altered to contain six GrantIDs in order to
                            // test the GrantID column length in the database
                            FoundPublications++;
                            Assert.AreEqual("Pituitary FSH is released by a heterodimer of the beta-subunits from the two forms of inhibin.", pub.Title);
                            Assert.AreEqual("779-82", pub.Pages);
                            Assert.AreEqual(1986, pub.Year);
                            Assert.AreEqual("Jun", pub.Month);
                            Assert.AreEqual("19-25", pub.Day);
                            Assert.AreEqual("Nature", pub.Journal);
                            Assert.AreEqual("321", pub.Volume);
                            Assert.AreEqual("6072", pub.Issue);

                            Assert.AreEqual(7, pub.Authors.Length);
                            Assert.AreEqual("Ling N", pub.Authors[0]);
                            Assert.AreEqual("Esch F", pub.Authors[4]);
                            Assert.AreEqual("Guillemin R", pub.Authors[6]);

                            // Verify publication type
                            Assert.AreEqual("Journal Article", pub.PubType);
                            Assert.AreEqual(3, ptc.GetCategoryNumber(pub.PubType));

                            // Verify MeSH headings
                            Assert.AreEqual(14, pub.MeSHHeadings.Count);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Amino Acid Sequence"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Follicle Stimulating Hormone/*secretion"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Swine"));

                            // Verify position type
                            AssertAuthorPosition(DB, person, pub, 7, Harvester.AuthorPositions.Last);
                            break;
                        }
                    }
                    break;
                }
            }

            // Six of the publications checked above increment the counter
            // (PMID 15642779 is checked for its publication type only)
            Assert.AreEqual(6, FoundPublications);

            // Verify that People.Harvested has been updated for each person
            DataTable Results = DB.ExecuteQuery("SELECT Setnb, Harvested FROM People");

            Assert.AreEqual(4, Results.Rows.Count);
            foreach (DataRow Row in Results.Rows)
            {
                Assert.IsTrue((bool)Row["Harvested"]);
            }
        }

        /// <summary>
        /// Verify a person's author position on a publication through both lookups:
        /// Publications.GetAuthorPosition against the "PeoplePublications" table and
        /// Person.GetAuthorPosition.
        /// </summary>
        /// <param name="DB">Database to query</param>
        /// <param name="person">Person whose position is being verified</param>
        /// <param name="pub">Publication to look the person up on</param>
        /// <param name="expectedPosition">Expected author position</param>
        /// <param name="expectedType">Expected position type</param>
        private static void AssertAuthorPosition(Database DB, Person person, Publication pub,
                                                 int expectedPosition, Harvester.AuthorPositions expectedType)
        {
            Harvester.AuthorPositions PositionType;

            int AuthorPosition = Publications.GetAuthorPosition(DB, pub.PMID, person, out PositionType, "PeoplePublications");
            Assert.AreEqual(expectedPosition, AuthorPosition);
            Assert.AreEqual(expectedType, PositionType);

            AuthorPosition = person.GetAuthorPosition(DB, pub, out PositionType);
            Assert.AreEqual(expectedPosition, AuthorPosition);
            Assert.AreEqual(expectedType, PositionType);
        }