public void ResetDatabase()
        {
            // Purpose: rebuild the "Publication Harvester Unit Test" database from the test
            // fixtures -- create the tables, write the publication types, write each person
            // from the people spreadsheet and harvest their publications through MockNCBI.

            // Read the people fixture. NOTE: the file name in the literal below is spelled
            // "testhypens" (not "testhyphens"); it is left exactly as written.
            People PeopleFromFile = new People(
                AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPeopleMaintenance",
                "input1 plus testhypens.xls");

            DB = new Database("Publication Harvester Unit Test");
            Harvester harvester = new Harvester(DB);

            harvester.CreateTables();

            // The mock is configured so that searches do not throw
            MockNCBI mockNCBI = new MockNCBI("medline");

            mockNCBI.SearchThrowsAnError = false;

            PublicationTypes ptc = new PublicationTypes(
                AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
                "PublicationTypes.csv"
                );

            ptc.WriteToDB(DB);

            // No-op callbacks for GetPublications: progress and messages are ignored,
            // and the harvest is never interrupted
            Harvester.GetPublicationsStatus  StatusCallback    = (number, total, averageTime) => { };
            Harvester.GetPublicationsMessage MessageCallback   = (message, statusBarOnly) => { };
            Harvester.CheckForInterrupt      InterruptCallback = () => false;

            // Write the people, then "harvest" the publications using MockNCBI
            foreach (Person person in PeopleFromFile.PersonList)
            {
                // Required by the GetPublications signature; the value is not used here
                double AverageMilliseconds;

                person.WriteToDB(DB);
                harvester.GetPublications(mockNCBI, ptc, person, StatusCallback, MessageCallback, InterruptCallback, out AverageMilliseconds);
            }

            // Read the people back; expected value goes first so that a failure message
            // reports "expected 4" rather than the other way round
            People PeopleFromDB = new People(DB);

            Assert.AreEqual(4, PeopleFromDB.PersonList.Count);
        }
        /// <summary>
        /// Reads the publication types from PublicationTypes.csv and checks a sample of
        /// category numbers and OverrideFirstCategory keys, then recreates the
        /// "Publication Harvester Unit Test" tables, writes the types to the database and
        /// repeats the checks on the copy read back from the database.
        /// </summary>
        public void ReadAndWritePublicationTypes()
        {
            // Read the publication types from the CSV file
            PublicationTypes ptc = new PublicationTypes(
                AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
                "PublicationTypes.csv"
                );

            // Assert.AreEqual takes (expected, actual); keeping that order makes the
            // failure messages report the right value as "expected"
            Assert.AreEqual(52, ptc.Categories.Count);
            Assert.AreEqual(0, ptc.GetCategoryNumber("Legislation"));
            Assert.AreEqual(1, ptc.GetCategoryNumber("Consensus Development Conference, NIH"));
            Assert.AreEqual(2, ptc.GetCategoryNumber("Review, Multicase"));
            Assert.AreEqual(3, ptc.GetCategoryNumber("Technical Report"));
            Assert.AreEqual(4, ptc.GetCategoryNumber("Comment"));

            // Verify OverrideFirstCategory values
            Assert.IsTrue(ptc.OverrideFirstCategory.ContainsKey("Review"));
            Assert.IsTrue(ptc.OverrideFirstCategory.ContainsKey("Review, Multicase"));
            Assert.IsFalse(ptc.OverrideFirstCategory.ContainsKey("Comment"));


            // First recreate the database, then write the publication types to it
            Database  DB        = new Database("Publication Harvester Unit Test");
            Harvester harvester = new Harvester(DB);

            harvester.CreateTables();
            ptc.WriteToDB(DB);

            // Read the publication types from the database
            PublicationTypes ptcFromDB = new PublicationTypes(DB);

            Assert.AreEqual(52, ptcFromDB.Categories.Count);
            Assert.AreEqual(0, ptcFromDB.GetCategoryNumber("Overall"));
            Assert.AreEqual(1, ptcFromDB.GetCategoryNumber("Clinical Trial, Phase II"));
            Assert.AreEqual(2, ptcFromDB.GetCategoryNumber("Review of Reported Cases"));
            Assert.AreEqual(3, ptcFromDB.GetCategoryNumber("Technical Report"));
            Assert.AreEqual(4, ptcFromDB.GetCategoryNumber("Letter"));
            Assert.AreEqual(4, ptcFromDB.GetCategoryNumber("Comment"));

            // Verify OverrideFirstCategory values
            Assert.IsTrue(ptcFromDB.OverrideFirstCategory.ContainsKey("Review"));
            Assert.IsTrue(ptcFromDB.OverrideFirstCategory.ContainsKey("Review, Multicase"));
            Assert.IsFalse(ptcFromDB.OverrideFirstCategory.ContainsKey("Comment"));
        }
        /// <summary>
        /// Use the tests from TestColleagues to set up the database,
        /// then find the colleagues, get their publications and
        /// remove false colleagues. Finally create the extra articles for
        /// Tobian and Bunn and verify the articles the two have in common.
        ///
        /// This is a static void so that it can be called by other tests.
        /// </summary>
        /// <param name="DB">Receives the "Colleague Generator Unit Test" database created here</param>
        /// <param name="harvester">Receives a Harvester constructed over <paramref name="DB"/></param>
        /// <param name="PubTypes">Receives the publication types read from PublicationTypes.csv and written to the database</param>
        /// <param name="ncbi">Receives the MockNCBI instance used to find colleague publications</param>
        /// <param name="Languages">Languages passed on to CreateExtraArticlesForTobianAndBunn</param>
        public static void DoSetUp(out Database DB, out Harvester harvester, out PublicationTypes PubTypes, out NCBI ncbi, string[] Languages)
        {
            // First recreate the database
            DB = new Database("Colleague Generator Unit Test");
            ColleagueFinder.CreateTables(DB);

            // Then use the test fixture setup in TestColleagues to populate it
            TestColleagues testColleagues = new TestColleagues();

            testColleagues.TestColleaguesSetUp();

            // Write the publication types to the database
            PubTypes = new PublicationTypes(
                AppDomain.CurrentDomain.BaseDirectory + "\\Test Data\\TestColleagues",
                "PublicationTypes.csv"
                );
            PubTypes.WriteToDB(DB);

            // Create the other objects from the database
            harvester = new Harvester(DB);
            Roster roster = new Roster(AppDomain.CurrentDomain.BaseDirectory + "\\Test Data\\TestRoster\\testroster.csv");

            ncbi = new MockNCBI("Medline");

            // Find the colleagues and publications
            ColleagueFinder finder = new ColleagueFinder(DB, roster, ncbi, null);
            People          people = new People(DB);

            foreach (Person person in people.PersonList)
            {
                Person[] found = finder.FindPotentialColleagues(person);
                if (found == null)
                {
                    continue;
                }

                // NOTE(review): the language list here is hard-coded to "eng" rather than
                // taken from the Languages parameter -- confirm that this is intentional.
                finder.GetColleaguePublications(found, new string[] { "eng" }, new List <int> {
                    1, 2, 3
                });
            }

            // Remove false colleagues
            ColleagueFinder.RemoveFalseColleagues(DB, null, "PeoplePublications");


            // Create the extra articles for Bunn and Tobian.
            // Verify that Bunn and Tobian have five articles in common, with years
            // ranging from 1993 to 2001.
            CreateExtraArticlesForTobianAndBunn(DB, PubTypes, Languages);
            DataTable Result = DB.ExecuteQuery(
                @"SELECT p.Year, p.PMID, pp.PositionType AS StarPositionType, 
                       cp.PositionType AS ColleaguePositionType, p.Journal
                  FROM Publications p, ColleaguePublications cp, PeoplePublications pp
                 WHERE pp.Setnb = 'A5401532'
                   AND cp.Setnb = 'A4800524'
                   AND p.PMID = pp.PMID
                   AND p.PMID = cp.PMID
                 ORDER BY p.Year ASC");

            // Expected value first, so failure messages label the values correctly
            Assert.AreEqual(5, Result.Rows.Count);
            Assert.AreEqual(1993, Result.Rows[0]["Year"]);
            Assert.AreEqual(2001, Result.Rows[4]["Year"]);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Harvest each of the publications in the people file
        /// </summary>
        /// <remarks>
        /// Progress and errors are reported through the log, the status strip and the
        /// progress bar. Failures while setting up (opening the database, creating tables,
        /// reading or writing publication types, importing people) are logged with an error
        /// box and end the harvest; a failure for an individual person is logged and the
        /// loop moves on to the next person.
        /// </remarks>
        /// <param name="PeopleFile">Filename of the people file</param>
        /// <param name="PublicationTypeFile">Filename of publication type file</param>
        /// <param name="ContinueFromInterruption">True if continuing from a previously interrupted harvest</param>
        public void Harvest(string PeopleFile, string PublicationTypeFile, bool ContinueFromInterruption)
        {
            // Nothing can be done unless both input files exist
            if (!File.Exists(PeopleFile))
            {
                MessageBox.Show($"The People file '{PeopleFile}' does not exist", "People file not found", MessageBoxButtons.OK, MessageBoxIcon.Warning);
                return;
            }
            if (!File.Exists(PublicationTypeFile))
            {
                MessageBox.Show($"The Publication Type file '{PublicationTypeFile}' does not exist", "Publication Type file not found", MessageBoxButtons.OK, MessageBoxIcon.Warning);
                return;
            }

            UpdateDatabaseStatus();
            AddLogEntry(ContinueFromInterruption ? "Continuing interrupted harvest" : "Beginning harvesting");

            // Reset lastDSNSelected to make sure that the next check for interrupted data is NOT skipped
            lastDSNSelected = "";

            // Open the database and build the harvester, applying any language restriction
            Database  database;
            Harvester pubHarvester;

            try
            {
                database     = new Database(DSN.Text);
                pubHarvester = new Harvester(database);

                if (LanguageList.Text == "")
                {
                    AddLogEntry("No language restriction added");
                }
                else
                {
                    string[] restrictTo = LanguageList.Text.Split(',');

                    pubHarvester.Languages = restrictTo;
                    foreach (string languageCode in restrictTo)
                    {
                        AddLogEntry($"Adding language restriction: {languageCode}");
                    }
                }
            }
            catch (Exception ex)
            {
                AddLogEntryWithErrorBox(ex.Message, "Unable to begin harvesting");
                return;
            }

            // Initialize the database (only for a fresh harvest)
            if (!ContinueFromInterruption)
            {
                try
                {
                    AddLogEntry("Initializing the database");
                    pubHarvester.CreateTables();
                    UpdateDatabaseStatus();
                }
                catch (Exception ex)
                {
                    AddLogEntryWithErrorBox(ex.Message, "Unable to initialize database");
                    return;
                }
            }

            PublicationTypes publicationTypes;

            if (ContinueFromInterruption)
            {
                // If we're continuing, read the publication types from the database
                try
                {
                    AddLogEntry("Reading publication types from the database");
                    publicationTypes = new PublicationTypes(database);
                }
                catch (Exception ex)
                {
                    AddLogEntryWithErrorBox(ex.Message, "Unable to read publication types");
                    return;
                }

                // Remove any data left over from the interruption
                AddLogEntry("Removing any data left over from the previous interruption");
                pubHarvester.ClearDataAfterInterruption();
                UpdateDatabaseStatus();
            }
            else
            {
                // Read the publication types from the file and write them to the database
                try
                {
                    AddLogEntry("Writing publication types to database");
                    string typeFolder = Path.GetDirectoryName(PublicationTypeFile);
                    string typeName   = Path.GetFileName(PublicationTypeFile);

                    publicationTypes = new PublicationTypes(typeFolder, typeName);
                    publicationTypes.WriteToDB(database);
                    UpdateDatabaseStatus();
                }
                catch (Exception ex)
                {
                    AddLogEntryWithErrorBox(ex.Message, "Unable to read publication types");
                    return;
                }

                // Read the people
                try
                {
                    AddLogEntry($"Reading people from {Path.GetFileName(PeopleFile)} and writing them to the database");
                    pubHarvester.ImportPeople(PeopleFile);
                    UpdateDatabaseStatus();
                }
                catch (Exception ex)
                {
                    AddLogEntryWithErrorBox(ex.Message, $"Unable to read the people from {Path.GetFileName(PeopleFile)}");
                    return;
                }
            }

            // Status callback: shows per-publication progress in the status label.
            // The progress bar is deliberately left alone here -- it tracks the person
            // total, and updating it for publications too leads to a messy-looking UI.
            Harvester.GetPublicationsStatus onStatus = (number, total, averageTime) =>
            {
                toolStripStatusLabel1.Text = $"Reading publication {number} of {total} ({averageTime} ms average)";
                UpdateDatabaseStatus();
                Application.DoEvents();
            };

            // Message callback: status-bar-only messages go to the status label,
            // everything else goes to the log
            Harvester.GetPublicationsMessage onMessage = (message, statusBarOnly) =>
            {
                if (!statusBarOnly)
                {
                    AddLogEntry(message);
                    return;
                }

                toolStripStatusLabel1.Text = message;
                Application.DoEvents();
            };

            // Interrupt callback: reports the current value of InterruptClicked
            Harvester.CheckForInterrupt isInterrupted = () => InterruptClicked;

            // Get each person's publications and write them to the database
            NCBI medline = new NCBI("medline");

            if (!NCBI.ApiKeyExists)
            {
                AddLogEntry("Performance is limited to under 3 requests per second.");
                AddLogEntry("Consider pasting an API key into " + NCBI.ApiKeyPath);
                AddLogEntry("Or set the NCBI_API_KEY_FILE environemnt variable to the API key file path");
                AddLogEntry("For more information, see https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/");
            }
            else
            {
                AddLogEntry("Using API key: " + NCBI.ApiKeyPath);
            }

            People peopleInDatabase = new People(database);
            int    peopleCount      = peopleInDatabase.PersonList.Count;
            int    processedCount   = 0;

            toolStripProgressBar1.Minimum = 0;
            toolStripProgressBar1.Maximum = peopleCount;

            foreach (Person person in peopleInDatabase.PersonList)
            {
                processedCount++;
                try
                {
                    // If continuing from interruption, only harvest unharvested people
                    if (ContinueFromInterruption && person.Harvested)
                    {
                        AddLogEntry($"Already retrieved publications for {person.Last} ({person.Setnb})");
                        continue;
                    }

                    AddLogEntry($"Getting publications for {person.Last} ({person.Setnb}), number {processedCount} of {peopleCount}");
                    toolStripProgressBar1.Value = processedCount;

                    double averageWriteMs;
                    int    publicationsWritten = pubHarvester.GetPublications(medline, publicationTypes, person, onStatus, onMessage, isInterrupted, out averageWriteMs);

                    if (InterruptClicked)
                    {
                        AddLogEntry("Publication harvesting was interrupted");
                        UpdateDatabaseStatus();
                        return;
                    }

                    double roundedAverage = Math.Round(averageWriteMs, 1);

                    AddLogEntry($"Wrote {publicationsWritten} publications, average write time {roundedAverage} ms");
                    UpdateDatabaseStatus();
                }
                catch (Exception ex)
                {
                    // A failure for one person is logged; the remaining people are still processed
                    AddLogEntry($"An error occurred while reading publications for {person.Last} ({person.Setnb}): {ex.Message}");
                }
            }

            AddLogEntry("Finished reading publications");
            UpdateDatabaseStatus();
        }
Ejemplo n.º 5
0
        public void TestGrantsReport()
        {
            // Purpose: harvest the test publications, write the grants report to
            // GrantsReport.csv, read it back through the ODBC text driver and spot-check
            // the rows for two known PMIDs.

            // Set up the database with test publications (and don't forget to add the
            // publication types!)
            DB = new Database("Publication Harvester Unit Test");
            Harvester harvester = new Harvester(DB);

            harvester.CreateTables();
            PublicationTypes PubTypes = new PublicationTypes(
                AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
                "PublicationTypes.csv"
                );

            PubTypes.WriteToDB(DB);
            reports = new Reports(DB, AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestReports\\pubmed_jifs.xls");
            Assert.AreEqual(10, reports.Weights.Count);
            TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(false, new string[] { "eng" }, 22);

            // Write the grants report; the using block closes the file even if
            // GrantsReport throws
            string reportPath = AppDomain.CurrentDomain.BaseDirectory + "\\GrantsReport.csv";

            using (StreamWriter writer = new StreamWriter(reportPath))
            {
                reports.GrantsReport(writer);
            }

            // Verify that the grants were written properly

            // Read the rows back from the file
            string ConnectionString =
                "Driver={Microsoft Text Driver (*.txt; *.csv)};Dbq="
                + AppDomain.CurrentDomain.BaseDirectory + ";";
            DataTable Results = new DataTable();

            using (OdbcConnection Connection = new OdbcConnection(ConnectionString))
            using (OdbcDataAdapter DataAdapter = new OdbcDataAdapter("SELECT * FROM [GrantsReport.csv]", Connection))
            {
                DataAdapter.Fill(Results);
            }

            // Grant IDs accepted for the Guillemin publication (PMID 3086749)
            string[] guilleminGrants =
            {
                "AM-18811/AM/NIADDK",
                "HD-09690/HD/NICHD",
                "MH-00663/MH/NIMH",
                "AG 03106/AG/NIA",
                "DK-26741/DK/NIDDK"
            };

            int numChecked = 0;

            // Check a few selected results
            foreach (DataRow Row in Results.Rows)
            {
                int    Year    = Convert.ToInt32(Row[0]);
                int    PMID    = Convert.ToInt32(Row[1]);
                string GrantID = Row[2].ToString();

                switch (PMID)
                {
                case 3086749:     // Guillemin
                    numChecked++;
                    Assert.AreEqual(1986, Year);
                    Assert.IsTrue(Array.IndexOf(guilleminGrants, GrantID) >= 0,
                                  "Unexpected grant ID for PMID 3086749: " + GrantID);
                    break;

                case 9049886:     // Van Eys
                    numChecked++;
                    Assert.AreEqual(1997, Year);
                    Assert.AreEqual("RO1-CA33097/CA/NCI", GrantID);
                    break;
                }
            }

            // Five Guillemin rows plus one Van Eys row are expected
            Assert.AreEqual(6, numChecked);

            File.Delete(reportPath);
        }
Ejemplo n.º 6
0
        public void TestMeSHHeadingReport()
        {
            // Purpose: harvest the test publications, write the MeSH heading report to
            // MeSHHeadingReport.csv, read it back through the ODBC text driver and
            // spot-check the heading counts for two known people.

            // Set up the database with test publications (and don't forget to add the
            // publication types!)
            DB = new Database("Publication Harvester Unit Test");
            Harvester harvester = new Harvester(DB);

            harvester.CreateTables();
            PublicationTypes PubTypes = new PublicationTypes(
                AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
                "PublicationTypes.csv"
                );

            PubTypes.WriteToDB(DB);
            reports = new Reports(DB, AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestReports\\pubmed_jifs.xls");
            Assert.AreEqual(10, reports.Weights.Count);
            TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(false, new string[] { "eng" }, 22);

            // The report callbacks are not of interest to this test, so they do nothing
            Reports.ReportStatus  StatusCallback  = (number, total, person, progressBarOnly) => { };
            Reports.ReportMessage MessageCallback = (message) => { };

            // Write the MeSH Heading report; the using block closes the file even if
            // MeSHHeadingReport throws
            string reportPath = AppDomain.CurrentDomain.BaseDirectory + "\\MeSHHeadingReport.csv";

            using (StreamWriter writer = new StreamWriter(reportPath))
            {
                reports.MeSHHeadingReport(writer, StatusCallback, MessageCallback);
            }

            // Verify that the MeSH headings were written properly

            // Read the rows back from the file
            string ConnectionString =
                "Driver={Microsoft Text Driver (*.txt; *.csv)};Dbq="
                + AppDomain.CurrentDomain.BaseDirectory + ";";
            DataTable Results = new DataTable();

            using (OdbcConnection Connection = new OdbcConnection(ConnectionString))
            using (OdbcDataAdapter DataAdapter = new OdbcDataAdapter("SELECT * FROM [MeSHHeadingReport.csv]", Connection))
            {
                DataAdapter.Fill(Results);
            }

            int numChecked = 0;

            // Check a few selected results
            foreach (DataRow Row in Results.Rows)
            {
                string Setnb   = Row[0].ToString();
                int    Year    = Convert.ToInt32(Row[1]);
                string Heading = Row[2].ToString();
                int    Count   = Convert.ToInt32(Row[3]);

                switch (Setnb)
                {
                case "A6009400":     // Van Eys
                    if ((Year == 1998) && (Heading == "Humans"))
                    {
                        Assert.AreEqual(2, Count);
                        numChecked++;
                    }
                    if ((Year == 1998) && (Heading == "Child"))
                    {
                        Assert.AreEqual(1, Count);
                        numChecked++;
                    }
                    if ((Year == 2001) && (Heading == "Humans"))
                    {
                        Assert.AreEqual(1, Count);
                        numChecked++;
                    }
                    break;

                case "A5702471":     // Guillemin
                    if ((Year == 2005) && (Heading == "Hypothalamic Hormones/*physiology"))
                    {
                        Assert.AreEqual(1, Count);
                        numChecked++;
                    }
                    break;
                }
            }

            // Each of the four selected rows must have been seen exactly once
            Assert.AreEqual(4, numChecked);

            File.Delete(reportPath);
        }
Ejemplo n.º 7
0
        public void TestPeopleReportRows()
        {
            // Purpose: harvest the test publications, then build a single report row for
            // Van Eys (1998) and for Tobian (1997), round-trip each through
            // WriteAndReadBackCSVRow and check every column of the result.

            // Set up the database with test publications (and don't forget to add the
            // publication types!)
            DB = new Database("Publication Harvester Unit Test");
            Harvester harvester = new Harvester(DB);

            harvester.CreateTables();
            PublicationTypes PubTypes = new PublicationTypes(
                AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
                "PublicationTypes.csv"
                );

            PubTypes.WriteToDB(DB);
            reports = new Reports(DB, AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestReports\\pubmed_jifs.xls");
            Assert.IsTrue(reports.Weights.Count == 10);
            TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(false, new string[] { "eng" }, 22);
            people = new People(DB);

            // Zero-based column indexes. The count columns are pubcount, pubcount_pos1,
            // 123pubcount, 123pubcount_pos1, 3pubcount and 3pubcount_pos1 (columns #3, #5,
            // #15, #17, #51, #53 when counting from one); each weighted column sits
            // immediately after its count column.
            int[] countColumns    = { 2, 4, 14, 16, 50, 52 };
            int[] weightedColumns = { 3, 5, 15, 17, 51, 53 };

            foreach (Person person in people.PersonList)
            {
                Publications pubs = new Publications(DB, person, false);

                if (person.Setnb == "A6009400")
                {
                    // Van Eys has two publications in 1998, both have zero weight
                    DataRow  vanEysRow = WriteAndReadBackCSVRow(reports.ReportRow(person, pubs, 1998));
                    object[] cells     = vanEysRow.ItemArray;

                    Assert.IsTrue(cells.Length == 74);
                    Assert.IsTrue(cells[0].ToString() == "A6009400");
                    Assert.IsTrue(cells[1].ToString() == "1998");

                    // Every count column should be 2; everything else (including the
                    // weighted columns) should be zero
                    for (int i = 2; i <= 73; i++)
                    {
                        string expected = (Array.IndexOf(countColumns, i) >= 0) ? "2" : "0";

                        Assert.IsTrue(cells[i].ToString() == expected, "Failed at i == " + i.ToString());
                    }
                }
                else if (person.Setnb == "A5401532")
                {
                    // Tobian has two publications in 1997 of type 3 with a
                    // combined weight of 4.602
                    DataRow  tobianRow = WriteAndReadBackCSVRow(reports.ReportRow(person, pubs, 1997));
                    object[] cells     = tobianRow.ItemArray;

                    Assert.IsTrue(cells.Length == 74);
                    Assert.IsTrue(cells[0].ToString() == "A5401532");
                    Assert.IsTrue(cells[1].ToString() == "1997");

                    // Count columns should be 2, weighted columns should be 4.602,
                    // and all remaining columns should be zero
                    for (int i = 2; i <= 73; i++)
                    {
                        string expected;

                        if (Array.IndexOf(countColumns, i) >= 0)
                        {
                            expected = "2";
                        }
                        else if (Array.IndexOf(weightedColumns, i) >= 0)
                        {
                            expected = "4.602";
                        }
                        else
                        {
                            expected = "0";
                        }

                        Assert.IsTrue(cells[i].ToString() == expected, "Failed at i == " + i.ToString());
                    }
                }
            }
        }
Ejemplo n.º 8
0
        public void TestSkipSetnbs()
        {
            // Purpose: verify that both PeopleReport and PubsReport leave out every
            // person whose Setnb is in the skip list.

            // Set up the database with test publications (and don't forget to add the
            // publication types!)
            DB = new Database("Publication Harvester Unit Test");
            Harvester harvester = new Harvester(DB);

            harvester.CreateTables();
            PublicationTypes PubTypes = new PublicationTypes(
                AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
                "PublicationTypes.csv"
                );

            PubTypes.WriteToDB(DB);
            reports = new Reports(DB, AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestReports\\pubmed_jifs.xls");
            Assert.AreEqual(10, reports.Weights.Count);
            TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(false, new string[] { "eng" }, 22);
            people = new People(DB);

            // The report callbacks are not of interest to this test, so they do nothing
            Reports.ReportStatus  StatusCallback  = (number, total, person, progressBarOnly) => { };
            Reports.ReportMessage MessageCallback = (message) => { };

            // Set up the Setnbs to skip
            ArrayList SetnbsToSkip = new ArrayList();

            SetnbsToSkip.Add("A5401532");
            SetnbsToSkip.Add("A6009400");

            string reportPath = AppDomain.CurrentDomain.BaseDirectory + "\\TestSkipSetnbs.csv";

            // Reads the report back and fails if any data line starts with a skipped Setnb.
            // Setnbs are identifiers, so the comparison is ordinal rather than culture-aware.
            void AssertSkippedSetnbsAreAbsent()
            {
                using (StreamReader reader = new StreamReader(reportPath))
                {
                    reader.ReadLine(); // skip the header row

                    string Line;
                    while ((Line = reader.ReadLine()) != null)
                    {
                        foreach (string skipped in SetnbsToSkip)
                        {
                            Assert.IsFalse(Line.StartsWith(skipped, StringComparison.Ordinal));
                        }
                    }
                }
            }

            // Verify that the people report skips the setnbs
            using (StreamWriter writer = new StreamWriter(reportPath))
            {
                reports.PeopleReport(SetnbsToSkip, writer, StatusCallback, MessageCallback);
            }
            AssertSkippedSetnbsAreAbsent();

            // Verify that the publications report skips the setnbs
            // (append: false -- the previous report is overwritten)
            using (StreamWriter writer = new StreamWriter(reportPath, false))
            {
                reports.PubsReport(SetnbsToSkip, writer, StatusCallback, MessageCallback);
            }
            AssertSkippedSetnbsAreAbsent();

            File.Delete(reportPath);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Writes the people report to TestEntireReport.csv, compares every value
        /// in it against the hand-generated spreadsheet TestEntireReport_Data.xls,
        /// then checks Reports.BackupReportAndGetSetnbs() against the same file.
        /// </summary>
        public void TestEntireReport()
        {
            string Setnb = "";

            string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
            string reportPath    = baseDirectory + "\\TestEntireReport.csv";
            string backupPath    = reportPath + ".bak";

            // Columns to compare between the hand-generated data and the report.
            // The first two (setnb, year) identify the row and are not compared.
            string[] Columns =
            {
                "setnb",                    "year",                   "pubcount",               "wghtd_pubcount",       "pubcount_pos1",
                "wghtd_pubcount_pos1",      "pubcount_posN",          "wghtd_pubcount_posN",
                "pubcount_posM",            "wghtd_pubcount_posM",    "pubcount_posNTL",
                "wghtd_pubcount_posNTL",    "pubcount_pos2",          "wghtd_pubcount_pos2",
                "123pubcount",              "wghtd_123pubcount",      "123pubcount_pos1",
                "wghtd_123pubcount_pos1",   "123pubcount_posN",       "wghtd_123pubcount_posN",
                "123pubcount_posM",         "wghtd_123pubcount_posM", "123pubcount_posNTL",
                "wghtd_123pubcount_posNTL", "123pubcount_pos2",       "wghtd_123pubcount_pos2",
                "1pubcount",                "wghtd_1pubcount",        "1pubcount_pos1",         "wghtd_1pubcount_pos1",
                "1pubcount_posN",           "wghtd_1pubcount_posN",   "1pubcount_posM",
                "wghtd_1pubcount_posM",     "1pubcount_posNTL",       "wghtd_1pubcount_posNTL",
                "1pubcount_pos2",           "wghtd_1pubcount_pos2",   "2pubcount",              "wghtd_2pubcount",
                "2pubcount_pos1",           "wghtd_2pubcount_pos1",   "2pubcount_posN",
                "wghtd_2pubcount_posN",     "2pubcount_posM",         "wghtd_2pubcount_posM",
                "2pubcount_posNTL",         "wghtd_2pubcount_posNTL", "2pubcount_pos2",
                "wghtd_2pubcount_pos2",     "3pubcount",              "wghtd_3pubcount",        "3pubcount_pos1",
                // NOTE(review): wghtd_3pubcount_posN is excluded from the comparison;
                // the reason is not evident from this test -- confirm before re-enabling.
                "wghtd_3pubcount_pos1",     "3pubcount_posN",         //"wghtd_3pubcount_posN",
                "3pubcount_posM",           "wghtd_3pubcount_posM",   "3pubcount_posNTL",
                "wghtd_3pubcount_posNTL",   "3pubcount_pos2",         "wghtd_3pubcount_pos2",
                "4pubcount",                "wghtd_4pubcount",        "4pubcount_pos1",         "wghtd_4pubcount_pos1",
                "4pubcount_posN",           "wghtd_4pubcount_posN",   "4pubcount_posM",
                "wghtd_4pubcount_posM",     "4pubcount_posNTL",       "wghtd_4pubcount_posNTL",
                "4pubcount_pos2",           "wghtd_4pubcount_pos2"
            };

            // Set up the database with test publications (and don't forget to add the
            // publication types!)
            DB = new Database("Publication Harvester Unit Test");
            Harvester harvester = new Harvester(DB);

            harvester.CreateTables();
            PublicationTypes PubTypes = new PublicationTypes(
                baseDirectory + "\\Unit Tests\\TestPublicationTypes",
                "PublicationTypes.csv"
                );

            PubTypes.WriteToDB(DB);
            reports = new Reports(DB, baseDirectory + "\\Unit Tests\\TestReports\\pubmed_jifs.xls");
            Assert.AreEqual(10, reports.Weights.Count);
            TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(false, new string[] { "eng" }, 22);
            people = new People(DB);

            // The report callbacks are required by PeopleReport() but this test ignores them
            Reports.ReportStatus StatusCallback   = delegate(int number, int total, Person person, bool ProgressBarOnly) { };
            Reports.ReportMessage MessageCallback = delegate(string Message) { };

            try
            {
                // Write the report; the writer must be disposed before the file is read back
                using (StreamWriter writer = new StreamWriter(reportPath))
                {
                    reports.PeopleReport(null, writer, StatusCallback, MessageCallback);
                }

                // Read the report into an array (materialized once so the file is
                // split a single time)
                var lines = File.ReadAllLines(reportPath)
                            .Select(line => line.Split(new char[] { ',' }))
                            .ToArray();
                var header = lines.First();
                var data   = lines.Skip(1).ToList();

                Assert.AreEqual(85, data.Count);

                // Returns the value of one column for the single report row that
                // matches the given setnb and year; fails the test if the column
                // is missing or the row is absent or duplicated.
                string ReportData(string setnb, string year, string column)
                {
                    int index = Array.IndexOf(header, column);

                    Assert.IsTrue(index >= 0, $"Unable to find column {column} in TestEntireReport.csv");

                    var matches = data.Where(line => line[0] == setnb && line[1] == year).ToList();

                    Assert.AreEqual(1, matches.Count, $"Unable to find setnb={setnb} year={year} in TestEntireReport.csv");
                    return(matches[0][index]);
                }

                // Sanity check: a known setnb/year row must be present exactly once
                ReportData("A5401532", "1997", "wghtd_pubcount_pos1");

                // Read the report file that was generated by hand (TestEntireReport_Data.xls)
                // and check each value against the report that was generated by Reports()
                DataTable HandGeneratedData = NpoiHelper.ReadExcelFileToDataTable(
                    baseDirectory + "\\Unit Tests\\TestReports", "TestEntireReport_Data.xls");

                Assert.AreEqual(85, HandGeneratedData.Rows.Count);

                var valuesChecked = 0;

                foreach (DataRow HandGeneratedRow in HandGeneratedData.Rows)
                {
                    // Find the rows in the hand-generated data and the report
                    Setnb = HandGeneratedRow[0].ToString();
                    string yearText = Convert.ToInt32(HandGeneratedRow[1]).ToString();

                    for (int i = 2; i < Columns.Length; i++)
                    {
                        valuesChecked++;
                        string columnName    = Columns[i];
                        string actualValue   = ReportData(Setnb, yearText, columnName);
                        string expectedValue = HandGeneratedRow[columnName].ToString();
                        Assert.AreEqual(expectedValue, actualValue, Setnb + "/" + yearText + "/" + columnName + " -- hand generated has " + expectedValue + ", report has " + actualValue);
                    }
                }

                Assert.AreEqual((HandGeneratedData.Rows.Count) * (Columns.Length - 2), valuesChecked);

                // Use BackupReportAndGetSetnbs to back up the report -- check that it
                // returns the correct list of Setnbs and removes the last one from
                // the file. The last Setnb should still be in Setnb.
                ArrayList Setnbs = Reports.BackupReportAndGetSetnbs(reportPath);

                // Read the backup file that was created, make sure that the last setnb in the
                // file isn't contained and the others are
                using (StreamReader reader = new StreamReader(backupPath))
                {
                    string Line = reader.ReadLine();           // skip the header row

                    while ((Line = reader.ReadLine()) != null) // Find the last setnb in the original file
                    {
                        Setnb = Line.Substring(0, 8);
                    }
                }
                string RemovedSetnb = Setnb;

                Assert.IsFalse(Setnbs.Contains(RemovedSetnb));

                // Verify that the new file contains only the other setnbs
                Assert.AreEqual(3, Setnbs.Count);
                using (StreamReader reader = new StreamReader(reportPath))
                {
                    string Line = reader.ReadLine(); // skip the header row

                    while ((Line = reader.ReadLine()) != null)
                    {
                        Setnb = Line.Substring(0, 8);
                        Assert.IsTrue(Setnbs.Contains(Setnb));
                        Assert.AreNotEqual(RemovedSetnb, Setnb);
                    }
                }
            }
            finally
            {
                // Delete the temporary files even when an assertion above fails
                // (File.Delete does nothing if the file does not exist)
                File.Delete(reportPath);
                File.Delete(backupPath);
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds a publications-report row for every harvested publication,
        /// round-trips it through WriteAndReadBackCSVRow(), and checks the
        /// individual fields of the row produced for PMID 15249795.
        /// </summary>
        public void TestPubsReportRow()
        {
            const int TargetPmid = 15249795;

            // Expected field values for the target publication, in column order
            string[] expectedFields =
            {
                "A5401532",                                                   // 0. setnb
                "15249795",                                                   // 1. pmid
                "J Clin Hypertens (Greenwich)",                               // 2. journal_name
                "2004",                                                       // 3. year
                "Jul",                                                        // 4. Month
                "",                                                           // 5. day
                "Interview with Louis Tobian, MD. Interview by Marvin Moser.", // 6. title
                "6",                                                          // 7. Volume
                "7",                                                          // 8. issue
                "1",                                                          // 9. position
                "1",                                                          // 10. nbauthors
                "0",                                                          // 11. Bin
                "391-2",                                                      // 12. Pages
                "Historical Article"                                          // 13. Publication_type
            };

            // Set up the database with test publications (and don't forget to add the
            // publication types!)
            DB = new Database("Publication Harvester Unit Test");
            Harvester harvester = new Harvester(DB);

            harvester.CreateTables();
            PublicationTypes PubTypes = new PublicationTypes(
                AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
                "PublicationTypes.csv"
                );

            PubTypes.WriteToDB(DB);
            reports = new Reports(DB, AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestReports\\pubmed_jifs.xls");
            Assert.AreEqual(10, reports.Weights.Count);
            TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(false, new string[] { "eng" }, 22);
            people = new People(DB);

            // Tracks whether the target publication was seen at all, so the test
            // cannot pass without having checked anything
            bool targetFound = false;

            foreach (Person person in people.PersonList)
            {
                Publications pubs = new Publications(DB, person, false);

                if (pubs.PublicationList == null)
                {
                    continue;
                }

                foreach (Publication pub in pubs.PublicationList)
                {
                    // Every publication is round-tripped, even though only one is inspected
                    DataRow Row = WriteAndReadBackCSVRow(reports.PubsReportRow(person, pub));

                    if (pub.PMID != TargetPmid)
                    {
                        continue;
                    }

                    targetFound = true;

                    // ItemArray is read once rather than once per field
                    object[] fields = Row.ItemArray;

                    Assert.IsTrue(fields.Length >= expectedFields.Length,
                                  $"Report row for PMID {TargetPmid} has {fields.Length} fields, expected at least {expectedFields.Length}");

                    for (int i = 0; i < expectedFields.Length; i++)
                    {
                        Assert.AreEqual(expectedFields[i], fields[i].ToString(),
                                        $"Unexpected value in field {i} of the report row for PMID {TargetPmid}");
                    }
                }
            }

            Assert.IsTrue(targetFound, $"PMID {TargetPmid} was not found among the harvested publications");
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Set up the database with data from Input1.XLS using the Mock NCBI object
        /// (this is also called from TestReports()). The database status is
        /// verified after every step: after dropping each table, after creating
        /// the tables, and before and after harvesting each person.
        /// </summary>
        /// <param name="NCBISearchThrowsAnError">True if the MockNCBI object is supposed to throw an error</param>
        /// <param name="Languages">Value assigned to the harvester's Languages property before harvesting</param>
        /// <param name="ExpectedPublications">Number of publications the database must report once all people are harvested without errors</param>
        public static void GetPublicationsFromInput1XLS_Using_MockNCBI(bool NCBISearchThrowsAnError, string[] Languages, int ExpectedPublications)
        {
            Database DB = new Database("Publication Harvester Unit Test");

            // Reads the current database status and compares each value against
            // what the caller expects (expected value first, so failure messages
            // report expected/actual the right way round).
            void AssertStatus(bool wantTablesCreated, int wantPeople, int wantHarvestedPeople, int wantPublications, int wantErrors)
            {
                bool TablesCreated;
                int  NumPeople;
                int  NumHarvestedPeople;
                int  NumPublications;
                int  NumErrors;

                DB.GetStatus(out TablesCreated, out NumPeople, out NumHarvestedPeople, out NumPublications, out NumErrors);
                Assert.AreEqual(wantTablesCreated, TablesCreated, "TablesCreated");
                Assert.AreEqual(wantPeople, NumPeople, "NumPeople");
                Assert.AreEqual(wantHarvestedPeople, NumHarvestedPeople, "NumHarvestedPeople");
                Assert.AreEqual(wantPublications, NumPublications, "NumPublications");
                Assert.AreEqual(wantErrors, NumErrors, "NumErrors");
            }

            // Drop all tables and make sure the database reports as empty
            foreach (string Table in new string[] {
                "meshheadings", "people", "peoplepublications", "publicationauthors",
                "publicationmeshheadings", "publications", "pubtypecategories"
            })
            {
                DB.ExecuteNonQuery("DROP TABLE IF EXISTS " + Table);
                AssertStatus(false, 0, 0, 0, 0);
            }

            // Create and populate the tables
            Harvester harvester = new Harvester(DB);

            harvester.Languages = Languages;
            MockNCBI mockNCBI = new MockNCBI("medline");

            mockNCBI.SearchThrowsAnError = NCBISearchThrowsAnError;
            PublicationTypes ptc = new PublicationTypes(
                AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
                "PublicationTypes.csv"
                );

            // Reinitialize the database
            harvester.CreateTables();
            ptc.WriteToDB(DB);

            // Retrieve the publications for each person in input1.xls using GetPublications()
            People PeopleFromFile = new People(
                AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPeople",
                "input1.xls");

            // Make an anonymous callback function that counts the status callbacks
            int Callbacks = 0;

            Harvester.GetPublicationsStatus StatusCallback = delegate(int number, int total, int averageTime)
            {
                Callbacks++;
            };

            // Make an anonymous callback function to do nothing for GetPublicationsMessage
            Harvester.GetPublicationsMessage MessageCallback = delegate(string Message, bool StatusBarOnly)
            {
                //
            };

            // Make an anonymous callback function to return false for CheckForInterrupt
            Harvester.CheckForInterrupt InterruptCallback = delegate()
            {
                return(false);
            };

            // Verify that the tables now exist and are still empty
            AssertStatus(true, 0, 0, 0, 0);

            int PeopleCount    = 0;
            int HarvestedCount = 0;
            int PubCount       = 0;

            foreach (Person person in PeopleFromFile.PersonList)
            {
                double AverageMilliseconds;

                // First write the person to the database
                person.WriteToDB(DB);
                PeopleCount++;

                // Check that the database status is updated properly. When the mock
                // search throws, nobody counts as harvested and each person already
                // processed has contributed one error.
                AssertStatus(
                    true,
                    PeopleCount,
                    NCBISearchThrowsAnError ? 0 : HarvestedCount,
                    PubCount,
                    NCBISearchThrowsAnError ? PeopleCount - 1 : 0);

                // Harvest the person's publications
                PubCount += harvester.GetPublications(mockNCBI, ptc, person, StatusCallback, MessageCallback, InterruptCallback, out AverageMilliseconds);
                HarvestedCount++;

                // Check the status again after the person was harvested
                AssertStatus(
                    true,
                    PeopleCount,
                    NCBISearchThrowsAnError ? 0 : HarvestedCount,
                    PubCount,
                    NCBISearchThrowsAnError ? PeopleCount : 0);
            }

            // Verify that the database was written properly: 24 status callbacks
            // and 4 people are the totals this test expects for input1.xls
            Assert.AreEqual(NCBISearchThrowsAnError ? 0 : 24, Callbacks, "Callbacks");
            if (!NCBISearchThrowsAnError)
            {
                AssertStatus(true, 4, 4, ExpectedPublications, 0);
            }
            else
            {
                AssertStatus(true, 4, 0, 0, 4);
            }
        }