// Rebuilds the "Publication Harvester Unit Test" database: recreates the
// harvester tables, writes the publication types, then writes every person
// from the people spreadsheet and harvests their publications via MockNCBI.
// Finishes by asserting that four people ended up in the People table.
public void ResetDatabase()
{
    // Import "TestPeopleMaintenance/input1 plus testhypens.xls" into the People table
    // (the file name is spelled exactly as it is passed to the People constructor)
    People PeopleFromFile = new People(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPeopleMaintenance",
        "input1 plus testhypens.xls");

    DB = new Database("Publication Harvester Unit Test");
    Harvester harvester = new Harvester(DB);
    harvester.CreateTables();

    MockNCBI mockNCBI = new MockNCBI("medline");
    mockNCBI.SearchThrowsAnError = false;

    PublicationTypes ptc = new PublicationTypes(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
        "PublicationTypes.csv"
        );
    ptc.WriteToDB(DB);

    // Anonymous callback functions for GetPublications: status and message
    // callbacks do nothing, and the interrupt callback never interrupts
    Harvester.GetPublicationsStatus StatusCallback = delegate(int number, int total, int averageTime)
    {
        //
    };
    Harvester.GetPublicationsMessage MessageCallback = delegate(string Message, bool StatusBarOnly)
    {
        //
    };
    Harvester.CheckForInterrupt InterruptCallback = delegate()
    {
        return (false);
    };

    // Write the people, then "harvest" the publications using MockNCBI
    double AverageMilliseconds;
    foreach (Person person in PeopleFromFile.PersonList)
    {
        person.WriteToDB(DB);
        harvester.GetPublications(mockNCBI, ptc, person, StatusCallback, MessageCallback, InterruptCallback, out AverageMilliseconds);
    }

    // Expected value goes first so a failure reports "expected 4 but was N"
    People PeopleFromDB = new People(DB);
    Assert.AreEqual(4, PeopleFromDB.PersonList.Count);
}
// Prepares the "Colleague Generator Unit Test" database for the colleague tests:
// loads the AAMC roster and the people file, drops and recreates every table,
// then writes each person and harvests their publications through MockNCBI.
public void TestColleaguesSetUp()
{
    string baseDirectory = AppDomain.CurrentDomain.BaseDirectory;

    // The AAMC roster object
    roster = new Roster(baseDirectory + "\\Test Data\\TestRoster\\testroster.csv");

    // The people to write and harvest
    People peopleToHarvest = new People(baseDirectory + "\\Test Data\\TestColleagues", "PeopleFile.xls");

    // Start from a test database with none of the tables present
    DB = new Database("Colleague Generator Unit Test");
    string[] tablesToDrop = new string[]
    {
        "colleaguepublications", "colleagues", "meshheadings", "people",
        "peoplepublications", "publicationauthors", "publicationgrants",
        "publicationmeshheadings", "publications", "pubtypecategories",
        "starcolleagues"
    };
    for (int i = 0; i < tablesToDrop.Length; i++)
    {
        DB.ExecuteNonQuery("DROP TABLE IF EXISTS " + tablesToDrop[i] + ";");
    }

    // Recreate the harvester tables and the colleague tables
    harvester = new Harvester(DB);
    harvester.CreateTables();
    ColleagueFinder.CreateTables(DB);

    // The mock NCBI object and publication types used to populate the database
    ncbi = new MockNCBI("Medline");
    PubTypes = new PublicationTypes(baseDirectory + "\\Test Data\\TestColleagues", "PublicationTypes.csv");

    // GetPublications() needs three callbacks; none of them does anything here
    Harvester.GetPublicationsStatus ignoreStatus = delegate(int number, int total, int averageTime) { };
    Harvester.GetPublicationsMessage ignoreMessage = delegate(string Message, bool StatusBarOnly) { };
    Harvester.CheckForInterrupt neverInterrupt = delegate() { return false; };

    // Write each person and that person's publications to the database
    foreach (Person person in peopleToHarvest.PersonList)
    {
        double averageMilliseconds;
        person.WriteToDB(DB);
        harvester.GetPublications(ncbi, PubTypes, person, ignoreStatus, ignoreMessage, neverInterrupt, out averageMilliseconds);
    }
}
/// <summary>
/// Harvest each of the publications in the people file
/// </summary>
/// <remarks>
/// Both files must exist even when continuing from an interruption, although in
/// that case the publication types and people are read from the database rather
/// than from the files. Failures during setup are logged with an error box and
/// end the harvest; a failure while harvesting one person is logged and the
/// harvest moves on to the next person.
/// </remarks>
/// <param name="PeopleFile">Filename of the people file</param>
/// <param name="PublicationTypeFile">Filename of publication type file</param>
/// <param name="ContinueFromInterruption">True if continuing from a previously interrupted harvest</param>
public void Harvest(string PeopleFile, string PublicationTypeFile, bool ContinueFromInterruption)
{
    // First verify that the files exist
    if (!File.Exists(PeopleFile))
    {
        MessageBox.Show("The People file '" + PeopleFile + "' does not exist", "People file not found", MessageBoxButtons.OK, MessageBoxIcon.Warning);
        return;
    }
    if (!File.Exists(PublicationTypeFile))
    {
        MessageBox.Show("The Publication Type file '" + PublicationTypeFile + "' does not exist", "Publication Type file not found", MessageBoxButtons.OK, MessageBoxIcon.Warning);
        return;
    }

    UpdateDatabaseStatus();

    if (ContinueFromInterruption)
    {
        AddLogEntry("Continuing interrupted harvest");
    }
    else
    {
        AddLogEntry("Beginning harvesting");
    }

    // Reset lastDSNSelected to make sure that the next check for interrupted data is NOT skipped
    lastDSNSelected = "";

    // Initialize the harvester and its database connection
    Harvester harvester;
    Database DB;
    try
    {
        DB = new Database(DSN.Text);
        harvester = new Harvester(DB);

        // Set the language restriction (a comma-separated list; entries are used as typed)
        if (LanguageList.Text != "")
        {
            string[] Languages = LanguageList.Text.Split(',');
            harvester.Languages = Languages;
            foreach (string Language in Languages)
            {
                AddLogEntry("Adding language restriction: " + Language);
            }
        }
        else
        {
            AddLogEntry("No language restriction added");
        }
    }
    catch (Exception ex)
    {
        AddLogEntryWithErrorBox(ex.Message, "Unable to begin harvesting");
        return;
    }

    // Initialize the database (skipped when continuing, so existing data is kept)
    try
    {
        if (!ContinueFromInterruption)
        {
            AddLogEntry("Initializing the database");
            harvester.CreateTables();
            UpdateDatabaseStatus();
        }
    }
    catch (Exception ex)
    {
        AddLogEntryWithErrorBox(ex.Message, "Unable to initialize database");
        return;
    }

    PublicationTypes pubTypes;
    if (ContinueFromInterruption)
    {
        // If we're continuing, read the publication types from the database
        try
        {
            AddLogEntry("Reading publication types from the database");
            pubTypes = new PublicationTypes(DB);
        }
        catch (Exception ex)
        {
            AddLogEntryWithErrorBox(ex.Message, "Unable to read publication types");
            return;
        }

        // Remove any data left over from the interruption
        AddLogEntry("Removing any data left over from the previous interruption");
        harvester.ClearDataAfterInterruption();
        UpdateDatabaseStatus();
    }
    else
    {
        // Read the publication types from the file and write them to the database
        try
        {
            AddLogEntry("Writing publication types to database");
            pubTypes = new PublicationTypes(Path.GetDirectoryName(PublicationTypeFile), Path.GetFileName(PublicationTypeFile));
            pubTypes.WriteToDB(DB);
            UpdateDatabaseStatus();
        }
        catch (Exception ex)
        {
            AddLogEntryWithErrorBox(ex.Message, "Unable to read publication types");
            return;
        }

        // Read the people
        try
        {
            AddLogEntry("Reading people from " + Path.GetFileName(PeopleFile) + " and writing them to the database");
            harvester.ImportPeople(PeopleFile);
            UpdateDatabaseStatus();
        }
        catch (Exception ex)
        {
            AddLogEntryWithErrorBox(ex.Message, "Unable to read the people from " + Path.GetFileName(PeopleFile));
            return;
        }
    }

    // Make an anonymous callback function that keeps track of the callback data
    Harvester.GetPublicationsStatus StatusCallback = delegate(int number, int total, int averageTime)
    {
        // No need to update the progress bar for this -- it leads to a messy-looking UI
        // because it's also updated for the person total
        toolStripStatusLabel1.Text = "Reading publication " + number.ToString() + " of " + total.ToString() + " (" + averageTime.ToString() + " ms average)";
        UpdateDatabaseStatus();
        Application.DoEvents();
    };

    // Make an anonymous callback function that logs any messages passed back
    Harvester.GetPublicationsMessage MessageCallback = delegate(string Message, bool StatusBarOnly)
    {
        if (StatusBarOnly)
        {
            toolStripStatusLabel1.Text = Message;
            Application.DoEvents();
        }
        else
        {
            AddLogEntry(Message);
        }
    };

    // Make an anonymous callback function to return the value of Interrupt for CheckForInterrupt
    Harvester.CheckForInterrupt InterruptCallback = delegate()
    {
        return (InterruptClicked);
    };

    // Get each person's publications and write them to the database
    NCBI ncbi = new NCBI("medline");

    if (NCBI.ApiKeyExists)
    {
        AddLogEntry("Using API key: " + NCBI.ApiKeyPath);
    }
    else
    {
        AddLogEntry("Performance is limited to under 3 requests per second.");
        AddLogEntry("Consider pasting an API key into " + NCBI.ApiKeyPath);
        AddLogEntry("Or set the NCBI_API_KEY_FILE environment variable to the API key file path");
        AddLogEntry("For more information, see https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/");
    }

    People people = new People(DB);
    int totalPeopleInPersonList = people.PersonList.Count;
    int numberOfPeopleProcessed = 0;

    toolStripProgressBar1.Minimum = 0;
    toolStripProgressBar1.Maximum = totalPeopleInPersonList;

    foreach (Person person in people.PersonList)
    {
        numberOfPeopleProcessed++;
        try
        {
            // If continuing from interruption, only harvest unharvested people
            if ((!ContinueFromInterruption) || (!person.Harvested))
            {
                AddLogEntry("Getting publications for " + person.Last + " (" + person.Setnb + "), number " + numberOfPeopleProcessed.ToString() + " of " + totalPeopleInPersonList.ToString());
                toolStripProgressBar1.Value = numberOfPeopleProcessed;

                double AverageMilliseconds;
                int NumPublications = harvester.GetPublications(ncbi, pubTypes, person, StatusCallback, MessageCallback, InterruptCallback, out AverageMilliseconds);

                // Stop the whole harvest (not just this person) once the user has interrupted
                if (InterruptClicked)
                {
                    AddLogEntry("Publication harvesting was interrupted");
                    UpdateDatabaseStatus();
                    return;
                }

                AddLogEntry("Wrote " + NumPublications.ToString() + " publications, average write time " + Convert.ToString(Math.Round(AverageMilliseconds, 1)) + " ms");
                UpdateDatabaseStatus();
            }
            else
            {
                AddLogEntry("Already retrieved publications for " + person.Last + " (" + person.Setnb + ")");
            }
        }
        catch (Exception ex)
        {
            // One person's failure is logged and does not stop the remaining people
            AddLogEntry("An error occurred while reading publications for " + person.Last + " (" + person.Setnb + "): " + ex.Message);
        }
    }

    AddLogEntry("Finished reading publications");
    UpdateDatabaseStatus();
}
// Verifies that harvesting one person (Jane) also records the publications for
// other people in the database who share her names and search query (Joe and
// Jim), marks them as harvested, and clears a previously written error (Jim).
public void TestTwoPeopleWithSameNames()
{
    Database DB = new Database("Publication Harvester Unit Test");

    // Set up the database
    TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(false, new string[] { "eng" }, 22);

    // Add two people to the database with the same names and search criteria
    // (where the search should make MockNCBI use OtherPeople.dat)
    string[] names = new string[2];
    names[0] = "Guy JF";
    names[1] = "Guy J";
    Person Joe = new Person("A1234567", "JOE", "FIRST", "GUY", false, names, "Special query for OtherPeople.dat");
    Joe.WriteToDB(DB);
    Person Jane = new Person("Z7654321", "JANE", "FIFTH", "GUY", false, names, "Special query for OtherPeople.dat");
    Jane.WriteToDB(DB);

    // Also add Jim, but give him an error so we can make sure it's cleared
    Person Jim = new Person("Q2222222", "JIM", "FOURTEENTH", "GUY", false, names, "Special query for OtherPeople.dat");
    Jim.WriteToDB(DB);
    Jim.WriteErrorToDB(DB, "This is an error message");

    PublicationTypes ptc = new PublicationTypes(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
        "PublicationTypes.csv"
        );

    // Make an anonymous callback function that keeps track of the callback data
    int Callbacks = 0; // this will count all of the publications
    Harvester.GetPublicationsStatus StatusCallback = delegate(int number, int total, int averageTime)
    {
        Callbacks++;
    };

    // Make an anonymous callback function that counts the "same names" messages
    int MessageCallbacks = 0;
    Harvester.GetPublicationsMessage MessageCallback = delegate(string Message, bool StatusBarOnly)
    {
        // Only increment MessageCallbacks if the message contains Joe's or Jim's
        // Setnb AND the word "same". The parentheses matter: && binds tighter
        // than ||, so without them any message mentioning Joe would be counted.
        if ((Message.Contains("A1234567") || Message.Contains("Q2222222")) && Message.Contains("same"))
        {
            MessageCallbacks++;
        }
    };

    // Make an anonymous callback function to return false for CheckForInterrupt
    Harvester.CheckForInterrupt InterruptCallback = delegate()
    {
        return (false);
    };

    // More stuff for the harvester
    Harvester harvester = new Harvester(DB);
    MockNCBI mockNCBI = new MockNCBI("medline");
    double AverageMilliseconds;

    // Harvest the people (only Jane is harvested explicitly)
    harvester.GetPublications(mockNCBI, ptc, Jane, StatusCallback, MessageCallback, InterruptCallback, out AverageMilliseconds);

    // Make sure the harvester got Jane's publications
    DataTable Results = DB.ExecuteQuery("SELECT PMID FROM PeoplePublications WHERE Setnb = 'Z7654321'");
    AssertSameNamesPublications(Results);

    ArrayList Parameters = new ArrayList();
    Parameters.Add(Database.Parameter(Jane.Setnb));
    Results = DB.ExecuteQuery("SELECT Harvested, Error, ErrorMessage FROM People WHERE Setnb = ?", Parameters);
    Assert.AreEqual(true, Results.Rows[0]["Harvested"]);
    Assert.AreEqual(DBNull.Value, Results.Rows[0]["Error"]);
    Assert.AreEqual("", Results.Rows[0]["ErrorMessage"].ToString());

    // It should also get Joe's publications. It should call MessageCallback()
    // twice to let us know Joe's and Jim's publications were found, and it
    // should add the appropriate rows to PeoplePublications.
    Assert.AreEqual(2, MessageCallbacks);
    Results = DB.ExecuteQuery("SELECT PMID FROM PeoplePublications WHERE Setnb = 'A1234567'");
    AssertSameNamesPublications(Results);

    Parameters = new ArrayList();
    Parameters.Add(Database.Parameter(Joe.Setnb));
    Results = DB.ExecuteQuery("SELECT " + Database.PEOPLE_COLUMNS + " FROM People WHERE Setnb = ?", Parameters);
    bool boolValue; // needed for GetBoolValue workaround for bit field bug in MySQL
    Assert.IsTrue(Database.GetBoolValue(Results.Rows[0]["Harvested"], out boolValue));
    Assert.IsTrue(boolValue);
    Assert.IsTrue(Database.GetBoolValue(Results.Rows[0]["Error"], out boolValue));
    Assert.IsFalse(boolValue);
    Assert.AreEqual(DBNull.Value, Results.Rows[0]["Error"]);
    Assert.AreEqual("", Results.Rows[0]["ErrorMessage"].ToString());

    // It should also get Jim's publications -- and it should also clear his error.
    Assert.AreEqual(2, MessageCallbacks);
    Results = DB.ExecuteQuery("SELECT PMID FROM PeoplePublications WHERE Setnb = 'Q2222222'");
    AssertSameNamesPublications(Results);

    Parameters = new ArrayList();
    Parameters.Add(Database.Parameter(Jim.Setnb));
    Results = DB.ExecuteQuery("SELECT " + Database.PEOPLE_COLUMNS + " FROM People WHERE Setnb = ?", Parameters);
    Assert.IsTrue(Database.GetBoolValue(Results.Rows[0]["Harvested"], out boolValue));
    Assert.AreEqual(true, boolValue);
    Assert.AreEqual(DBNull.Value, Results.Rows[0]["Error"]);
    Assert.AreEqual("", Results.Rows[0]["ErrorMessage"].ToString());
}

// Asserts that a PeoplePublications query result holds exactly three rows and
// that every PMID is one of the three publications expected for the "GUY" people.
private static void AssertSameNamesPublications(DataTable Results)
{
    Assert.AreEqual(3, Results.Rows.Count);
    foreach (DataRow Row in Results.Rows)
    {
        string pmid = Row["PMID"].ToString();
        Assert.IsTrue(
            (pmid == "2417121")
            || (pmid == "12679283")
            || (pmid == "14653276"));
    }
}
// Harvests the OtherPeople.dat publications for a single test person and checks
// the publications whose fields are unusually long or oddly formatted.
public void TestOtherPeople()
{
    // Note: Publication 14560782 was added to OtherPeople.dat to verify
    // that the software handles the situation where a publication has
    // no authors listed.

    Database DB = new Database("Publication Harvester Unit Test");
    Harvester harvester = new Harvester(DB);
    MockNCBI ncbi = new MockNCBI("medline");
    PublicationTypes publicationTypes = new PublicationTypes(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
        "PublicationTypes.csv");

    // Start from freshly created tables
    harvester.CreateTables();

    // Status callback counts every publication reported; the message callback
    // ignores its input and the interrupt callback never interrupts
    int statusCallbackCount = 0;
    Harvester.GetPublicationsStatus onStatus = delegate(int number, int total, int averageTime)
    {
        statusCallbackCount++;
    };
    Harvester.GetPublicationsMessage onMessage = delegate(string Message, bool StatusBarOnly) { };
    Harvester.CheckForInterrupt onCheckInterrupt = delegate() { return false; };

    // The person under test
    string[] searchNames = new string[] { "Klein RG", "Guillemin R" };
    Person testPerson = new Person("A1234567", "FIRST", "MIDDLE", "LAST", false, searchNames, "Special query for OtherPeople.dat");
    testPerson.WriteToDB(DB);

    double averageMilliseconds;
    harvester.GetPublications(ncbi, publicationTypes, testPerson, onStatus, onMessage, onCheckInterrupt, out averageMilliseconds);

    // Read the publications back and verify the interesting ones
    int publicationsSeen = 0;
    Publications harvested = new Publications(DB, testPerson, false);
    if (harvested.PublicationList != null)
    {
        foreach (Publication pub in harvested.PublicationList)
        {
            publicationsSeen += 1;

            switch (pub.PMID)
            {
                case 12679283:
                    // Second MeSH heading of this publication is very long
                    Assert.IsTrue(pub.MeSHHeadings.Count == 23);
                    Assert.IsTrue(pub.MeSHHeadings.Contains(
                        "Attention Deficit and Disruptive Behavior Disorders/etiology/*prevention & control/psychology"));
                    break;

                case 2417121:
                    // Oddly formatted date that makes the day long
                    Assert.IsTrue(pub.Day == "19-1986 Jan 1");
                    break;

                case 6148773:
                    // Long MeSH heading
                    Assert.IsTrue(pub.MeSHHeadings.Contains(
                        "Peptide Fragments/antagonists & inhibitors/chemical synthesis/diagnostic use/isolation & purification/pharmacology/*physiology"));
                    break;

                case 16291338:
                    // Long author name
                    Assert.IsTrue(pub.Authors.Length == 8);
                    Assert.IsTrue(pub.Authors[7] == "For The Michigan Alliance For The National Children's Study");
                    break;

                case 15451956:
                    // Long volume
                    Assert.IsTrue(pub.Volume == "Suppl Web Exclusives");
                    break;

                case 14653276:
                    // Long issue
                    Assert.IsTrue(pub.Issue == "5 Suppl Nitric Oxide");
                    break;

                case 9965612:
                    // Long journal name
                    Assert.IsTrue(pub.Journal == "PHYSICAL REVIEW. E. STATISTICAL PHYSICS, PLASMAS, FLUIDS, AND RELATED INTERDISCIPLINARY TOPICS");
                    break;

                case 9469584:
                    // Long title, compared against its first 244 characters
                    Assert.IsTrue(pub.Title == Database.Left(
                        "Down-regulation of cholesterol biosynthesis in sitosterolemia: diminished activities of acetoacetyl-CoA thiolase, 3-hydroxy-3-methylglutaryl-CoA synthase, reductase, squalene synthase, and 7-dehydrocholesterol delta7-reductase in liver and mononuclear leukocytes.",
                        244));
                    break;

                case 2545230:
                    // Long month
                    Assert.IsTrue(pub.Month == "Spring-Summer");
                    break;

                default:
                    break;
            }
        }
    }

    Assert.IsTrue(publicationsSeen == 13);
}
/// <summary>
/// Set up the database with data from Input1.XLS using the Mock NCBI object
/// (this is also called from TestReports())
/// </summary>
/// <remarks>
/// Drops and recreates the tables, then writes and harvests each person in
/// input1.xls, checking the status reported by <c>Database.GetStatus</c> before
/// and after every step.
/// </remarks>
/// <param name="NCBISearchThrowsAnError">True if the MockNCBI object is supposed to throw an error</param>
/// <param name="Languages">Language restriction assigned to the harvester's Languages property</param>
/// <param name="ExpectedPublications">Number of publications expected in the database after a successful (error-free) harvest</param>
public static void GetPublicationsFromInput1XLS_Using_MockNCBI(bool NCBISearchThrowsAnError, string[] Languages, int ExpectedPublications)
{
    Database DB = new Database("Publication Harvester Unit Test");

    // Drop all tables and make sure the database reports as empty after each drop
    foreach (string Table in new string[]
        {
            "meshheadings", "people", "peoplepublications", "publicationauthors",
            "publicationmeshheadings", "publications", "pubtypecategories"
        })
    {
        DB.ExecuteNonQuery("DROP TABLE IF EXISTS " + Table);
        AssertInput1DatabaseStatus(DB, false, 0, 0, 0, 0);
    }

    // Create and populate the tables
    Harvester harvester = new Harvester(DB);
    harvester.Languages = Languages;
    MockNCBI mockNCBI = new MockNCBI("medline");
    mockNCBI.SearchThrowsAnError = NCBISearchThrowsAnError;
    PublicationTypes ptc = new PublicationTypes(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
        "PublicationTypes.csv"
        );

    // Reinitialize the database
    harvester.CreateTables();
    ptc.WriteToDB(DB);

    // Retrieve the publications for each person in input1.xls using GetPublications()
    People PeopleFromFile = new People(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPeople",
        "input1.xls");

    // Make an anonymous callback function that keeps track of the callback data
    int Callbacks = 0; // this will count all of the publications
    Harvester.GetPublicationsStatus StatusCallback = delegate(int number, int total, int averageTime)
    {
        Callbacks++;
    };

    // Make an anonymous callback function to do nothing for GetPublicationsMessage
    Harvester.GetPublicationsMessage MessageCallback = delegate(string Message, bool StatusBarOnly)
    {
        //
    };

    // Make an anonymous callback function to return false for CheckForInterrupt
    Harvester.CheckForInterrupt InterruptCallback = delegate()
    {
        return (false);
    };

    // Verify that the tables now exist but hold no people or publications yet
    AssertInput1DatabaseStatus(DB, true, 0, 0, 0, 0);

    int PeopleCount = 0;
    int HarvestedCount = 0;
    int PubCount = 0;
    foreach (Person person in PeopleFromFile.PersonList)
    {
        double AverageMilliseconds;

        // First write the person to the database
        person.WriteToDB(DB);
        PeopleCount++;

        // Check that the database status is updated properly. When the mock
        // search throws, nobody counts as harvested and every person processed
        // so far (all but the one just written) has an error.
        AssertInput1DatabaseStatus(DB, true,
            PeopleCount,
            NCBISearchThrowsAnError ? 0 : HarvestedCount,
            PubCount,
            NCBISearchThrowsAnError ? PeopleCount - 1 : 0);

        // Harvest the person's publications
        PubCount += harvester.GetPublications(mockNCBI, ptc, person, StatusCallback, MessageCallback, InterruptCallback, out AverageMilliseconds);
        HarvestedCount++;

        // Check the status again after the person was harvested
        AssertInput1DatabaseStatus(DB, true,
            PeopleCount,
            NCBISearchThrowsAnError ? 0 : HarvestedCount,
            PubCount,
            NCBISearchThrowsAnError ? PeopleCount : 0);
    }

    // Verify that the status callback ran once per publication (or never, on error)
    if (!NCBISearchThrowsAnError)
    {
        Assert.AreEqual(24, Callbacks);
    }
    else
    {
        Assert.AreEqual(0, Callbacks);
    }

    // Verify that the database was written properly
    AssertInput1DatabaseStatus(DB, true,
        4,
        NCBISearchThrowsAnError ? 0 : 4,
        NCBISearchThrowsAnError ? 0 : ExpectedPublications,
        NCBISearchThrowsAnError ? 4 : 0);
}

// Reads the database status via GetStatus and asserts each reported value,
// passing the expected value first so failure messages read correctly.
private static void AssertInput1DatabaseStatus(Database DB, bool ExpectedTablesCreated, int ExpectedPeople, int ExpectedHarvestedPeople, int ExpectedPublicationCount, int ExpectedErrors)
{
    bool TablesCreated;
    int NumPeople;
    int NumHarvestedPeople;
    int NumPublications;
    int NumErrors;

    DB.GetStatus(out TablesCreated, out NumPeople, out NumHarvestedPeople, out NumPublications, out NumErrors);
    Assert.AreEqual(ExpectedTablesCreated, TablesCreated);
    Assert.AreEqual(ExpectedPeople, NumPeople);
    Assert.AreEqual(ExpectedHarvestedPeople, NumHarvestedPeople);
    Assert.AreEqual(ExpectedPublicationCount, NumPublications);
    Assert.AreEqual(ExpectedErrors, NumErrors);
}