/// <summary>
/// Harvest the publications for each person in the people file and write
/// them to the database opened from the DSN control's text.
/// </summary>
/// <remarks>
/// The steps are: verify both input files exist, open the database and
/// harvester, apply the optional language restriction, then either
/// (new harvest) create the tables and import the publication types and
/// people, or (continued harvest) read the publication types back from the
/// database and clear the data left over from the interruption. Finally the
/// publications are retrieved person by person. A failure in any setup step
/// is logged, shown in an error box, and ends the harvest; a failure for an
/// individual person is only logged, and the harvest moves on to the next
/// person.
/// </remarks>
/// <param name="PeopleFile">Filename of the people file</param>
/// <param name="PublicationTypeFile">Filename of publication type file</param>
/// <param name="ContinueFromInterruption">True if continuing from a previously interrupted harvest</param>
public void Harvest(string PeopleFile, string PublicationTypeFile, bool ContinueFromInterruption)
{
    // First verify that the files exist
    if (!File.Exists(PeopleFile))
    {
        MessageBox.Show("The People file '" + PeopleFile + "' does not exist", "People file not found", MessageBoxButtons.OK, MessageBoxIcon.Warning);
        return;
    }
    if (!File.Exists(PublicationTypeFile))
    {
        MessageBox.Show("The Publication Type file '" + PublicationTypeFile + "' does not exist", "Publication Type file not found", MessageBoxButtons.OK, MessageBoxIcon.Warning);
        return;
    }

    UpdateDatabaseStatus();

    if (ContinueFromInterruption)
    {
        AddLogEntry("Continuing interrupted harvest");
    }
    else
    {
        AddLogEntry("Beginning harvesting");
    }

    // Reset lastDSNSelected to make sure that the next check for interrupted data is NOT skipped
    lastDSNSelected = "";

    // Open the database and create the harvester
    Harvester harvester;
    Database DB;
    try
    {
        DB = new Database(DSN.Text);
        harvester = new Harvester(DB);

        // Set the language restriction from the comma-separated list
        if (LanguageList.Text != "")
        {
            string[] Languages = LanguageList.Text.Split(',');

            // Trim each entry so that input such as "eng, fre" does not
            // produce a language with leading or trailing whitespace
            for (int i = 0; i < Languages.Length; i++)
            {
                Languages[i] = Languages[i].Trim();
            }

            harvester.Languages = Languages;
            foreach (string Language in Languages)
            {
                AddLogEntry("Adding language restriction: " + Language);
            }
        }
        else
        {
            AddLogEntry("No language restriction added");
        }
    }
    catch (Exception ex)
    {
        AddLogEntryWithErrorBox(ex.Message, "Unable to begin harvesting");
        return;
    }

    // Initialize the database (only for a new harvest -- a continued
    // harvest keeps the existing tables)
    try
    {
        if (!ContinueFromInterruption)
        {
            AddLogEntry("Initializing the database");
            harvester.CreateTables();
            UpdateDatabaseStatus();
        }
    }
    catch (Exception ex)
    {
        AddLogEntryWithErrorBox(ex.Message, "Unable to initialize database");
        return;
    }

    PublicationTypes pubTypes;
    if (ContinueFromInterruption)
    {
        // If we're continuing, read the publication types from the database
        try
        {
            AddLogEntry("Reading publication types from the database");
            pubTypes = new PublicationTypes(DB);
        }
        catch (Exception ex)
        {
            AddLogEntryWithErrorBox(ex.Message, "Unable to read publication types");
            return;
        }

        // Remove any data left over from the interruption. This is guarded
        // like every other database step so that a failure is reported in
        // the log and an error box instead of escaping from this method.
        try
        {
            AddLogEntry("Removing any data left over from the previous interruption");
            harvester.ClearDataAfterInterruption();
        }
        catch (Exception ex)
        {
            AddLogEntryWithErrorBox(ex.Message, "Unable to remove data left over from the previous interruption");
            return;
        }
        UpdateDatabaseStatus();
    }
    else
    {
        // Read the publication types from the file and write them to the database
        try
        {
            AddLogEntry("Writing publication types to database");
            pubTypes = new PublicationTypes(Path.GetDirectoryName(PublicationTypeFile), Path.GetFileName(PublicationTypeFile));
            pubTypes.WriteToDB(DB);
            UpdateDatabaseStatus();
        }
        catch (Exception ex)
        {
            AddLogEntryWithErrorBox(ex.Message, "Unable to read publication types");
            return;
        }

        // Read the people from the file and write them to the database
        try
        {
            AddLogEntry("Reading people from " + Path.GetFileName(PeopleFile) + " and writing them to the database");
            harvester.ImportPeople(PeopleFile);
            UpdateDatabaseStatus();
        }
        catch (Exception ex)
        {
            AddLogEntryWithErrorBox(ex.Message, "Unable to read the people from " + Path.GetFileName(PeopleFile));
            return;
        }
    }

    // Make an anonymous callback function that keeps track of the callback data.
    // The progress bar is deliberately NOT updated here -- it is also updated
    // for the person total, and driving it from both places leads to a
    // messy-looking UI.
    Harvester.GetPublicationsStatus StatusCallback = delegate(int number, int total, int averageTime)
    {
        toolStripStatusLabel1.Text = "Reading publication " + number.ToString() + " of " + total.ToString() + " (" + averageTime.ToString() + " ms average)";
        UpdateDatabaseStatus();
        Application.DoEvents();
    };

    // Make an anonymous callback function that logs any messages passed back;
    // status-bar-only messages are shown in the status label and not logged
    Harvester.GetPublicationsMessage MessageCallback = delegate(string Message, bool StatusBarOnly)
    {
        if (StatusBarOnly)
        {
            toolStripStatusLabel1.Text = Message;
            Application.DoEvents();
        }
        else
        {
            AddLogEntry(Message);
        }
    };

    // Make an anonymous callback function to return the value of Interrupt for CheckForInterrupt
    Harvester.CheckForInterrupt InterruptCallback = delegate()
    {
        return (InterruptClicked);
    };

    // Get each person's publications and write them to the database
    NCBI ncbi = new NCBI("medline");
    if (NCBI.ApiKeyExists)
    {
        AddLogEntry("Using API key: " + NCBI.ApiKeyPath);
    }
    else
    {
        AddLogEntry("Performance is limited to under 3 requests per second.");
        AddLogEntry("Consider pasting an API key into " + NCBI.ApiKeyPath);
        AddLogEntry("Or set the NCBI_API_KEY_FILE environment variable to the API key file path");
        AddLogEntry("For more information, see https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/");
    }

    // Read the people back from the database, guarded like the other
    // database steps above
    People people;
    try
    {
        people = new People(DB);
    }
    catch (Exception ex)
    {
        AddLogEntryWithErrorBox(ex.Message, "Unable to read the people from the database");
        return;
    }

    int totalPeopleInPersonList = people.PersonList.Count;
    int numberOfPeopleProcessed = 0;

    toolStripProgressBar1.Minimum = 0;
    toolStripProgressBar1.Maximum = totalPeopleInPersonList;

    foreach (Person person in people.PersonList)
    {
        numberOfPeopleProcessed++;
        try
        {
            // Advance the progress bar for every person, including people
            // skipped below, so it reflects the position in the whole list
            toolStripProgressBar1.Value = numberOfPeopleProcessed;

            // If continuing from interruption, only harvest unharvested people
            if ((!ContinueFromInterruption) || (!person.Harvested))
            {
                AddLogEntry("Getting publications for " + person.Last + " (" + person.Setnb + "), number " + numberOfPeopleProcessed.ToString() + " of " + totalPeopleInPersonList.ToString());

                double AverageMilliseconds;
                int NumPublications = harvester.GetPublications(ncbi, pubTypes, person, StatusCallback, MessageCallback, InterruptCallback, out AverageMilliseconds);

                // Stop the whole harvest as soon as an interrupt is seen
                if (InterruptClicked)
                {
                    AddLogEntry("Publication harvesting was interrupted");
                    UpdateDatabaseStatus();
                    return;
                }

                AddLogEntry("Wrote " + NumPublications.ToString() + " publications, average write time " + Convert.ToString(Math.Round(AverageMilliseconds, 1)) + " ms");
                UpdateDatabaseStatus();
            }
            else
            {
                AddLogEntry("Already retrieved publications for " + person.Last + " (" + person.Setnb + ")");
            }
        }
        catch (Exception ex)
        {
            // A failure for one person is logged and the harvest continues
            // with the next person
            AddLogEntry("An error occurred while reading publications for " + person.Last + " (" + person.Setnb + "): " + ex.Message);
        }
    }

    AddLogEntry("Finished reading publications");
    UpdateDatabaseStatus();
}
/// <summary>
/// Imports input1.xls into the unit test database, reads the People table
/// back, and verifies the first/middle/last name, the search names and the
/// Medline search string of each expected person.
/// </summary>
public void GetPeopleFromInputXLS()
{
    // Import input1.xls into the database
    Database DB = new Database("Publication Harvester Unit Test");
    Harvester harvester = new Harvester(DB);
    harvester.CreateTables();
    harvester.ImportPeople(AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPeople\\input1.xls");

    DataTable Results = DB.ExecuteQuery(
        @"SELECT Setnb, First, Middle, Last, Name1, Name2, Name3, Name4, Name5, Name6, MedlineSearch, Harvested, Error, ErrorMessage FROM People"
    );

    // Count the expected people that were actually seen, so the test cannot
    // pass vacuously when the import writes no (or too few) rows.
    // NOTE(review): assumes input1.xls contains all four people checked
    // below -- confirm against the fixture.
    int expectedPeopleFound = 0;

    // Test each person
    for (int Row = 0; Row < Results.Rows.Count; Row++)
    {
        Person person = new Person(Results.Rows[Row], Results.Columns);
        switch (person.Setnb)
        {
            case "A6009400":
                expectedPeopleFound++;
                Assert.AreEqual("Jan", person.First);
                Assert.AreEqual("", person.Middle);
                Assert.AreEqual("Van Eys", person.Last);
                Assert.AreEqual(3, person.Names.Length);
                Assert.AreEqual("van eys j", person.Names[0]);
                Assert.AreEqual("vaneys j", person.Names[1]);
                Assert.AreEqual("eys jv", person.Names[2]);
                Assert.AreEqual("(\"van eys j\"[au] OR \"vaneys j\"[au] OR \"eys jv\"[au])", person.MedlineSearch);
                break;

            case "A5401532":
                expectedPeopleFound++;
                Assert.AreEqual("Louis", person.First);
                Assert.AreEqual("", person.Middle);
                Assert.AreEqual("Tobian", person.Last);
                Assert.AreEqual(3, person.Names.Length);
                Assert.AreEqual("tobian l", person.Names[0]);
                Assert.AreEqual("tobian l jr", person.Names[1]);
                Assert.AreEqual("tobian lj", person.Names[2]);
                Assert.AreEqual("(\"tobian l\"[au] OR \"tobian l jr\"[au] OR \"tobian lj\"[au])", person.MedlineSearch);
                break;

            case "A5501586":
                expectedPeopleFound++;
                Assert.AreEqual("Keith", person.First);
                Assert.AreEqual("B", person.Middle);
                Assert.AreEqual("Reemtsma", person.Last);
                Assert.AreEqual(6, person.Names.Length);
                Assert.AreEqual("reemtsma k", person.Names[0]);
                Assert.AreEqual("reemtsma kb", person.Names[1]);
                Assert.AreEqual("test data", person.Names[2]);
                Assert.AreEqual("more test data", person.Names[3]);
                Assert.AreEqual("test data name 5", person.Names[4]);
                Assert.AreEqual("test data name 6", person.Names[5]);
                Assert.AreEqual("((\"reemtsma k\"[au] OR \"reemtsma kb\"[au]) AND 1956:2000[dp])", person.MedlineSearch);
                break;

            case "A5702471":
                expectedPeopleFound++;
                Assert.AreEqual("Roger", person.First);
                Assert.AreEqual("", person.Middle);
                Assert.AreEqual("Guillemin", person.Last);
                Assert.AreEqual(2, person.Names.Length);
                Assert.AreEqual("guillemin r", person.Names[0]);
                Assert.AreEqual("guillemin rc", person.Names[1]);
                Assert.AreEqual("(\"guillemin rc\"[au] OR (\"guillemin r\"[au] NOT (Electrodiagn Ther[ta] OR Phys Rev Lett[ta] OR vegas[ad] OR lindle[au])))", person.MedlineSearch);
                break;
        }
    }

    Assert.AreEqual(4, expectedPeopleFound, "Not all of the expected people were found in the People table");
}