Beispiel #1
0
        /// <summary>
        /// Harvest the publications for each person in the people file and write them to the database
        /// </summary>
        /// <remarks>
        /// For a fresh harvest the tables are created, then the publication types and the people are
        /// imported from the two files. When continuing from an interruption the publication types
        /// are read back from the database and the data left over from the interruption is cleared.
        /// A failure in any of these setup steps is reported through AddLogEntryWithErrorBox and
        /// ends the harvest. A failure while harvesting one person is only logged, and the loop
        /// carries on with the next person.
        /// </remarks>
        /// <param name="PeopleFile">Filename of the people file</param>
        /// <param name="PublicationTypeFile">Filename of publication type file</param>
        /// <param name="ContinueFromInterruption">True if continuing from a previously interrupted harvest</param>
        public void Harvest(string PeopleFile, string PublicationTypeFile, bool ContinueFromInterruption)
        {
            // First verify that the files exist (both are checked even when continuing)
            if (!File.Exists(PeopleFile))
            {
                MessageBox.Show("The People file '" + PeopleFile + "' does not exist", "People file not found", MessageBoxButtons.OK, MessageBoxIcon.Warning);
                return;
            }
            if (!File.Exists(PublicationTypeFile))
            {
                MessageBox.Show("The Publication Type file '" + PublicationTypeFile + "' does not exist", "Publication Type file not found", MessageBoxButtons.OK, MessageBoxIcon.Warning);
                return;
            }

            UpdateDatabaseStatus();
            if (ContinueFromInterruption)
            {
                AddLogEntry("Continuing interrupted harvest");
            }
            else
            {
                AddLogEntry("Beginning harvesting");
            }

            // Reset lastDSNSelected to make sure that the next check for interrupted data is NOT skipped
            lastDSNSelected = "";

            Harvester harvester;
            Database  DB;

            // Initialize the database connection and the harvester
            try
            {
                DB        = new Database(DSN.Text);
                harvester = new Harvester(DB);

                // Set the language restriction. Entries are trimmed and blank ones are dropped so
                // that input such as "eng, fre" or a trailing comma does not produce a padded or
                // empty language.
                string[] Languages = Array.FindAll<string>(
                    Array.ConvertAll<string, string>(LanguageList.Text.Split(','), delegate(string entry) { return entry.Trim(); }),
                    delegate(string entry) { return entry.Length > 0; });

                if (Languages.Length > 0)
                {
                    harvester.Languages = Languages;
                    foreach (string Language in Languages)
                    {
                        AddLogEntry("Adding language restriction: " + Language);
                    }
                }
                else
                {
                    AddLogEntry("No language restriction added");
                }
            }
            catch (Exception ex)
            {
                AddLogEntryWithErrorBox(ex.Message, "Unable to begin harvesting");
                return;
            }

            // Initialize the database (only for a fresh harvest)
            try
            {
                if (!ContinueFromInterruption)
                {
                    AddLogEntry("Initializing the database");
                    harvester.CreateTables();
                    UpdateDatabaseStatus();
                }
            }
            catch (Exception ex)
            {
                AddLogEntryWithErrorBox(ex.Message, "Unable to initialize database");
                return;
            }

            PublicationTypes pubTypes;

            if (ContinueFromInterruption)
            {
                // If we're continuing, read the publication types from the database
                try
                {
                    AddLogEntry("Reading publication types from the database");
                    pubTypes = new PublicationTypes(DB);
                }
                catch (Exception ex)
                {
                    AddLogEntryWithErrorBox(ex.Message, "Unable to read publication types");
                    return;
                }

                // Remove any data left over from the interruption. This is guarded like the
                // other database steps so that a failure is reported instead of escaping as
                // an unhandled exception.
                try
                {
                    AddLogEntry("Removing any data left over from the previous interruption");
                    harvester.ClearDataAfterInterruption();
                }
                catch (Exception ex)
                {
                    AddLogEntryWithErrorBox(ex.Message, "Unable to remove data left over from the previous interruption");
                    return;
                }
                UpdateDatabaseStatus();
            }
            else
            {
                // Read the publication types from the file and write them to the database
                try
                {
                    AddLogEntry("Writing publication types to database");
                    pubTypes = new PublicationTypes(Path.GetDirectoryName(PublicationTypeFile), Path.GetFileName(PublicationTypeFile));
                    pubTypes.WriteToDB(DB);
                    UpdateDatabaseStatus();
                }
                catch (Exception ex)
                {
                    AddLogEntryWithErrorBox(ex.Message, "Unable to read publication types");
                    return;
                }

                // Read the people
                try
                {
                    AddLogEntry("Reading people from " + Path.GetFileName(PeopleFile) + " and writing them to the database");
                    harvester.ImportPeople(PeopleFile);
                    UpdateDatabaseStatus();
                }
                catch (Exception ex)
                {
                    AddLogEntryWithErrorBox(ex.Message, "Unable to read the people from " + Path.GetFileName(PeopleFile));
                    return;
                }
            }

            // Make an anonymous callback function that keeps track of the callback data
            Harvester.GetPublicationsStatus StatusCallback = delegate(int number, int total, int averageTime)
            {
                // The progress bar is deliberately not updated here -- it leads to a messy-looking UI
                // because it's also updated for the person total
                toolStripStatusLabel1.Text = "Reading publication " + number.ToString() + " of " + total.ToString() + " (" + averageTime.ToString() + " ms average)";
                UpdateDatabaseStatus();
                Application.DoEvents();
            };

            // Make an anonymous callback function that logs any messages passed back
            Harvester.GetPublicationsMessage MessageCallback = delegate(string Message, bool StatusBarOnly)
            {
                if (StatusBarOnly)
                {
                    toolStripStatusLabel1.Text = Message;
                    Application.DoEvents();
                }
                else
                {
                    AddLogEntry(Message);
                }
            };

            // Make an anonymous callback function to return the value of Interrupt for CheckForInterrupt
            Harvester.CheckForInterrupt InterruptCallback = delegate()
            {
                return(InterruptClicked);
            };

            // Get each person's publications and write them to the database
            NCBI ncbi = new NCBI("medline");

            if (NCBI.ApiKeyExists)
            {
                AddLogEntry("Using API key: " + NCBI.ApiKeyPath);
            }
            else
            {
                AddLogEntry("Performance is limited to under 3 requests per second.");
                AddLogEntry("Consider pasting an API key into " + NCBI.ApiKeyPath);
                AddLogEntry("Or set the NCBI_API_KEY_FILE environment variable to the API key file path");
                AddLogEntry("For more information, see https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/");
            }

            // Load the list of people to harvest. Guarded like the other setup steps so that a
            // failure is reported rather than thrown out of the handler.
            People people;
            try
            {
                people = new People(DB);
            }
            catch (Exception ex)
            {
                AddLogEntryWithErrorBox(ex.Message, "Unable to load the list of people");
                return;
            }

            int totalPeopleInPersonList = people.PersonList.Count;
            int numberOfPeopleProcessed = 0;

            toolStripProgressBar1.Minimum = 0;
            toolStripProgressBar1.Maximum = totalPeopleInPersonList;

            foreach (Person person in people.PersonList)
            {
                numberOfPeopleProcessed++;
                try
                {
                    // If continuing from interruption, only harvest unharvested people
                    if ((!ContinueFromInterruption) || (!person.Harvested))
                    {
                        AddLogEntry("Getting publications for " + person.Last + " (" + person.Setnb + "), number " + numberOfPeopleProcessed.ToString() + " of " + totalPeopleInPersonList.ToString());
                        toolStripProgressBar1.Value = numberOfPeopleProcessed;
                        double AverageMilliseconds;
                        int    NumPublications = harvester.GetPublications(ncbi, pubTypes, person, StatusCallback, MessageCallback, InterruptCallback, out AverageMilliseconds);

                        // Stop the whole harvest as soon as an interrupt has been requested
                        if (InterruptClicked)
                        {
                            AddLogEntry("Publication harvesting was interrupted");
                            UpdateDatabaseStatus();
                            return;
                        }
                        AddLogEntry("Wrote " + NumPublications.ToString() + " publications, average write time " + Convert.ToString(Math.Round(AverageMilliseconds, 1)) + " ms");
                        UpdateDatabaseStatus();
                    }
                    else
                    {
                        AddLogEntry("Already retrieved publications for " + person.Last + " (" + person.Setnb + ")");
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a failure for one person is logged and the harvest continues
                    AddLogEntry("An error occurred while reading publications for " + person.Last + " (" + person.Setnb + "): " + ex.Message);
                }
            }

            AddLogEntry("Finished reading publications");
            UpdateDatabaseStatus();
        }
Beispiel #2
0
        /// <summary>
        /// Imports input1.xls into the unit test database, reads the People table back and
        /// checks the names, name variants and Medline search of four known people.
        /// </summary>
        /// <remarks>
        /// People are matched by Setnb; rows with any other Setnb are not checked.
        /// </remarks>
        public void GetPeopleFromInputXLS()
        {
            // Import input1.xls into the database
            Database  DB        = new Database("Publication Harvester Unit Test");
            Harvester harvester = new Harvester(DB);

            harvester.CreateTables();
            harvester.ImportPeople(AppDomain.CurrentDomain.BaseDirectory
                                   + "\\Unit Tests\\TestPeople\\input1.xls");
            DataTable Results = DB.ExecuteQuery(
                @"SELECT Setnb, First, Middle, Last, Name1, Name2, Name3, Name4, Name5, Name6,
                        MedlineSearch, Harvested, Error, ErrorMessage
                   FROM People"
                );

            // Without this guard an import that produced no rows would let the test pass
            // without checking anything
            Assert.IsTrue(Results.Rows.Count > 0, "No people were read back from the People table");

            // Test each person. AreEqual is used so that a failure reports the expected and
            // actual values.
            foreach (DataRow row in Results.Rows)
            {
                Person person = new Person(row, Results.Columns);
                switch (person.Setnb)
                {
                case "A6009400":
                    Assert.AreEqual("Jan", person.First);
                    Assert.AreEqual("", person.Middle);
                    Assert.AreEqual("Van Eys", person.Last);
                    Assert.AreEqual(3, person.Names.Length);
                    Assert.AreEqual("van eys j", person.Names[0]);
                    Assert.AreEqual("vaneys j", person.Names[1]);
                    Assert.AreEqual("eys jv", person.Names[2]);
                    Assert.AreEqual("(\"van eys j\"[au] OR \"vaneys j\"[au] OR \"eys jv\"[au])", person.MedlineSearch);
                    break;

                case "A5401532":
                    Assert.AreEqual("Louis", person.First);
                    Assert.AreEqual("", person.Middle);
                    Assert.AreEqual("Tobian", person.Last);
                    Assert.AreEqual(3, person.Names.Length);
                    Assert.AreEqual("tobian l", person.Names[0]);
                    Assert.AreEqual("tobian l jr", person.Names[1]);
                    Assert.AreEqual("tobian lj", person.Names[2]);
                    Assert.AreEqual("(\"tobian l\"[au] OR \"tobian l jr\"[au] OR \"tobian lj\"[au])", person.MedlineSearch);
                    break;

                case "A5501586":
                    Assert.AreEqual("Keith", person.First);
                    Assert.AreEqual("B", person.Middle);
                    Assert.AreEqual("Reemtsma", person.Last);
                    Assert.AreEqual(6, person.Names.Length);
                    Assert.AreEqual("reemtsma k", person.Names[0]);
                    Assert.AreEqual("reemtsma kb", person.Names[1]);
                    Assert.AreEqual("test data", person.Names[2]);
                    Assert.AreEqual("more test data", person.Names[3]);
                    Assert.AreEqual("test data name 5", person.Names[4]);
                    Assert.AreEqual("test data name 6", person.Names[5]);
                    Assert.AreEqual("((\"reemtsma k\"[au] OR \"reemtsma kb\"[au]) AND 1956:2000[dp])", person.MedlineSearch);
                    break;

                case "A5702471":
                    Assert.AreEqual("Roger", person.First);
                    Assert.AreEqual("", person.Middle);
                    Assert.AreEqual("Guillemin", person.Last);
                    Assert.AreEqual(2, person.Names.Length);
                    Assert.AreEqual("guillemin r", person.Names[0]);
                    Assert.AreEqual("guillemin rc", person.Names[1]);
                    Assert.AreEqual(
                        "(\"guillemin rc\"[au] OR (\"guillemin r\"[au] NOT (Electrodiagn Ther[ta] OR Phys Rev Lett[ta] OR vegas[ad] OR lindle[au])))",
                        person.MedlineSearch);
                    break;
                }
            }
        }