Пример #1
0
        /// <summary>
        /// Execute the FindRelated search, create and populate the tables
        /// </summary>
        /// <remarks>
        /// PMIDs are pulled from the input queue in batches of up to ELINK_IDS_PER_REQUEST. Each batch is
        /// sent to the related-articles search, and the parsed ranks and scores are written to the
        /// output file and the related table. A failed request is retried until it succeeds or the
        /// background worker (if one is attached) has a cancellation pending.
        /// </remarks>
        /// <param name="odbcDsn">ODBC DSN to access the SQL server</param>
        /// <param name="relatedTableName">Name of the FindRelated SQL table to create</param>
        /// <param name="inputFileInfo">FileInfo object with information about the input CSV file (only used when not resuming)</param>
        /// <param name="resume">True if resuming a previous run; the output file and tables are not re-created</param>
        /// <param name="outputFilename">Output filename</param>
        public void Go(
            string odbcDsn,
            string relatedTableName,
            FileInfo inputFileInfo,
            bool resume,
            string outputFilename)
        {
            if (NCBI.ApiKeyExists)
            {
                Trace.WriteLine("Using API key: " + NCBI.ApiKeyPath);
            }
            else
            {
                Trace.WriteLine("Performance is limited to under 3 requests per second.");
                Trace.WriteLine("Consider pasting an API key into " + NCBI.ApiKeyPath);
                Trace.WriteLine("Or set the NCBI_API_KEY_FILE environemnt variable to the API key file path");
                Trace.WriteLine("For more information, see https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/");
            }

            Trace.WriteLine($"Requesting up to {ELINK_IDS_PER_REQUEST} PMIDs per NCBI eLink API request");

            Database db = new Database(odbcDsn);

            string queueTableName = relatedTableName + "_queue";

            InputQueue inputQueue;

            if (!resume)
            {
                // A fresh run starts with a new output file, new tables and a queue loaded from the input file
                if (!CreateOutputFile(outputFilename))
                {
                    return;
                }

                CreateTables(db, relatedTableName, queueTableName);
                inputQueue = new InputQueue(inputFileInfo, db, queueTableName);
            }
            else
            {
                // A resumed run reuses the existing queue table
                inputQueue = new InputQueue(db, queueTableName);
            }

            while (inputQueue.Next(ELINK_IDS_PER_REQUEST) > 0)
            {
                // Guard the division, and tolerate a missing BackgroundWorker
                // (WriteRelatedRanksToOutputFileAndDatabase also treats it as optional)
                var totalPmids = inputQueue.TotalPmidsAdded;
                if (totalPmids > 0)
                {
                    BackgroundWorker?.ReportProgress((100 * inputQueue.Progress) / totalPmids);
                }

                Trace.WriteLine($"{DateTime.Now} - executing API query for related articles for {inputQueue.CurrentPmids.Count()} PMIDs");

                // Run the related-articles search for the current batch of PMIDs, repeating the
                // request until it succeeds.
                string xml     = null;
                bool   success = false;
                bool   failed  = false;
                while (!success)
                {
                    try
                    {
                        xml     = ExecuteRelatedSearch(inputQueue.CurrentPmids);
                        success = true;
                    }
                    catch (Exception ex)
                    {
                        Trace.WriteLine(DateTime.Now + " - an error occurred while executing the related query, attempting to repeat the search");
                        Trace.WriteLine(ex.Message);
                        Trace.WriteLine(ex.StackTrace);
                        failed = true;

                        // Without this check a persistent failure would retry forever and the
                        // user could never stop the run.
                        if (BackgroundWorker != null && BackgroundWorker.CancellationPending)
                        {
                            Trace.WriteLine(DateTime.Now + " - stopped");
                            break;
                        }
                    }
                }

                if (!success)
                {
                    // Cancelled while the search was still failing
                    break;
                }

                if (failed)
                {
                    Trace.WriteLine(DateTime.Now + " - successfully recovered from the error, continuing execution");
                }

                // Process the results whether or not a retry was needed: the XML from a
                // recovered request is just as valid as one that succeeded the first time.
                Dictionary <int, List <int> > relatedSearchResults = GetIdsFromXml(xml, out Dictionary <int, Dictionary <int, RankAndScore> > relatedRanks);

                bool completed = WriteRelatedRanksToOutputFileAndDatabase(db, relatedTableName, relatedSearchResults, relatedRanks, outputFilename, inputQueue);
                if (!completed) // false means the user stopped the operation or a write failed
                {
                    break;
                }
            }

            BackgroundWorker?.ReportProgress(100);
        }
Пример #2
0
        /// <summary>
        /// Walk the related-article results for each queried PMID, appending one "pmid,relatedPmid,rank,score"
        /// line per related article to the output file and writing the same values to the database.
        /// </summary>
        /// <param name="db">Database to write to</param>
        /// <param name="relatedTableName">Name of the related table</param>
        /// <param name="relatedSearchResults">Maps each queried PMID to its list of related PMIDs (the list may be null)</param>
        /// <param name="relatedRanks">Maps each queried PMID to a dictionary of related PMIDs and their ranks and scores</param>
        /// <param name="outputFilename">Output file to append to</param>
        /// <param name="inputQueue">Input queue used to mark each PMID as processed or errored</param>
        /// <returns>
        /// True if every PMID in the results was handled; false if cancellation was pending on entry,
        /// appending to the output file failed, or WriteRelatedRankToDatabase reported a failure
        /// </returns>
        private bool WriteRelatedRanksToOutputFileAndDatabase(Database db, string relatedTableName,
                                                              Dictionary <int, List <int> > relatedSearchResults, Dictionary <int, Dictionary <int, RankAndScore> > relatedRanks,
                                                              string outputFilename, InputQueue inputQueue)
        {
            // Cancellation is only checked once, before any of this batch is written
            bool stopRequested = BackgroundWorker != null && BackgroundWorker.CancellationPending;
            if (stopRequested)
            {
                Trace.WriteLine(DateTime.Now + " - stopped");
                return false;
            }

            foreach (KeyValuePair<int, List<int>> entry in relatedSearchResults)
            {
                int pmid = entry.Key;
                List<int> relatedPmids = entry.Value;

                // Every queried PMID counts toward the running total, whatever its outcome
                int processedCount = ++_pmidsProcessed;

                if (relatedPmids == null)
                {
                    Trace.WriteLine($"{DateTime.Now} - found empty related PMID list for PMID {pmid} ({processedCount} of {inputQueue.TotalPmidsAdded})");
                }
                else if (!relatedRanks.TryGetValue(pmid, out Dictionary<int, RankAndScore> ranksAndScores))
                {
                    Trace.WriteLine($"{DateTime.Now} - no ranks or scores found  for PMID {pmid} ({processedCount} of {inputQueue.TotalPmidsAdded})");
                }
                else
                {
                    Trace.WriteLine($"{DateTime.Now} - found {relatedPmids.Count} results for PMID {pmid} ({processedCount} of {inputQueue.TotalPmidsAdded})");

                    foreach (int relatedPmid in relatedPmids)
                    {
                        if (!ranksAndScores.TryGetValue(relatedPmid, out RankAndScore rankAndScore))
                        {
                            // Nothing to write for this pair; move on to the next related PMID
                            Trace.WriteLine(DateTime.Now + " - unable to find related ranks and scores for PMID " + pmid + ", related PMID " + relatedPmid);
                            continue;
                        }

                        string line = String.Format("{0},{1},{2},{3}", pmid, relatedPmid, rankAndScore.Rank, rankAndScore.Score);

                        try
                        {
                            File.AppendAllText(outputFilename, line + Environment.NewLine);
                        }
                        catch (Exception ex)
                        {
                            Trace.WriteLine(DateTime.Now + " - unable to append '" + line + "' to the output file: " + ex.Message);
                            Trace.WriteLine(ex.StackTrace);
                            Trace.WriteLine(DateTime.Now + " - stopping the run, use the Resume button to resume");

                            inputQueue.MarkError(pmid);

                            return false;
                        }

                        // NOTE(review): unlike the file-append failure above, a database failure does
                        // not call MarkError on the queue - confirm this is intentional.
                        if (!WriteRelatedRankToDatabase(db, relatedTableName, pmid, relatedPmid, rankAndScore.Rank, rankAndScore.Score))
                        {
                            return false;
                        }
                    }
                }

                // Mark the PMID processed in the queue
                inputQueue.MarkProcessed(pmid);
            }

            return true;
        }