Ejemplo n.º 1
0
        /// <summary>
        /// Write the publications report: a CSV header row, followed by one row
        /// for every publication of every person that is not being skipped
        /// </summary>
        /// <param name="SetnbsToSkip">Setnbs of the people to leave out of the report (null to include everyone)</param>
        /// <param name="writer">Writer to write the report to</param>
        /// <param name="StatusCallback">Called once per person with the running count and the total number of people</param>
        /// <param name="MessageCallback">Receives a message for each skipped person and for each person whose publications can't be read</param>
        public void PubsReport(ArrayList SetnbsToSkip, StreamWriter writer, ReportStatus StatusCallback, ReportMessage MessageCallback)
        {
            // Header row
            string[] header =
            {
                "setnb", "pmid", "journal_name", "year", "Month", "day", "title",
                "Volume", "issue", "position", "nbauthors", "Bin", "Pages", "Publication_type"
            };
            writer.WriteLine(String.Join(",", header));

            People people      = new People(DB, PeopleTable);
            int    personCount = people.PersonList.Count;
            int    processed   = 0;

            foreach (Person person in people.PersonList)
            {
                // Status is reported for every person, including the skipped ones
                processed++;
                StatusCallback(processed, personCount, person, false);

                bool skipPerson = (SetnbsToSkip != null) && SetnbsToSkip.Contains(person.Setnb);
                if (skipPerson)
                {
                    MessageCallback("Skipping " + person.Last + " (" + person.Setnb + ")");
                    continue;
                }

                // Loading the publications throws when the person has none; report
                // that through the message callback and move on to the next person
                Publications personPubs;
                try
                {
                    personPubs = new Publications(DB, person, PeoplePublicationsTable, false);
                }
                catch (Exception ex)
                {
                    MessageCallback("Unable to retrive publications for " + person.Last + " (" + person.Setnb + "): " + ex.Message);
                    continue;
                }

                if (personPubs.PublicationList == null)
                {
                    continue;
                }

                // One row per publication
                foreach (Publication pub in personPubs.PublicationList)
                {
                    writer.WriteLine(PubsReportRow(person, pub));
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Write the MeSH Heading report: a header row, followed by one row per
        /// person per year per heading giving the number of times the heading
        /// occurs in that person's publications for that year
        /// </summary>
        /// <param name="writer">Writer to send the report to</param>
        /// <param name="StatusCallback">Called once per person with the running count and the total number of people</param>
        /// <param name="MessageCallback">Receives the message of any exception thrown while processing a person</param>
        public void MeSHHeadingReport(StreamWriter writer, ReportStatus StatusCallback, ReportMessage MessageCallback)
        {
            writer.WriteLine("setnb,year,heading,count");

            People people      = new People(DB, PeopleTable);
            int    personTotal = people.PersonList.Count;
            int    personIndex = 0;

            foreach (Person person in people.PersonList)
            {
                personIndex++;
                StatusCallback(personIndex, personTotal, person, false);

                // A failure for one person is reported and does not stop the report
                try
                {
                    Publications pubs = new Publications(DB, person, PeoplePublicationsTable, false);

                    // countsByYear: year -> (Hashtable: heading -> occurrence count)
                    Hashtable countsByYear = new Hashtable();
                    int       firstYear    = 0; // 0 means "not set yet"
                    int       lastYear     = 0;

                    if (pubs.PublicationList != null)
                    {
                        foreach (Publication pub in pubs.PublicationList)
                        {
                            // Track the range of years this person published in
                            if (firstYear == 0 || pub.Year < firstYear)
                            {
                                firstYear = pub.Year;
                            }
                            if (lastYear == 0 || pub.Year > lastYear)
                            {
                                lastYear = pub.Year;
                            }

                            // Every year with a publication gets a table, even if
                            // none of its publications have headings
                            Hashtable headingCounts = (Hashtable)countsByYear[pub.Year];
                            if (headingCounts == null)
                            {
                                headingCounts          = new Hashtable();
                                countsByYear[pub.Year] = headingCounts;
                            }

                            if (pub.MeSHHeadings == null)
                            {
                                continue;
                            }

                            foreach (string heading in pub.MeSHHeadings)
                            {
                                object previous = headingCounts[heading];
                                headingCounts[heading] = (previous == null) ? 1 : ((int)previous) + 1;
                            }
                        }
                    }

                    // Emit the rows in ascending year order
                    for (int year = firstYear; year <= lastYear; year++)
                    {
                        Hashtable countsForYear = (Hashtable)countsByYear[year];
                        if (countsForYear == null)
                        {
                            // No publications in this year
                            continue;
                        }

                        foreach (DictionaryEntry entry in countsForYear)
                        {
                            StringWriter row = new StringWriter();
                            row.Write(person.Setnb);                          // setnb
                            Reports.WriteCSV(year.ToString(), row);           // year
                            Reports.WriteCSV((string)entry.Key, row);         // heading
                            Reports.WriteCSV(entry.Value.ToString(), row);    // count
                            writer.WriteLine(row.ToString());
                        }
                    }
                }
                catch (Exception ex)
                {
                    MessageCallback(ex.Message);
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Write the People report: a header row generated from
        /// PeopleReportSections, followed by one row per person for every year
        /// between that person's earliest and latest publication
        /// </summary>
        /// <param name="SetnbsToSkip">Setnbs of the people to leave out of the report (null to include everyone)</param>
        /// <param name="writer">Writer to write the CSV rows to</param>
        /// <param name="StatusCallback">Called once per person (last argument false) and once per
        /// year written for that person (last argument true)</param>
        /// <param name="MessageCallback">Receives a message for each skipped person and the exception
        /// message for each person whose publications can't be read</param>
        public void PeopleReport(ArrayList SetnbsToSkip, StreamWriter writer, ReportStatus StatusCallback, ReportMessage MessageCallback)
        {
            // Write the header row -- this must be generated dynamically
            // based on the values in PeopleReportSections

            // The base column names are the same for every section, so build them once
            string[] BaseColumnNames =
            {
                "pubcount",              "wghtd_pubcount",      "pubcount_pos1",
                "wghtd_pubcount_pos1",   "pubcount_posN",       "wghtd_pubcount_posN",
                "pubcount_posM",         "wghtd_pubcount_posM", "pubcount_posNTL",
                "wghtd_pubcount_posNTL", "pubcount_pos2",       "wghtd_pubcount_pos2"
            };

            // write the keys
            writer.Write("setnb,year");

            // write a set of column names for each element in PeopleReportSections
            for (int i = 0; i < PeopleReportSections.Length; i++)
            {
                string values = PeopleReportSections[i].ToLower().Trim();
                if (values == "all")
                {
                    // all bins -- use the base column names as-is
                    writer.Write("," + String.Join(",", BaseColumnNames));
                }
                else
                {
                    // strip any +'s from the value type, so "1+2+3" turns into "123"
                    values = values.Replace("+", "");

                    // replace pubcount_posM with 123pubcount_posM
                    // replace wghtd_pubcount_pos1 with wghtd_123pubcount_pos1
                    for (int j = 0; j < BaseColumnNames.Length; j++)
                    {
                        string Column;
                        if (BaseColumnNames[j].Contains("wghtd_"))
                        {
                            Column = BaseColumnNames[j].Replace("wghtd_", "wghtd_" + values);
                        }
                        else
                        {
                            Column = values + BaseColumnNames[j];
                        }
                        writer.Write("," + Column);
                    }
                }
            }

            writer.WriteLine();

            // Write the rows for each person
            People people = new People(DB, PeopleTable);
            int    Total  = people.PersonList.Count;
            int    Number = 0;

            foreach (Person person in people.PersonList)
            {
                Number++;
                StatusCallback(Number, Total, person, false);

                // Skip the person if the Setnb is in SetnbsToSkip
                if ((SetnbsToSkip != null) && SetnbsToSkip.Contains(person.Setnb))
                {
                    MessageCallback("Skipping " + person.Last + " (" + person.Setnb + ")");
                    continue;
                }

                // Get the person's publications. If there are no publications for
                // the person, this will throw an error.
                Publications pubs;
                try
                {
                    pubs = new Publications(DB, person, PeoplePublicationsTable, false);
                }
                catch (Exception ex)
                {
                    MessageCallback(ex.Message);
                    pubs = null;
                }

                // Nothing to write without at least one publication. This check has
                // to come before the sort and the [0] access below: Array.Sort throws
                // on a null array and indexing an empty array throws as well, either
                // of which would abort the report for everyone.
                if ((pubs == null) || (pubs.PublicationList == null) || (pubs.PublicationList.Length == 0))
                {
                    continue;
                }

                // Sort the list of publications -- ReportRow binary-searches it by year
                PublicationComparer Comparer = new PublicationComparer();
                Comparer.DB               = DB;
                Comparer.person           = person;
                Comparer.publicationTypes = PubTypes;
                Array.Sort(pubs.PublicationList, Comparer);

                // Find the minimum and maximum years
                int YearMinimum = pubs.PublicationList[0].Year;
                int YearMaximum = pubs.PublicationList[0].Year;
                foreach (Publication pub in pubs.PublicationList)
                {
                    if (pub.Year < YearMinimum)
                    {
                        YearMinimum = pub.Year;
                    }
                    if (pub.Year > YearMaximum)
                    {
                        YearMaximum = pub.Year;
                    }
                }

                // Write one row per year, including years without publications
                for (int Year = YearMinimum; Year <= YearMaximum; Year++)
                {
                    StatusCallback(Year - YearMinimum, YearMaximum - YearMinimum, person, true);
                    writer.WriteLine(ReportRow(person, pubs, Year));
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Create a row in the People report
        /// </summary>
        /// <param name="person">Person to write</param>
        /// <param name="Pubs">Publications to use as input. Pubs.PublicationList is binary-searched
        /// by year, so it must already be sorted (PeopleReport sorts it with a PublicationComparer
        /// before calling this function)</param>
        /// <param name="Year">Year to write</param>
        /// <returns>The row in CSV format: setnb, year, then the counts for each
        /// section in PeopleReportSections</returns>
        /// <exception cref="Exception">A section in PeopleReportSections is not "all", a category
        /// ID returned by the PubTypeCategories query, or a '+'-separated list of such IDs</exception>
        public string ReportRow(Person person, Publications Pubs, int Year)
        {
            // This function has been optimized so that the software only loops through the list of publications
            // once. To do this, the list is first sorted in order of year, publication type "bin", author
            // position type and PMID. (PMID is only there so that the ordering of the list is easily predictable.)
            //
            // The row is divided into sections: a set of columns for each bin, one column per author position.

            StringBuilder sb     = new StringBuilder();
            StringWriter  writer = new StringWriter(sb);

            // Write the keys
            // setnb -- Person identifier
            // year -- Year of transition
            writer.Write(person.Setnb + ",");
            writer.Write(Year.ToString());

            // The array has been sorted, so we can search for the year. Note that the
            // binary search may not return the first matching index, so we need to rewind.
            // (A negative result means the year was not found; the Counts constructor
            // treats an out-of-range index as "no matches" and returns zero counts.)
            PublicationYearFinder YearFinder = new PublicationYearFinder();
            int Index = Array.BinarySearch(Pubs.PublicationList, Year, YearFinder);

            while ((Index > 0) && (Pubs.PublicationList[Index - 1].Year == Year))
            {
                Index--;
            }

            // Get the counts for each publication type "bin"
            // The bins are defined in the PeopleReportSections array, which
            // contains either "all", "i+j+k+..+y+z" or "n"

            // Query the PubTypeCategories table to find all of the categories,
            // and use them to build a Hashtable, indexed by publication type
            // category, that contains a Counts() object for that category
            Hashtable CategoryCounts = new Hashtable();
            DataTable CategoryTable  = DB.ExecuteQuery(
                @"SELECT DISTINCT PubTypeCategoryID FROM PubTypeCategories
                         ORDER BY PubTypeCategoryID;");  // Order by Category ID so it doesn't break the optimization
            int NumCategories = CategoryTable.Rows.Count;

            int[] Categories = new int[NumCategories];
            for (int RowNum = 0; RowNum < NumCategories; RowNum++)
            {
                int Category = Convert.ToInt32(CategoryTable.Rows[RowNum]["PubTypeCategoryID"]);
                Categories[RowNum] = Category;

                // Index is passed by reference: each Counts constructor advances it
                // past the publications it counted, so the next category starts
                // where this one stopped
                CategoryCounts[Category] = new Counts(
                    Pubs.PublicationList, ref Index, Year, Category,
                    PubTypes, DB, person, Weights, PeoplePublicationsTable);
            }

            // For each section in PeopleReportSections, write the appropriate section,
            // using the Counts() objects that were just calculated and stuck into
            // the CategoryCounts hashtable
            for (int SectionNum = 0; SectionNum < PeopleReportSections.Length; SectionNum++)
            {
                string Section = PeopleReportSections[SectionNum];

                // Normalize the same way PeopleReport does when it writes the header,
                // so a section that gets header columns also gets row values
                string Normalized = Section.ToLower().Trim();

                if (Normalized == "all")
                {
                    // The section is "all" -- generate a count of all values
                    Counts[] AllCountObjects = new Counts[NumCategories];
                    for (int i = 0; i < NumCategories; i++)
                    {
                        AllCountObjects[i] = (Counts)CategoryCounts[Categories[i]];
                    }
                    Counts AllCounts = new Counts(AllCountObjects);
                    AllCounts.WriteCounts(writer);
                }
                else if (Normalized.Contains("+"))
                {
                    // The section contains a list of categories separated with +'s
                    // This is a sum of categories (like "1+2+3")
                    string[] SectionSplit    = Normalized.Split('+');
                    Counts[] SumCountObjects = new Counts[SectionSplit.Length];
                    for (int i = 0; i < SectionSplit.Length; i++)
                    {
                        SumCountObjects[i] = GetCountsForCategory(CategoryCounts, SectionSplit[i], Section);
                    }
                    Counts SumCounts = new Counts(SumCountObjects);
                    SumCounts.WriteCounts(writer);
                }
                else
                {
                    // The section contains a single bin. The section value is a
                    // category ID, so it is looked up as a key in CategoryCounts --
                    // it is not an offset into the Categories array.
                    Counts SingleBinCounts = GetCountsForCategory(CategoryCounts, Normalized, Section);
                    SingleBinCounts.WriteCounts(writer);
                }
            }

            return(sb.ToString());
        }

        /// <summary>
        /// Find the Counts object for one category ID named in a report section
        /// </summary>
        /// <param name="CategoryCounts">Hashtable of Counts objects keyed by category ID</param>
        /// <param name="CategoryText">The category ID as it appears in the section</param>
        /// <param name="Section">The whole section, used in the error message</param>
        /// <returns>The Counts object stored for the category</returns>
        /// <exception cref="Exception">CategoryText is not numeric or is not a key in CategoryCounts</exception>
        private static Counts GetCountsForCategory(Hashtable CategoryCounts, string CategoryText, string Section)
        {
            if (!Publications.IsNumeric(CategoryText))
            {
                throw new Exception("ReportSections contains invalid section '" + Section + "'");
            }

            int Category = Convert.ToInt32(CategoryText);
            if (!CategoryCounts.ContainsKey(Category))
            {
                throw new Exception("ReportSections contains invalid section '" + Section + "'");
            }

            return((Counts)CategoryCounts[Category]);
        }
Ejemplo n.º 5
0
            /// <summary>
            /// Build the counts for one year and one publication type category by
            /// walking forward through a sorted publication list
            /// </summary>
            /// <param name="PublicationList">Publication list to retrieve counts from,
            /// sorted by year, publication type and author position</param>
            /// <param name="Index">Offset in the publication list of the first publication
            /// matching the year and publication type. On return it has been advanced past
            /// every publication that was counted; it is left unchanged when nothing matches.</param>
            /// <param name="Year">Year to match for</param>
            /// <param name="PublicationType">Publication type category number to match for</param>
            /// <param name="PubTypes">Used to turn each publication's type into a category number</param>
            /// <param name="DB">Database passed through to the author position lookup</param>
            /// <param name="person">Person whose author position is looked up on each publication</param>
            /// <param name="Weights">Hashtable of weights keyed by journal; a journal that is
            /// null or missing from the table has a weight of zero</param>
            /// <param name="PeoplePublicationsTable">Table name passed through to the author position lookup</param>
            public Counts(Publication[] PublicationList, ref int Index, int Year, int PublicationType,
                          PublicationTypes PubTypes, Database DB, Person person, Hashtable Weights, string PeoplePublicationsTable)
            {
                // Leave every count at zero when the list is empty or the index
                // falls outside of it
                bool indexInRange = (PublicationList.Length != 0) &&
                                    (Index >= 0) &&
                                    (Index < PublicationList.Length);
                if (!indexInRange)
                {
                    return;
                }

                // Leave every count at zero when the publication at the index isn't
                // a match -- the list is sorted, so there are no matches at all
                Publication current         = PublicationList[Index];
                int         currentCategory = PubTypes.GetCategoryNumber(current.PubType);
                if ((current.Year != Year) || (currentCategory != PublicationType))
                {
                    return;
                }

                // The current publication matches. Count it, step forward, and keep
                // going until the list runs out or a publication doesn't match.
                while (true)
                {
                    // Weight of the publication's journal (zero when unknown)
                    float journalWeight = 0;
                    if (current.Journal != null && Weights.ContainsKey(current.Journal))
                    {
                        journalWeight = (float)Weights[current.Journal];
                    }

                    // Increment the plain and weighted counters for the person's
                    // author position on this publication
                    Harvester.AuthorPositions position;
                    Publications.GetAuthorPosition(DB, current.PMID, person, out position, PeoplePublicationsTable);

                    if (position == Harvester.AuthorPositions.First)
                    {
                        First++;
                        FirstWeighted += journalWeight;
                    }
                    else if (position == Harvester.AuthorPositions.Last)
                    {
                        Last++;
                        LastWeighted += journalWeight;
                    }
                    else if (position == Harvester.AuthorPositions.Second)
                    {
                        Second++;
                        SecondWeighted += journalWeight;
                    }
                    else if (position == Harvester.AuthorPositions.NextToLast)
                    {
                        NextToLast++;
                        NextToLastWeighted += journalWeight;
                    }
                    else if ((position == Harvester.AuthorPositions.Middle) ||
                             (position == Harvester.AuthorPositions.None))
                    {
                        // "None" is counted together with "Middle"
                        Middle++;
                        MiddleWeighted += journalWeight;
                    }

                    // Advance to the next publication, stopping at the end of the list
                    Index++;
                    if (Index >= PublicationList.Length)
                    {
                        break;
                    }

                    current         = PublicationList[Index];
                    currentCategory = PubTypes.GetCategoryNumber(current.PubType);
                    if ((current.Year != Year) || (currentCategory != PublicationType))
                    {
                        break;
                    }
                }
            }
Ejemplo n.º 6
0
        /// <summary>
        /// Retrieve the publications for a person and write them to the database
        /// </summary>
        /// <param name="ncbi">NCBI web query object</param>
        /// <param name="pubTypes">PublicationTypes object</param>
        /// <param name="person">Person to retrieve publications for</param>
        /// <param name="StatusCallback">Callback function to return status</param>
        /// <param name="MessageCallback">Callback function to send messages</param>
        /// <param name="AverageMilliseconds">Average time (in milliseconds) of each publication write</param>
        /// <returns>Number of publications written</returns>
        public int GetPublications(NCBI ncbi, PublicationTypes pubTypes, Person person,
                                   GetPublicationsStatus StatusCallback, GetPublicationsMessage MessageCallback,
                                   CheckForInterrupt InterruptCallback, out double AverageMilliseconds)
        {
            ArrayList Parameters;

            DateTime StartTime;
            DateTime EndTime;
            double   TotalMilliseconds = 0;

            AverageMilliseconds = 0;
            int numberFound   = 0;
            int numberWritten = 0;

            // Double-check that the person is really unharvested. If we try to
            // write publications for a person who already has publications, it will
            // cause an error -- and that could happen if this person was already
            // written from a duplicate person earlier.
            Parameters = new ArrayList();
            Parameters.Add(Database.Parameter(person.Setnb));
            int HarvestedCount = DB.GetIntValue("SELECT Count(*) FROM People WHERE Setnb = ? AND Harvested = 1", Parameters);

            if (HarvestedCount > 0)
            {
                MessageCallback("Already harvested publications for " + person.Last + " (" + person.Setnb + ")", false);
                Parameters = new ArrayList();
                Parameters.Add(Database.Parameter(person.Setnb));
                return(DB.GetIntValue("SELECT Count(*) FROM PeoplePublications WHERE Setnb = ?", Parameters));
            }


            MessageCallback("Retrieving data from NCBI", true);


            // Find any other people with the same names and search criteria.
            // Any publications found for this person should also be found
            // for them, so when we write the rows to PeoplePublications later
            // we'll also write them for the other people as well.

            // Look in the database for any other people with the same
            // values for name1, name2, name3, name4, name5, name6, and MedlineSearch.
            // Write their PeoplePublications as well.
            string NamesClause = "";

            Parameters = new ArrayList();
            for (int i = 0; i < 6; i++)
            {
                if (i < person.Names.Length)
                {
                    Parameters.Add(Database.Parameter(person.Names[i]));
                    NamesClause += " Name" + ((int)(i + 1)).ToString() + " = ? AND ";
                }
                else
                {
                    NamesClause += " Name" + ((int)(i + 1)).ToString() + " IS NULL AND ";
                }
            }
            Parameters.Add(Database.Parameter(person.MedlineSearch));
            Parameters.Add(Database.Parameter(person.Setnb));
            DataTable Results = DB.ExecuteQuery("SELECT " + Database.PEOPLE_COLUMNS +
                                                @"FROM People
                                                  WHERE Harvested = 0 AND "
                                                + NamesClause +
                                                @" MedlineSearch = ?
                                                    AND Setnb <> ?", Parameters
                                                );
            ArrayList DuplicatePeople = new ArrayList();

            foreach (DataRow Row in Results.Rows)
            {
                Person dupe = new Person(Row, Results.Columns);
                DuplicatePeople.Add(dupe);
                MessageCallback("Also writing publications for " + dupe.Last + " (" + dupe.Setnb + ") with same names and search criteria", false);
            }



            // Search NCBI -- if an error is thrown, write that error to the database
            string results;

            try
            {
                results = ncbi.Search(person.MedlineSearch);
                if (results.Substring(0, 100).Contains("Error occurred"))
                {
                    // NCBI returns an HTML error page in the results
                    //
                    // <html>
                    // <body>
                    // <br/><h2>Error occurred: Unable to obtain query #1</h2><br/>
                    // ...
                    //
                    // If NCBI returns an empty result set with no publications, it will give the error:
                    // Error occurred: Empty result - nothing todo
                    //
                    // That error should generate a warning and mark the person as harvested in the database.
                    // Any other error should be written to the database as an error.
                    string Error = results.Substring(results.IndexOf("Error occurred"));
                    if (results.Contains("<"))
                    {
                        Error = Error.Substring(0, Error.IndexOf("<"));
                    }
                    string Message;
                    if (Error.ToLower().Contains("empty result"))
                    {
                        Message = "Warning for "
                                  + person.Last + " (" + person.Setnb + "): no publications found (NCBI returned empty results)";
                        person.Harvested = true;
                        person.WriteToDB(DB);
                    }
                    else
                    {
                        Message = "Error reading publications for "
                                  + person.Last + " (" + person.Setnb + "): NCBI returned '" + Error + "'";
                        person.WriteErrorToDB(DB, Message);
                    }
                    MessageCallback(Message, false);
                    return(0);
                }
            }
            catch (Exception ex)
            {
                string Message = "Error reading publications for "
                                 + person.Last + " (" + person.Setnb + "): " + ex.Message;
                person.WriteErrorToDB(DB, Message);
                MessageCallback(Message, false);
                return(0);
            }

            Publications mpr = new Publications(results, pubTypes);

            if (mpr.PublicationList != null)
            {
                foreach (Publication publication in mpr.PublicationList)
                {
                    numberFound++;

                    // Exit immediately if the user interrupted the harvest
                    if (InterruptCallback())
                    {
                        return(numberWritten);
                    }

                    try
                    {
                        // Calculate the average time, to return in the callback status function
                        StartTime = DateTime.Now;

                        // Add the publication to PeoplePublications
                        // First find the author position and calculate the position type
                        int AuthorPosition = 0;
                        for (int i = 1; (publication.Authors != null) && (AuthorPosition == 0) && (i <= publication.Authors.Length); i++)
                        {
                            foreach (string name in person.Names)
                            {
                                if (StringComparer.CurrentCultureIgnoreCase.Equals(
                                        publication.Authors[i - 1], name //.ToUpper()
                                        ))
                                {
                                    AuthorPosition = i;
                                }
                                else if (name == "*")
                                {
                                    AuthorPosition = -1;
                                }
                            }
                        }

                        // If the PMID is 0, we don't have a way to process the publication
                        // and it was probably a Medline search result error.
                        if (publication.PMID == int.MinValue)
                        {
                            string errorMessage = "Found an invalid publication";
                            if (!string.IsNullOrEmpty(publication.Title))
                            {
                                errorMessage += " (Title = '" + publication.Title + "')";
                            }
                            person.WriteErrorToDB(DB, errorMessage);
                            MessageCallback(errorMessage, false);
                        }
                        else if (publication.PMID == 0)
                        {
                            string errorMessage = "WARNING: Found a publication with PMID = 0, not marking this as an error";
                            if (!string.IsNullOrEmpty(publication.Title))
                            {
                                errorMessage += " (Title = '" + publication.Title + "')";
                            }
                            MessageCallback(errorMessage, false);
                        }

                        // If for some reason the author doesn't exist in the publication, send a message back
                        else if (AuthorPosition == 0)
                        {
                            MessageCallback("Publication " + publication.PMID + " does not contain author " + person.Setnb, false);
                        }
                        else
                        {
                            // Write the publication to the database
                            if (Publications.WriteToDB(publication, DB, pubTypes, Languages))
                            {
                                // Exit immediately if the user interrupted the harvest
                                if (InterruptCallback())
                                {
                                    return(numberWritten);
                                }

                                // Only increment the publication count if the publication
                                // is actually written or already in the database
                                numberWritten++;

                                // Only add the row to PeoplePublications if the publication
                                // was written, or was already in the database. (For example,
                                // if the publication is not in English, it won't be written.)

                                Publications.WritePeoplePublicationsToDB(DB, person, publication);

                                // Write the publication for each of the other people
                                foreach (Person dupe in DuplicatePeople)
                                {
                                    Publications.WritePeoplePublicationsToDB(DB, dupe, publication);
                                }

                                // Calculate the average time per publication in milliseconds
                                EndTime = DateTime.Now;
                                TimeSpan Difference = EndTime - StartTime;
                                TotalMilliseconds  += Difference.TotalMilliseconds;
                                AverageMilliseconds = TotalMilliseconds / numberWritten;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        person.WriteErrorToDB(DB, ex.Message);
                        MessageCallback("Error writing publication " + publication.PMID.ToString() + ": " + ex.Message, false);
                    }
                    StatusCallback(numberFound, mpr.PublicationList.Length, (int)AverageMilliseconds);
                }
            }

            // Make sure each of the people with the same names and search query
            // are marked as harvested and have their errors cleared
            foreach (Person dupe in DuplicatePeople)
            {
                Parameters = new ArrayList();
                Parameters.Add(Database.Parameter(dupe.Setnb));
                DB.ExecuteNonQuery(
                    @"UPDATE People
                         SET Harvested = 1, Error = NULL, ErrorMessage = NULL
                       WHERE Setnb = ?", Parameters);
            }

            // Once the publications are all read, update People.Harvested, as part of
            // the fault-tolerance scheme -- PeoplePublications rows are only "final" when
            // this value is updated for the person. Any others can be cleared using
            // ClearDataAfterInterruption().
            Parameters = new ArrayList();
            Parameters.Add(Database.Parameter(person.Setnb));
            DB.ExecuteNonQuery(@"UPDATE People
                                    SET Harvested = 1
                                  WHERE Setnb = ?", Parameters);

            return(numberWritten);
        }