/// <summary>
        /// Invoke the BHL "NameSearch" API to determine if the given entity is a scientific name
        /// recognized by BHL.
        /// </summary>
        /// <param name="entity">
        /// The entity to evaluate.  Its Type is compared (case-insensitively) to "other" and its
        /// Name is used as the search string.
        /// </param>
        /// <returns>
        /// True if the entity is of type "other" and the API response contains a NameConfirmed
        /// element whose text starts with the entity name; False otherwise (including when the
        /// entity name or the API response is empty).
        /// </returns>
        private static bool IsSciName(EntityRecord entity)
        {
            // In the Azure output, Scientific Names are categorized as "Other".
            // Ordinal, case-insensitive comparison avoids culture-dependent casing and
            // tolerates a null Type.
            if (!string.Equals(entity.Type, "other", System.StringComparison.OrdinalIgnoreCase))
            {
                return false;
            }

            // Without a name there is nothing to search for; an empty search string would also
            // "match" any confirmed name in the response.
            if (string.IsNullOrWhiteSpace(entity.Name))
            {
                return false;
            }

            // Use the BHL Api to search for the name in BHL
            string nameSearchResponse = BHLApi3.NameSearch(entity.Name, BHLApi3.ResponseFormat.Xml, Config.BhlApiKey);
            if (string.IsNullOrEmpty(nameSearchResponse))
            {
                return false;
            }

            // The response will contain a NameConfirmed element that starts with the search string
            // if the name was found.
            try
            {
                // Comparing against the parsed element text (rather than the raw markup) lets
                // names containing XML-escaped characters such as '&' match correctly.
                System.Xml.Linq.XDocument response = System.Xml.Linq.XDocument.Parse(nameSearchResponse);

                foreach (System.Xml.Linq.XElement element in response.Descendants())
                {
                    // Match on the local name so the check works with or without an XML namespace.
                    if (element.Name.LocalName == "NameConfirmed" &&
                        element.Value.StartsWith(entity.Name, System.StringComparison.Ordinal))
                    {
                        return true;
                    }
                }

                return false;
            }
            catch (System.Xml.XmlException)
            {
                // The response could not be parsed as XML; fall back to a plain substring check
                // of the raw response text.
                return nameSearchResponse.Contains("<NameConfirmed>" + entity.Name);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Download the text to be analyzed.  The input folder (Config.InputFolder) is emptied of
        /// files, or created if missing, and then one UTF-8 text file named "{PageID}.txt" is
        /// written for each Page element returned by the BHL "GetItemMetadata" API for the
        /// current item.
        /// </summary>
        private static void DownloadText()
        {
            // Create the input directory if it does not exist, or clear it if it does
            if (Directory.Exists(Config.InputFolder))
            {
                foreach (string file in Directory.GetFiles(Config.InputFolder))
                {
                    File.Delete(file);
                }
            }
            else
            {
                Directory.CreateDirectory(Config.InputFolder);
            }

            // Get the item text from the BHL API
            string itemMetadataResponse =
                BHLApi3.GetItemMetadata(_itemID, true, true, false, BHLApi3.ResponseFormat.Xml, Config.BhlApiKey);

            // Extract the text from the API response
            XDocument xml = XDocument.Parse(itemMetadataResponse);

            foreach (XElement page in xml.Root
                     .Elements("Result")
                     .Elements("Item")
                     .Elements("Pages")
                     .Elements("Page"))
            {
                // The explicit string conversion yields null (instead of throwing) when the
                // element is absent.
                string pageID = (string)page.Element("PageID");
                if (string.IsNullOrEmpty(pageID))
                {
                    // Without an ID there is no file name to write to; skip this page.
                    continue;
                }

                // A page without an OcrText element is written as an empty file, the same as a
                // page whose OcrText element is empty.
                string pageText = (string)page.Element("OcrText") ?? string.Empty;

                // Write the text of each page to a file.  Path.Combine supplies the correct
                // directory separator for the platform.
                File.WriteAllText(Path.Combine(Config.InputFolder, pageID + ".txt"),
                                  pageText, System.Text.Encoding.UTF8);
            }
        }