/// <summary>
/// Invoke the BHL "NameSearch" API to determine if the given entity is a scientific name
/// recognized by BHL.
/// </summary>
/// <param name="entity">The entity to evaluate; its Type and Name values are read.</param>
/// <returns>
/// True if the entity is categorized as "Other" and the API response contains a
/// NameConfirmed element starting with the entity name; False otherwise.
/// </returns>
private static bool IsSciName(EntityRecord entity)
{
    // In the Azure output, Scientific Names are categorized as "Other". Compare with an
    // ordinal, case-insensitive comparison so the result does not depend on the current
    // culture and a null Type simply yields "not a scientific name".
    if (!string.Equals(entity.Type, "other", System.StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    // Without a name the probe below would degrade to just "<NameConfirmed>" and match
    // any response that confirmed anything at all, so there is nothing to look up.
    if (string.IsNullOrEmpty(entity.Name))
    {
        return false;
    }

    // Use the BHL Api to search for the name in BHL
    string nameSearchResponse = BHLApi3.NameSearch(entity.Name, BHLApi3.ResponseFormat.Xml, Config.BhlApiKey);
    if (string.IsNullOrEmpty(nameSearchResponse))
    {
        return false;
    }

    // The response will contain a NameConfirmed element that starts with the search string
    // if the name was found. This is a brute-force textual evaluation of the XML response.
    // NOTE(review): a name containing XML-reserved characters (e.g. '&') would presumably be
    // escaped in the response and therefore never match here - confirm whether that matters.
    return nameSearchResponse.IndexOf("<NameConfirmed>" + entity.Name, System.StringComparison.Ordinal) >= 0;
}
/// <summary>
/// Download the text to be analyzed.
/// </summary>
/// <remarks>
/// Empties (or creates) the folder named by Config.InputFolder, requests the item metadata
/// for the current item from the BHL API, and writes the OCR text of each page to a
/// UTF-8 file named "{PageID}.txt" inside that folder.
/// </remarks>
private static void DownloadText()
{
    // Create the input directory if it does not exist, or clear it if it does
    if (Directory.Exists(Config.InputFolder))
    {
        foreach (string existingFile in Directory.GetFiles(Config.InputFolder))
        {
            File.Delete(existingFile);
        }
    }
    else
    {
        Directory.CreateDirectory(Config.InputFolder);
    }

    // Get the item text from the BHL API
    string itemMetadataResponse = BHLApi3.GetItemMetadata(_itemID, true, true, false,
        BHLApi3.ResponseFormat.Xml, Config.BhlApiKey);

    // Extract the text from the API response
    XDocument xml = XDocument.Parse(itemMetadataResponse);
    foreach (XElement page in xml.Root
        .Elements("Result")
        .Elements("Item")
        .Elements("Pages")
        .Elements("Page"))
    {
        // The explicit XElement-to-string conversion returns null for a missing element
        // instead of throwing, so incomplete Page entries can be handled gracefully.
        string pageID = (string)page.Element("PageID");
        if (string.IsNullOrEmpty(pageID))
        {
            // Without an identifier there is no file name to write to; skip this page.
            continue;
        }

        // A page without OCR text still gets an (empty) file.
        string pageText = (string)page.Element("OcrText") ?? string.Empty;

        // Write the text of each page to a file. Path.Combine inserts the platform's
        // directory separator rather than a hard-coded backslash.
        string pagePath = Path.Combine(Config.InputFolder, pageID + ".txt");
        File.WriteAllText(pagePath, pageText, System.Text.Encoding.UTF8);
    }
}