/// <summary>
/// Determines whether the specified page name appears in the supplied list.
/// Names are compared case-insensitively using the current culture.
/// </summary>
/// <param name="pageName">Page name for which to search</param>
/// <param name="items">List in which to search; a null list is treated as empty</param>
/// <returns>True if found, False if not</returns>
private bool PageNameIsInList( string pageName, FindItItem[] items )
{
    if ( items == null )
    {
        return false;
    }

    foreach ( FindItItem item in items )
    {
        // Stop at the first match; scanning the rest of the list cannot change the answer.
        if ( item != null && String.Equals( item.Name, pageName, StringComparison.CurrentCultureIgnoreCase ) )
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// For each page name in the list, inserts a new row in the database or
/// updates the existing row when its NamebankID differs. Then, any rows for
/// the page whose names are not in the list have their NamebankID cleared
/// and are set to inactive.
/// </summary>
/// <param name="pageID">Identifier of the page whose page names are being updated</param>
/// <param name="items">List of page names; must not be null</param>
/// <returns>
/// Four counters: [0] rows inserted, [1] rows updated, [2] rows deactivated,
/// [3] rows left unchanged.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when items is null.</exception>
public int[] PageNameUpdateList( int pageID, FindItItem[] items )
{
    // Fail with a descriptive exception before any database work is done.
    if ( items == null )
    {
        throw new ArgumentNullException( "items" );
    }

    const int InsertedIndex = 0;
    const int UpdatedIndex = 1;
    const int DeactivatedIndex = 2;
    const int UnchangedIndex = 3;

    int[] updateStats = new int[ 4 ];

    foreach ( FindItItem item in items )
    {
        PageName existing = this.PageNameSelectByPageIDAndNameFound( pageID, item.Name );

        if ( existing == null )
        {
            // Insert new page name
            PageName created = new PageName();
            created.NameFound = item.Name;
            created.NameConfirmed = item.Name;
            ApplyNamebankID( created, item );
            created.PageID = pageID;
            created.Source = "uBio";
            created.IsCommonName = false;
            this.PageNameInsertAuto( created );
            updateStats[ InsertedIndex ]++;
        }
        else if ( ( existing.NameBankID ?? 0 ) != item.NamebankID )
        {
            // Update existing page name because the NamebankID has changed.
            // NOTE(review): a stored null is compared as 0, so a negative incoming ID
            // against a stored null counts as a change even though it is normalized
            // back to null -- confirm whether that is intended.
            ApplyNamebankID( existing, item );
            this.PageNameUpdateAuto( existing );
            updateStats[ UpdatedIndex ]++;
        }
        else
        {
            updateStats[ UnchangedIndex ]++;
        }
    }

    // Deactivate any names that are in the database, but were not returned
    // by the just-completed UBIO request (this means they've fallen out of the
    // list of page names for this page)
    CustomGenericList<PageName> storedNames = this.PageNameSelectByPageID( pageID );
    foreach ( PageName stored in storedNames )
    {
        if ( this.PageNameIsInList( stored.NameFound, items ) )
        {
            // Already counted in the loop above.
            continue;
        }

        if ( stored.NameBankID != null )
        {
            stored.Active = false;
            stored.NameBankID = null;
            this.PageNameUpdateAuto( stored );
            updateStats[ DeactivatedIndex ]++;
        }
        else
        {
            // Already has no NamebankID, so nothing is written.
            updateStats[ UnchangedIndex ]++;
        }
    }

    return updateStats;
}

/// <summary>
/// Copies the item's NamebankID onto the page name, storing null for IDs that are
/// zero or negative, and marks the page name active only when an ID was stored.
/// </summary>
private static void ApplyNamebankID( PageName pageName, FindItItem item )
{
    pageName.NameBankID = item.NamebankID;
    if ( pageName.NameBankID <= 0 )
    {
        pageName.NameBankID = null;
    }
    pageName.Active = !( pageName.NameBankID == null );
}
/// <summary>
/// Asks the uBio taxonFinder web service for the names found on a page and
/// returns one item per entity element in the XML response.
/// </summary>
/// <param name="pageID">Identifier of the page to submit</param>
/// <returns>The items parsed from the response; an empty array when no entity elements were read</returns>
public FindItItem[] GetUBioNames( int pageID )
{
    BHLProvider provider = new BHLProvider();
    PageSummaryView ps = provider.PageSummarySelectByPageId( pageID );
    string webServiceUrl = this.BuildUBioRequestUrl( provider, ps, pageID );

    HttpWebResponse resp = null;
    XmlTextReader reader = null;
    try
    {
        HttpWebRequest req = (HttpWebRequest)WebRequest.Create( webServiceUrl );
        // NOTE(review): sent as POST although every parameter travels in the query
        // string -- confirm the service requires it.
        req.Method = "POST";
        req.Timeout = 15000;
        resp = (HttpWebResponse)req.GetResponse();

        reader = new XmlTextReader( resp.GetResponseStream() );
        // The response comes from an external service; do not let the reader
        // resolve external DTDs or entities on its behalf.
        reader.XmlResolver = null;

        return ParseUBioEntities( reader );
    }
    finally
    {
        if ( reader != null ) reader.Close();
        if ( resp != null ) resp.Close();
    }
}

/// <summary>
/// Builds the taxonFinder request url: the OCR text plus the page's existing names
/// inline when they are small enough, otherwise a url pointing at the OCR text file.
/// </summary>
private string BuildUBioRequestUrl( BHLProvider provider, PageSummaryView ps, int pageID )
{
    // Limit, in KB, applied both to the OCR file and to the finished url.
    const int MaxInlineKB = 5;

    bool useRemoteProvider = ConfigurationManager.AppSettings[ "UseRemoteFileAccessProvider" ] == "true";
    string filepath = ps.OcrTextLocation;

    if ( provider.GetFileAccessProvider( useRemoteProvider ).GetFileSizeInKB( filepath ) <= MaxInlineKB )
    {
        // OCR text not too large for URI, so send it in the UBIO request
        string ocrText = this.GetOcrText( ps );
        StringBuilder url = new StringBuilder();
        url.Append( "http://www.ubio.org/webservices/service.php?function=taxonFinder&includeLinks=0&freeText=" );
        url.Append( System.Web.HttpUtility.UrlEncode( ocrText ) );

        // Add the existing page names for this Page to the UBIO url
        CustomGenericList<PageName> pageNames = provider.PageNameSelectByPageID( pageID );
        foreach ( PageName pageName in pageNames )
        {
            url.Append( System.Web.HttpUtility.UrlEncode( " " + pageName.NameFound ) );
        }

        // Only use the inline form if it is still small after all UrlEncoding is complete
        string inlineUrl = url.ToString();
        if ( ( (long)inlineUrl.Length / 1024 ) <= MaxInlineKB )
        {
            return inlineUrl;
        }
    }

    // OCR text is too large, so just send the file path
    return String.Format( "http://www.ubio.org/webservices/service.php?function=taxonFinder&includeLinks=0&url=http://www.botanicus.org/PrimeOcr/mbgserv14/{0}/{1}/{2}/{3}.txt", ps.WebVirtualDirectory, ps.FileRootFolder, ps.BarCode, ps.FileNamePrefix );
}

/// <summary>
/// Reads the response node by node, creating an item at each entity start tag,
/// filling it from the nameString and namebankID values, and collecting it at
/// the entity end tag.
/// </summary>
private static FindItItem[] ParseUBioEntities( XmlTextReader reader )
{
    List<FindItItem> found = new List<FindItItem>();
    FindItItem current = null;
    string currentStage = "";

    while ( reader.Read() )
    {
        if ( reader.NodeType == XmlNodeType.Whitespace ) continue;

        // Values seen before any entity element has been opened are skipped.
        if ( currentStage == "nameString" && reader.Value != "" && current != null )
        {
            current.Name = reader.Value;
        }

        if ( currentStage == "namebankID" && reader.Value != "" )
        {
            if ( current != null )
            {
                current.NamebankID = Int32.Parse( reader.Value );
            }
        }
        else if ( reader.NodeType != XmlNodeType.EndElement )
        {
            // Track the name of the node just read; a new entity starts a new item.
            currentStage = reader.Name;
            if ( reader.Name == "entity" )
            {
                current = new FindItItem();
            }
        }
        else if ( reader.Name == "entity" )
        {
            found.Add( current );
        }
    }

    return found.ToArray();
}
/// <summary>
/// Passes the page identifier and name list to a new BHLProvider instance's
/// PageNameUpdateList method.
/// </summary>
/// <param name="pageID">Identifier of the page whose page names are being updated</param>
/// <param name="items">List of page names</param>
/// <returns>The array returned by the provider's PageNameUpdateList</returns>
public int[] PageNameUpdateList(int pageID, FindItItem[] items)
{
    return new BHLProvider().PageNameUpdateList(pageID, items);
}
/// <summary>
/// Submits a single name to the uBio taxonFinder web service.
/// </summary>
/// <param name="name">Name to send as the request's free text</param>
/// <returns>
/// An item when the response contains exactly one nameString element; its
/// NamebankID is set only when exactly one namebankID element is present.
/// Null when there is not exactly one nameString element, or when the request
/// or the parsing fails for any reason.
/// </returns>
private FindItItem ubioLookup( string name )
{
    HttpWebResponse response = null;
    XmlTextReader reader = null;
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create( "http://www.ubio.org/webservices/service.php?function=taxonFinder&includeLinks=0&freeText=" + Server.UrlEncode( name ) );
        request.Timeout = 15000;
        response = (HttpWebResponse)request.GetResponse();

        reader = new XmlTextReader( response.GetResponseStream() );
        // The response comes from an external service; do not let the reader
        // resolve external DTDs or entities on its behalf.
        reader.XmlResolver = null;

        XmlDocument doc = new XmlDocument();
        doc.Load( reader );

        // Taking this simple, straightforward approach because we're only expecting one
        // result per check. Will make the parsing algorithm more robust later if needed.
        XmlNodeList nameList = doc.GetElementsByTagName( "nameString" );
        XmlNodeList idList = doc.GetElementsByTagName( "namebankID" );

        if ( nameList.Count != 1 )
        {
            return null;
        }

        FindItItem result = new FindItItem();
        result.Name = nameList[ 0 ].InnerText;
        if ( idList.Count == 1 )
        {
            result.NamebankID = int.Parse( idList[ 0 ].InnerText );
        }
        return result;
    }
    catch
    {
        // Best-effort lookup: any failure (network, malformed XML, unparsable ID)
        // is reported to the caller as "no result".
        return null;
    }
    finally
    {
        if ( reader != null ) reader.Close();
        if ( response != null ) response.Close();
    }
}