/// <summary>
/// Builds a new <see cref="Article"/> from a finder result plus the names,
/// categories and raw text obtained for that document.
/// </summary>
/// <param name="item">The finder result supplying URL, title, source, description and content.</param>
/// <param name="names">Names to attach to the article.</param>
/// <param name="categories">Categories to attach to the article.</param>
/// <param name="rawDocumentText">Document text as returned by the service; used only when <paramref name="item"/> has no content.</param>
/// <returns>The populated article; its id is the result of GetUrlHash on the item's URL.</returns>
private Article GetNewArticle(ArticleFinderResult item, List<Name> names, List<Category> categories, string rawDocumentText)
{
    Article article = new Article();

    article.ArticleId = GetUrlHash(item.URL);
    article.Categories = categories;
    article.Names = names;
    article.ArticleTitle = item.Title;
    article.ArticleSource = item.Source;
    article.ArticleDescription = item.Description;

    // Prefer the content extracted by this app: it is typically more readable,
    // whereas the text returned by the service is often littered with \n and \t
    // characters. Fall back to the service text only when no content is available.
    if (string.IsNullOrEmpty(item.Content))
    {
        article.ArticleText = rawDocumentText;
    }
    else
    {
        article.ArticleText = item.Content;
    }

    article.ArticleUrl = item.URL;

    return article;
}
/// <summary>
/// Contacts the Calais service to obtain the response RDF string for the item,
/// then calls GetCategoreisAndPeople to pull names and categories out of that
/// response, and finally calls GetNewArticle to build the resulting article.
/// </summary>
/// <param name="item">The finder result to analyse. Its RawHtml is sent when present; otherwise its title and content are sent as plain text.</param>
/// <returns>The article built by GetNewArticle from the item and the parsed response.</returns>
/// <remarks>
/// Service failures are logged and counted but not rethrown. When the call fails
/// the shared buffer is left empty and is still handed to GetCategoreisAndPeople.
/// </remarks>
private Article GenerateArticle(ArticleFinderResult item)
{
    sb.Clear();

    // No raw HTML means we got plain text content from this item, so use the text params.
    bool isPlainText = string.IsNullOrEmpty(item.RawHtml);

    try
    {
        if (isPlainText)
        {
            sb.Append(csSOAP.Enlighten(App_Resources.openCalaisKey.ToString(), item.Title + " " + item.Content, Params.GetTextParamsXML()));
        }
        else
        {
            // Just raw HTML, so use the params for HTML.
            sb.Append(csSOAP.Enlighten(App_Resources.openCalaisKey.ToString(), item.RawHtml, Params.GetHTMLParamsXML()));
        }
    }
    catch (Exception ex)
    {
        // The service sometimes doesn't respond, so failures are recorded rather than propagated.
        LogEnlightenFailure(ex, item.URL, isPlainText ? " Timeout in content: " : " Timeout in HTML: ");
    }

    List<Name> names;            // all the names in the document content
    List<Category> categories;   // any document categories identified
    string rawDocumentText;      // document content as returned by the service
    GetCategoreisAndPeople(sb, out names, out categories, out rawDocumentText);

    return GetNewArticle(item, names, categories, rawDocumentText);
}

/// <summary>
/// Writes a single trace entry describing a failed service call and records
/// exactly one service error for it.
/// </summary>
/// <param name="ex">The exception raised by the service call.</param>
/// <param name="url">URL of the item being processed, included in the trace entry.</param>
/// <param name="timeoutLabel">Text placed between the timestamp and the URL when the failure is a timeout.</param>
private static void LogEnlightenFailure(Exception ex, string url, string timeoutLabel)
{
    string timestamp = DateTime.Now.ToString();

    // The branches are mutually exclusive so that a timeout is not additionally
    // reported (and counted) as an unexpected error.
    if (ex is TimeoutException)
    {
        Trace.TraceInformation(timestamp + timeoutLabel + url);
    }
    else if (ex is MessageSecurityException)
    {
        Trace.TraceInformation(timestamp + " Message security exception: " + url + " " + ex.Message);
    }
    else
    {
        Trace.TraceInformation(timestamp + " UNEXPECTED ERROR: " + url + " " + ex.Message);
    }

    // Keep track of the number of service errors: one per failed call.
    SessionInfo.Instance.AddServiceError();
}