예제 #1
0
 // Assembles the Article model for a single finder result.
 //   item            - the finder result supplying URL, title, source, description and (optionally) extracted content
 //   names           - names to attach to the article
 //   categories      - categories to attach to the article
 //   rawDocumentText - document text as returned by the service; used only when item.Content is null or empty
 // The article id is whatever GetUrlHash produces for item.URL.
 private Article GetNewArticle(ArticleFinderResult item, List<Name> names, List<Category> categories, string rawDocumentText)
 {
     Article article = new Article();
     article.ArticleId = GetUrlHash(item.URL);
     article.Categories = categories;
     article.Names = names;
     article.ArticleTitle = item.Title;
     article.ArticleSource = item.Source;
     article.ArticleDescription = item.Description;

     // Prefer the text extracted by this app: it is typically cleaner than the service's
     // text, which is often littered with \n and \t characters. Fall back to the service
     // text only when the app extracted nothing.
     if (string.IsNullOrEmpty(item.Content))
     {
         article.ArticleText = rawDocumentText;
     }
     else
     {
         article.ArticleText = item.Content;
     }

     article.ArticleUrl = item.URL;
     return article;
 }
예제 #2
0
        // Sends the item to the Calais service, collects the response string in the shared
        // buffer `sb`, then calls GetCategoreisAndPeople to pull names, categories and the raw
        // document text out of that response, and finally builds the Article via GetNewArticle.
        //
        // When item.RawHtml is null or empty the item is sent as plain text (title + content)
        // with the text parameters; otherwise the raw HTML is sent with the HTML parameters.
        //
        // Service failures are logged and counted, never rethrown: exactly one log entry and one
        // AddServiceError() call is made per failed request. After a failure `sb` stays empty and
        // is still handed to GetCategoreisAndPeople.
        // NOTE(review): how GetCategoreisAndPeople copes with an empty buffer is not visible here - confirm.
        private Article GenerateArticle(ArticleFinderResult item)
        {
            sb.Clear();

            // No raw HTML means plain text content was obtained for this item.
            bool isPlainText = string.IsNullOrEmpty(item.RawHtml);
            string timeoutContext = isPlainText ? "content" : "HTML";

            try
            {
                if (isPlainText)
                {
                    sb.Append(csSOAP.Enlighten(App_Resources.openCalaisKey.ToString(), item.Title + " " + item.Content, Params.GetTextParamsXML()));
                }
                else
                {
                    sb.Append(csSOAP.Enlighten(App_Resources.openCalaisKey.ToString(), item.RawHtml, Params.GetHTMLParamsXML()));
                }
            }
            // Ordered catch clauses guarantee a single handler per exception. The previous
            // "if / if / else" chain let a timeout fall through into the final else, logging it
            // a second time as an unexpected error and counting it twice.
            catch (TimeoutException) // the service sometimes doesn't respond so need to catch this
            {
                Trace.TraceInformation(DateTime.Now.ToString() + " Timeout in " + timeoutContext + ": " + item.URL); // error logged in log file (see program.cs)
                SessionInfo.Instance.AddServiceError(); // keep track of the number of service errors
            }
            catch (MessageSecurityException ex)
            {
                Trace.TraceInformation(DateTime.Now.ToString() + " Message security exception: " + item.URL + " " + ex.Message);
                SessionInfo.Instance.AddServiceError();
            }
            catch (Exception ex)
            {
                Trace.TraceInformation(DateTime.Now.ToString() + " UNEXPECTED ERROR: " + item.URL + " " + ex.Message);
                SessionInfo.Instance.AddServiceError();
            }

            List<Name> names; // all the names in the document content
            List<Category> categories; // any document categories identified
            string rawDocumentText; // document content as returned by the service
            GetCategoreisAndPeople(sb, out names, out categories, out rawDocumentText);
            return GetNewArticle(item, names, categories, rawDocumentText);
        }