예제 #1
0
        /// <summary>
        /// Gets the text of this HTML document by reading <c>Bytes</c> as text and
        /// passing the result through <c>DocUtils.StripHTML</c>.
        /// </summary>
        /// <returns>
        /// The value returned by <c>DocUtils.StripHTML</c>, or null when <c>Bytes</c> is
        /// null or empty, or when reading/stripping throws (the failure is logged, not rethrown).
        /// </returns>
        public override string GetTextFromDocumentBinary()
        {
            // If we have no bytes then we can't do anything.
            if (Bytes == null || Bytes.Length == 0)
            {
                // Log the problem.
                log.Error("Tried to extract text from empty bytes for file " + Name);
                return null;
            }

            try
            {
                // Disposing the reader also closes the MemoryStream it wraps, so both
                // are released deterministically even if ReadToEnd or StripHTML throws.
                using (StreamReader reader = new StreamReader(new MemoryStream(Bytes)))
                {
                    return DocUtils.StripHTML(reader.ReadToEnd());
                }
            }
            catch (Exception e)
            {
                // Best-effort extraction: record the failure and report "no text" to the caller.
                log.Error("Failed to get the text from the HTML file " + Name, e);
                return null;
            }
        }