コード例 #1
0
        /// <summary>
        /// Loads the Word document at <paramref name="path"/> with Spire.Doc, extracts its text,
        /// reduces it to an allow-listed character set and hands it to <c>TextFormat</c>
        /// together with <paramref name="blockText"/>.
        /// </summary>
        /// <param name="path">Path of the document passed to <c>LoadFromFile</c>.</param>
        /// <param name="blockText">Forwarded unchanged as the second argument of <c>TextFormat</c>.</param>
        /// <returns>
        /// The value returned by <c>TextFormat</c>. Exceptions are swallowed: if one occurs before
        /// any text was extracted the result is <c>null</c>; if it occurs in a later step the text
        /// as of the last completed step is returned.
        /// </returns>
        public override string ConvertToString(string path, string blockText)
        {
            string             extracted = null;
            Spire.Doc.Document document  = null;

            try
            {
                document = new Spire.Doc.Document();
                document.LoadFromFile(path);

                // Best effort: drop header/footer content of the first section so it does not
                // end up in the extracted text. Any failure here is deliberately ignored.
                try
                {
                    var firstSection = document.Sections[0];
                    firstSection.HeadersFooters.Header.ChildObjects.Clear();
                    firstSection.HeadersFooters.Footer.ChildObjects.Clear();
                }
                catch
                {
                }

                // Note from the original author: this uses the free edition of Spire.Doc, which has
                // size limits. A Word document being loaded or processed must not exceed
                // 500 paragraphs and 25 tables. Purchase/upgrade to the paid edition for more.
                extracted = document.GetText();

                // '#' is used as a temporary line-break marker, so remove literal '#' first,
                // then map both CR and LF onto it.
                extracted = extracted.Replace("#", "");
                extracted = extracted.Replace('\r', '#');
                extracted = extracted.Replace('\n', '#');

                // Keep only characters in U+4E00..U+9FA5, the listed punctuation and the '#' marker.
                extracted = Regex.Replace(extracted, @"[^\u4e00-\u9fa5\《\》\(\)\——\;\,\。\“\”\!\#]", "");

                // Collapse every run of markers into a single "@@" separator.
                extracted = Regex.Replace(extracted, "[#]+", "@@").Trim();

                extracted = TextFormat(extracted, blockText);
            }
            catch (Exception)
            {
                // Swallowed on purpose: the caller receives whatever was produced so far.
            }
            finally
            {
                if (document != null)
                {
                    document.Close();
                }
            }

            return extracted;
        }
コード例 #2
0
        /// <summary>
        /// Loads the Word document at <paramref name="path"/> with Spire.Doc, extracts its text,
        /// reduces it to an allow-listed character set and hands it to <c>TextFormat</c>.
        /// </summary>
        /// <param name="path">Path of the document passed to <c>LoadFromFile</c>.</param>
        /// <returns>
        /// The value returned by <c>TextFormat</c>. Exceptions are swallowed: if one occurs before
        /// any text was extracted the result is <c>null</c>; if it occurs in a later step the text
        /// as of the last completed step is returned.
        /// </returns>
        public override string ConvertToString(string path)
        {
            string             extracted = null;
            Spire.Doc.Document document  = null;

            try
            {
                document = new Spire.Doc.Document();
                document.LoadFromFile(path);

                // Best effort: drop header/footer content of the first section so it does not
                // end up in the extracted text. Any failure here is deliberately ignored.
                try
                {
                    var firstSection = document.Sections[0];
                    firstSection.HeadersFooters.Header.ChildObjects.Clear();
                    firstSection.HeadersFooters.Footer.ChildObjects.Clear();
                }
                catch
                {
                }

                extracted = document.GetText();

                // '#' is used as a temporary line-break marker, so remove literal '#' first,
                // then map both CR and LF onto it.
                extracted = extracted.Replace("#", "");
                extracted = extracted.Replace('\r', '#');
                extracted = extracted.Replace('\n', '#');

                // Keep only characters in U+4E00..U+9FA5, the listed punctuation and the '#' marker.
                extracted = Regex.Replace(extracted, @"[^\u4e00-\u9fa5\《\》\(\)\——\;\,\。\“\”\!\#]", "");

                // Collapse every run of markers into a single "@@" separator.
                extracted = Regex.Replace(extracted, "[#]+", "@@").Trim();

                extracted = TextFormat(extracted);
            }
            catch (Exception)
            {
                // Swallowed on purpose: the caller receives whatever was produced so far.
            }
            finally
            {
                if (document != null)
                {
                    document.Close();
                }
            }

            return extracted;
        }
コード例 #3
0
 /// <summary>
 /// Loads the file at <paramref name="path"/> into the supplied Spire.Doc document and
 /// returns the document text.
 /// </summary>
 /// <param name="path">Path of the document passed to <c>LoadFromFile</c>.</param>
 /// <param name="doc">Caller-owned document instance; it is loaded into but not closed here.</param>
 /// <returns>The result of <c>doc.GetText()</c>.</returns>
 string get_text_from_word_by_spire(string path, Spire.Doc.Document doc)
 {
     doc.LoadFromFile(path);

     // Best effort: empty the first section's header and footer before extracting text.
     // Any failure here is deliberately ignored.
     try
     {
         var firstSection = doc.Sections[0];
         firstSection.HeadersFooters.Header.ChildObjects.Clear();
         firstSection.HeadersFooters.Footer.ChildObjects.Clear();
     }
     catch
     {
     }

     string extracted = doc.GetText();
     return extracted;
 }
コード例 #4
0
 /// <summary>
 /// Returns the cached document text, loading it from <c>path_</c> on first use.
 /// </summary>
 /// <remarks>
 /// The cache check uses <c>string.IsNullOrEmpty</c>, so a document that yields an empty
 /// string is loaded again on every call.
 /// </remarks>
 /// <returns>The contents of <c>text_</c> after any required load.</returns>
 public string text()
 {
     if (!string.IsNullOrEmpty(text_))
     {
         return text_;
     }

     var loaded = new Document();
     loaded.LoadFromFile(path_);
     var rawText = loaded.GetText();

     // Pairs handed to the project's `replace` helper; presumably each left-hand string is
     // replaced by the right-hand one (helper is defined elsewhere — confirm).
     var substitutions = new string[, ] {
         { "\r\n", " " }, { "\t", " " }
     };
     text_ = rawText.replace(substitutions);

     return text_;
 }
コード例 #5
0
        /// <summary>
        /// Searches a file for employees: fetches the file to a local path, extracts its text
        /// according to the file extension, and passes the text to <c>SearchEmployeesAux1</c>.
        /// </summary>
        /// <remarks>
        /// <c>.pdf</c> files are read page by page with Spire.Pdf, then line breaks are replaced
        /// by spaces and the text is passed through <c>DeleteRepeatedSpaces</c>.
        /// <c>.docx</c> files are read with Spire.Doc. Any other extension is read as plain text.
        /// The extension comparison is case-insensitive.
        /// </remarks>
        /// <param name="file">
        /// File to be searched and added to the sql database
        /// </param>
        /// <param name="connectionString">
        /// Azure blob storage connection string
        /// </param>
        /// <param name="containerName">
        /// Azure blob storage container name
        /// </param>
        /// <returns>
        /// Dictionary with the results found
        /// </returns>
        public Dictionary <string, int> SearchEmployees(D.Models.File file, string connectionString, string containerName)
        {
            string filePath  = GeneratePath(file.Name);
            // Invariant lower-casing: the extension is an identifier, not linguistic text.
            string extension = Path.GetExtension(file.Name).ToLowerInvariant();
            string text;

            DownloadFile(file.Name, filePath, connectionString, containerName);

            if (extension == ".pdf")
            {
                PdfDocument doc = new PdfDocument();
                try
                {
                    doc.LoadFromFile(filePath);
                    StringBuilder buffer = new StringBuilder();
                    foreach (PdfPageBase page in doc.Pages)
                    {
                        buffer.Append(page.ExtractText());
                    }

                    text = buffer.ToString();
                }
                finally
                {
                    // Close the document even when loading or extraction throws.
                    doc.Close();
                }

                text = text.Replace(Environment.NewLine, " ");
                text = DeleteRepeatedSpaces(text);
            }
            else if (extension == ".docx")
            {
                Spire.Doc.Document doc = new Spire.Doc.Document();
                try
                {
                    doc.LoadFromFile(filePath);
                    text = doc.GetText();
                }
                finally
                {
                    // Close the document even when loading or extraction throws.
                    doc.Close();
                }
            }
            else
            {
                text = File.ReadAllText(filePath);
            }

            return SearchEmployeesAux1(file, filePath, text);
        }