/// <summary>
/// Extracts the text of the Word document at <paramref name="path"/> and reduces it to a
/// normalized form: every existing '#' is removed, each CR/LF becomes '#', every character
/// outside U+4E00..U+9FA5 and the punctuation listed in the filter pattern is dropped,
/// each run of '#' collapses to "@@", and the trimmed result is passed through
/// <c>TextFormat(text, blockText)</c>.
/// </summary>
/// <param name="path">Path of the Word document to load.</param>
/// <param name="blockText">Forwarded unchanged to <c>TextFormat</c>.</param>
/// <returns>
/// The formatted text. Any exception is swallowed, in which case the value reached so far
/// is returned (<c>null</c> if the text could not be extracted at all).
/// </returns>
public override string ConvertToString(string path, string blockText)
{
    string result = null;
    Spire.Doc.Document document = null;

    try
    {
        document = new Spire.Doc.Document();
        document.LoadFromFile(path);

        // Best effort: drop the header and footer of the first section so their
        // content does not end up in the extracted text. Failures are ignored.
        try
        {
            document.Sections[0].HeadersFooters.Header.ChildObjects.Clear();
            document.Sections[0].HeadersFooters.Footer.ChildObjects.Clear();
        }
        catch
        {
        }

        // This uses the free edition of Spire.Doc, which has size limits: when loading
        // or manipulating a Word document, it may contain at most 500 paragraphs and
        // 25 tables. If you need more, purchase / upgrade to the paid edition yourself.
        result = document.GetText();

        // '#' is used as a temporary line-break marker, so strip pre-existing ones first.
        result = result.Replace("#", "");
        result = result.Replace('\r', '#');
        result = result.Replace('\n', '#');

        // Keep only the whitelisted characters (plus the '#' markers).
        result = Regex.Replace(result, @"[^\u4e00-\u9fa5\《\》\(\)\——\;\,\。\“\”\!\#]", "");

        // Collapse consecutive line-break markers into a single "@@" separator.
        result = Regex.Replace(result, "[#]+", "@@").Trim();

        result = TextFormat(result, blockText);
    }
    catch (Exception)
    {
        // Intentionally swallowed: the caller receives whatever was produced so far.
    }
    finally
    {
        document?.Close();
    }

    return result;
}
/// <summary>
/// Extracts the text of the Word document at <paramref name="path"/> and reduces it to a
/// normalized form: every existing '#' is removed, each CR/LF becomes '#', every character
/// outside U+4E00..U+9FA5 and the punctuation listed in the filter pattern is dropped,
/// each run of '#' collapses to "@@", and the trimmed result is passed through
/// <c>TextFormat(text)</c>.
/// </summary>
/// <param name="path">Path of the Word document to load.</param>
/// <returns>
/// The formatted text. Any exception is swallowed, in which case the value reached so far
/// is returned (<c>null</c> if the text could not be extracted at all).
/// </returns>
public override string ConvertToString(string path)
{
    string output = null;
    Spire.Doc.Document wordFile = null;

    try
    {
        wordFile = new Spire.Doc.Document();
        wordFile.LoadFromFile(path);

        // Best effort: remove header/footer content of the first section; ignore failures.
        try
        {
            wordFile.Sections[0].HeadersFooters.Header.ChildObjects.Clear();
            wordFile.Sections[0].HeadersFooters.Footer.ChildObjects.Clear();
        }
        catch
        {
        }

        output = wordFile.GetText();

        // '#' serves as a temporary line-break marker, so pre-existing ones go first.
        output = output.Replace("#", "");
        output = output.Replace('\r', '#');
        output = output.Replace('\n', '#');

        // Keep only the whitelisted characters (plus the '#' markers).
        output = Regex.Replace(output, @"[^\u4e00-\u9fa5\《\》\(\)\——\;\,\。\“\”\!\#]", "");

        // Collapse consecutive line-break markers into a single "@@" separator.
        output = Regex.Replace(output, "[#]+", "@@").Trim();

        output = TextFormat(output);
    }
    catch (Exception)
    {
        // Intentionally swallowed: the caller receives whatever was produced so far.
    }
    finally
    {
        wordFile?.Close();
    }

    return output;
}
/// <summary>
/// Loads the file at <paramref name="path"/> into <paramref name="doc"/>, tries to empty the
/// header and footer of the first section, and returns the document's text.
/// </summary>
/// <param name="path">Path of the Word document to load.</param>
/// <param name="doc">Caller-supplied document instance; it is not closed here.</param>
/// <returns>The value of <c>doc.GetText()</c> after the header/footer cleanup attempt.</returns>
string get_text_from_word_by_spire(string path, Spire.Doc.Document doc)
{
    doc.LoadFromFile(path);

    // Header/footer removal is best effort: any failure (e.g. no sections) is ignored.
    try
    {
        var firstSectionParts = doc.Sections[0].HeadersFooters;
        firstSectionParts.Header.ChildObjects.Clear();

        firstSectionParts = doc.Sections[0].HeadersFooters;
        firstSectionParts.Footer.ChildObjects.Clear();
    }
    catch
    {
    }

    string extracted = doc.GetText();
    return extracted;
}
/// <summary>
/// Returns the document text, loading it from <c>path_</c> on first use and caching the
/// result in <c>text_</c>. While <c>text_</c> is null or empty the file is (re)loaded.
/// </summary>
/// <returns>
/// The cached text: the output of <c>GetText()</c> passed through the <c>replace</c>
/// helper with the pairs ("\r\n" -> " ") and ("\t" -> " ").
/// NOTE(review): <c>replace</c> is defined elsewhere; presumably it substitutes each
/// left-hand string with its right-hand counterpart — confirm.
/// </returns>
public string text()
{
    if (string.IsNullOrEmpty(text_))
    {
        var document = new Document();
        try
        {
            document.LoadFromFile(path_);
            text_ = document.GetText().replace(new string[,] { { "\r\n", " " }, { "\t", " " } });
        }
        finally
        {
            // Release the loaded document even when loading or extraction throws;
            // previously it was never closed.
            document.Close();
        }
    }

    return text_;
}
/// <summary>
/// Downloads a file from blob storage, extracts its text according to the file
/// extension, and searches that text for employees.
/// </summary>
/// <param name="file">
/// File to be searched and added to the sql database
/// </param>
/// <param name="connectionString">
/// Azure blob storage connection string
/// </param>
/// <param name="containerName">
/// Azure blob storage container name
/// </param>
/// <returns>
/// Dictionary with the results found (the value returned by <c>SearchEmployeesAux1</c>)
/// </returns>
public Dictionary<string, int> SearchEmployees(D.Models.File file, string connectionString, string containerName)
{
    string filePath = GeneratePath(file.Name);

    // Invariant lower-casing: the extension is an identifier, not user-facing text,
    // so the comparison must not depend on the current culture.
    string extension = Path.GetExtension(file.Name).ToLowerInvariant();

    DownloadFile(file.Name, filePath, connectionString, containerName);

    string text;
    switch (extension)
    {
        case ".pdf":
        {
            PdfDocument doc = new PdfDocument();
            try
            {
                doc.LoadFromFile(filePath);

                StringBuilder buffer = new StringBuilder();
                foreach (PdfPageBase page in doc.Pages)
                {
                    buffer.Append(page.ExtractText());
                }

                text = buffer.ToString();
            }
            finally
            {
                // Close the document even if loading or text extraction fails.
                doc.Close();
            }

            // PDF text arrives with hard line breaks; flatten them and squeeze spaces.
            text = text.Replace(Environment.NewLine, " ");
            text = DeleteRepeatedSpaces(text);
            break;
        }

        case ".docx":
        {
            Spire.Doc.Document doc = new Spire.Doc.Document();
            try
            {
                doc.LoadFromFile(filePath);
                text = doc.GetText();
            }
            finally
            {
                // Close the document even if loading or text extraction fails.
                doc.Close();
            }

            break;
        }

        default:
            // Any other extension is read as a plain text file.
            text = File.ReadAllText(filePath);
            break;
    }

    return SearchEmployeesAux1(file, filePath, text);
}