/// <summary>
/// Extracts the text of the file at <paramref name="path"/> with a
/// <c>TextExtractor</c> and prints it to the console.
/// </summary>
/// <param name="path">Path handed to the <c>TextExtractor</c> constructor.</param>
private void readFileContent(string path)
{
    string documentText = new TextExtractor(path).ExtractText();
    Console.WriteLine(documentText);
}
// Reads a .doc file and returns whatever AddTextToList produces from its text.
static List<string> ReadDocFile(string file)
{
    // Pull the whole document text out as a single string first ...
    string documentText = new TextExtractor(file).ExtractText();

    // ... then let AddTextToList turn it into the list that is returned.
    return AddTextToList(documentText);
}
/// <summary>
/// Extracts the text of <paramref name="sourceFileName"/> and passes it,
/// together with <paramref name="destFileName"/>, to <c>FileOperators.FileWrite</c>.
/// </summary>
/// <param name="sourceFileName">File handed to the <c>TextExtractor</c>.</param>
/// <param name="destFileName">First argument forwarded to <c>FileOperators.FileWrite</c>.</param>
public static void ExecuteWordExtraction(string sourceFileName, string destFileName)
{
    string extracted = new TextExtractor(sourceFileName).ExtractText();
    FileOperators.FileWrite(destFileName, extracted);
}
/// <summary>
/// Tries to read the text content of a <c>.doc</c>/<c>.docx</c> file (via
/// <c>TextExtractor</c>) or a <c>.txt</c> file (via <c>File.ReadAllText</c>).
/// </summary>
/// <param name="file">Path of the file to read. A null or empty path yields <c>false</c>.</param>
/// <param name="fileText">
/// Receives the extracted text on success; <c>null</c> when the method returns <c>false</c>.
/// </param>
/// <returns>
/// <c>true</c> when the file type is supported and reading did not throw; otherwise <c>false</c>.
/// </returns>
private static bool TryExtractText(string file, out string fileText)
{
    fileText = null;

    // A Try-method must not throw on a missing path.
    if (string.IsNullOrEmpty(file))
    {
        return false;
    }

    // Ordinal, case-insensitive suffix checks so that ".DOCX" / ".Txt" are accepted
    // and the result does not depend on the current culture.
    bool isWordDocument =
        file.EndsWith(".docx", System.StringComparison.OrdinalIgnoreCase) ||
        file.EndsWith(".doc", System.StringComparison.OrdinalIgnoreCase);
    bool isPlainText = file.EndsWith(".txt", System.StringComparison.OrdinalIgnoreCase);

    try
    {
        if (isWordDocument)
        {
            TextExtractor extractor = new TextExtractor(file);
            fileText = extractor.ExtractText();
            return true;
        }

        if (isPlainText)
        {
            fileText = File.ReadAllText(file);
            return true;
        }
    }
    catch
    {
        // Deliberate best effort: any read/extraction failure is reported
        // to the caller as "false" rather than as an exception.
        fileText = null;
    }

    return false;
}
/// <summary>
/// Click handler: lets the user pick a .txt, .docx or .pdf file and loads its
/// text into <c>txtKelime</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnGozat_Click(object sender, EventArgs e)
{
    // Let the user select the file; the dialog is disposed as soon as we have the name.
    string selectedFile;
    using (OpenFileDialog openDialog = new OpenFileDialog())
    {
        openDialog.Filter = "Documents (*.txt, *.pdf, *.docx)|*.txt; *.docx; *.pdf;";
        openDialog.Title = "Select Document";
        if (openDialog.ShowDialog() == DialogResult.Cancel)
        {
            return;
        }

        selectedFile = openDialog.FileName;
    }

    // Compare extensions case-insensitively so that e.g. ".TXT" is handled too.
    string ext = Path.GetExtension(selectedFile);

    if (string.Equals(ext, ".txt", StringComparison.OrdinalIgnoreCase))
    {
        // The user selected a plain-text file.
        try
        {
            // "using" guarantees the stream and reader are closed even if reading throws.
            using (FileStream fStr = new FileStream(selectedFile, FileMode.Open, FileAccess.Read))
            using (StreamReader sr = new StreamReader(fStr, Encoding.Default))
            {
                txtKelime.Text = sr.ReadToEnd();
            }
        }
        catch (Exception exception)
        {
            // MessageBox.Show(text, caption): details go in the body, the short title in the caption.
            MessageBox.Show(exception.ToString(), "Error opening file");
        }
    }
    else if (string.Equals(ext, ".docx", StringComparison.OrdinalIgnoreCase))
    {
        // The user selected a .docx file.
        try
        {
            TextExtractor extractor = new TextExtractor(selectedFile);
            // NOTE(review): as written the last two replacements swap a space for a space (no-ops);
            // presumably they were meant to collapse repeated spaces - confirm against the original file.
            txtKelime.Text = extractor.ExtractText().Replace("\n", " ").Replace(" ", " ").Replace(" ", " ");
        }
        catch (Exception exception)
        {
            MessageBox.Show(exception.ToString(), "Error opening file");
        }
    }
    else if (string.Equals(ext, ".pdf", StringComparison.OrdinalIgnoreCase))
    {
        // The user selected a .pdf file.
        try
        {
            PdfOku pdfOku = new PdfOku();
            txtKelime.Text = pdfOku.getPdfResult(selectedFile);
        }
        catch (Exception exception)
        {
            MessageBox.Show(exception.ToString(), "Error opening file");
        }
    }
    else
    {
        // The user selected some other file type (the dialog filter should normally prevent this).
        MessageBox.Show("Geçerli dosya seçiniz");
    }
}
/// <summary>
/// Extracts the text of the file at <paramref name="path"/> and returns the result of
/// <c>Comparer.CheckTextIfMatch(s, extractedText)</c>.
/// </summary>
/// <param name="path">File handed to the <c>TextExtractor</c>.</param>
/// <param name="s">First argument forwarded to <c>Comparer.CheckTextIfMatch</c>.</param>
/// <returns>Whatever <c>Comparer.CheckTextIfMatch</c> returns for the two strings.</returns>
public static bool ReadFileCompateText(string path, string s)
{
    // Load the document's text into a single string.
    string documentText = new TextExtractor(path).ExtractText();

    bool isMatch = Comparer.CheckTextIfMatch(s, documentText);
    return isMatch;
}
//string filePath;
/// <summary>
/// Returns the text that <c>Code7248.word_reader.TextExtractor</c> extracts from the given file.
/// </summary>
/// <param name="filePath">Path handed to the extractor.</param>
/// <returns>The string returned by <c>ExtractText()</c>.</returns>
public string gettext(string filePath)
{
    Code7248.word_reader.TextExtractor reader = new Code7248.word_reader.TextExtractor(filePath);
    return reader.ExtractText();
}
/// <summary>
/// Reads the text of the .docx document at <paramref name="path"/>.
/// A <c>TextExtractor</c> is created for the path, its <c>ExtractText</c> result is
/// echoed to the console and then returned to the caller.
/// </summary>
/// <param name="path">Path of the document to read.</param>
/// <returns>The extracted text; an empty string if the extractor yields <c>null</c>.</returns>
string ReadDocx(string path)
{
    TextExtractor textextractor = new TextExtractor(path);
    string text = textextractor.ExtractText();

    // Kept from the original implementation: echo the text to the console.
    Console.WriteLine(text);

    // The previous version captured the return value from an empty StringBuilder
    // *before* appending the text, so it always returned "". Return the text itself,
    // still never null (as before).
    return text ?? string.Empty;
}
/// <summary>
/// Appends the text extracted from the file at <paramref name="path"/> to <c>fileContent</c>.
/// If anything throws, an empty string is appended instead.
/// </summary>
/// <param name="path">Path handed to the <c>TextExtractor</c>.</param>
public void wordReader(string path)
{
    try
    {
        TextExtractor reader = new TextExtractor(path);
        string extracted = reader.ExtractText();
        fileContent.Append(extracted);
    }
    catch (Exception)
    {
        // Best effort: failures contribute nothing to the accumulated content.
        fileContent.Append("");
    }
}
/// <summary>
/// Checks whether the text extracted from <paramref name="fileFullPath"/> contains
/// <paramref name="text"/>, using an ordinal, case-insensitive search.
/// </summary>
/// <param name="fileFullPath">Path handed to the <c>TextExtractor</c>.</param>
/// <param name="text">The text to look for.</param>
/// <returns>
/// <c>true</c> if the text is found; <c>false</c> if it is not found or any exception occurs.
/// </returns>
public static bool FindTextInDocFile(string fileFullPath, string text)
{
    try
    {
        string content = new TextExtractor(fileFullPath).ExtractText();
        int position = content.IndexOf(text, StringComparison.OrdinalIgnoreCase);
        return position >= 0;
    }
    catch (Exception)
    {
        // Any failure (extraction or search) is reported as "not found".
        return false;
    }
}
// Went to the trouble of supporting opening text files so their contents can be
// encrypted (the file is not saved back).
/// <summary>
/// Menu click handler: lets the user pick a .txt, .rtf, .doc or .docx file and loads
/// its contents into <c>rtbIn</c>. A message box is shown if loading fails.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void tsmiOpen_Click(object sender, EventArgs e)
{
    using (OpenFileDialog openFile = new OpenFileDialog())
    {
        // Filter the dialog down to text-like files. Every pattern uses the "*.ext" form;
        // the first group previously contained "*rtf" / "*docx" and an empty trailing pattern.
        openFile.Filter = "Все текстовые файлы|*.txt;*.rtf;*.doc;*.docx|Файлы .txt|*.txt|Файлы .rtf|*.rtf|Файлы .doc|*.doc|Файлы .docx|*.docx";

        if (openFile.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        // Extract the extension; normalise its case so ".TXT" / ".Doc" are dispatched as well.
        string type = (Path.GetExtension(openFile.FileName) ?? string.Empty).ToLowerInvariant();

        try
        {
            switch (type)
            {
                case ".txt":
                    rtbIn.Text = File.ReadAllText(openFile.FileName, Encoding.Default);
                    break;

                case ".rtf":
                    rtbIn.LoadFile(openFile.FileName, RichTextBoxStreamType.RichText);
                    break;

                case ".doc":  // Working with Interop.Word gave me pause, but then - a miracle...
                case ".docx": // ...I stumbled upon the Code7248.word_reader.dll library online.
                    TextExtractor extractor = new TextExtractor(openFile.FileName);
                    rtbIn.Text = extractor.ExtractText();
                    break;
            }
        }
        catch
        {
            // Same message for every file type, as before.
            MessageBox.Show("Ошибка загрузки");
        }
    }
}
/// <summary>
/// Reads DOC and DOCX file types and extracts the words in each file.
/// Requires: the file path has a .doc or .docx extension (compared case-insensitively).
/// </summary>
/// <param name="filenameWithPath">Path of the DOC or DOCX document, including the filename.</param>
/// <exception cref="PlatformNotSupportedException">
/// Thrown (via the contract) when the file to read does not have a supported doc extension.
/// </exception>
/// <returns>
/// A dictionary with a single entry whose key is <paramref name="filenameWithPath"/> and whose
/// value is the list of words found in the document.
/// </returns>
internal static Dictionary<string, List<string>> readDocFiles(string filenameWithPath)
{
    Contract.Requires<PlatformNotSupportedException>(
        string.Equals(System.IO.Path.GetExtension(filenameWithPath), ".doc", StringComparison.OrdinalIgnoreCase) ||
        string.Equals(System.IO.Path.GetExtension(filenameWithPath), ".docx", StringComparison.OrdinalIgnoreCase));

    TextExtractor extractor = new TextExtractor(filenameWithPath);
    string documentText = extractor.ExtractText().Trim();

    // Split on ANY of tab, CR, LF or space. The previous code passed the single string
    // "\t\r\n" as a separator, which only matched that exact three-character sequence,
    // so words separated by a lone tab or line break were never split apart.
    char[] separators = new char[] { '\t', '\r', '\n', ' ' };
    List<string> words = new List<string>(
        documentText.Split(separators, StringSplitOptions.RemoveEmptyEntries));

    Dictionary<string, List<string>> listresult = new Dictionary<string, List<string>>();
    listresult.Add(filenameWithPath, words);
    return listresult;
}
/// <summary>
/// Builds a <c>DocumentVersionReturnModel</c> from <paramref name="documentVersion"/>, copying
/// its identifying fields and filling <c>Text</c> with the text extracted from its file.
/// </summary>
/// <param name="documentVersion">Source version; its <c>filePath</c> is handed to the extractor.</param>
/// <returns>The populated return model.</returns>
public DocumentVersionReturnModel Create(DocumentVersion documentVersion)
{
    TextExtractor reader = new TextExtractor(documentVersion.filePath);

    DocumentVersionReturnModel model = new DocumentVersionReturnModel();
    model.Id = documentVersion.Id;
    model.Text = reader.ExtractText();
    //model.FilePath = documentVersion.filePath;
    model.DocumentId = documentVersion.DocumentId;
    model.ModifiedBy = documentVersion.ModifiedBy;
    model.CreationDate = documentVersion.CreationDate;
    model.VersionNumber = documentVersion.VersionNumber;
    return model;
}
/*
 * Reads .doc files.
 * Usage pattern from (code7248, 2012).
 * @param string file (file path)
 * @return string[] the array produced by SetArray, or null if reading failed
 */
private static string[] ReadDoc(string file)
{
    try
    {
        // Extract the document text, then let SetArray turn it into the result array.
        string line = new TextExtractor(file).ExtractText();
        return SetArray(line, file);
    }
    catch (Exception err)
    {
        // Report the failure to the user; the caller receives null.
        MessageBox.Show("Error: " + err);
        return null;
    }
}
/// <summary>
/// Scans the directory <paramref name="p_sPath"/> for files matching <c>*.doc</c>, extracts the
/// text of each one and, when <c>StringContains</c> reports a hit against <c>searchWords</c>,
/// adds the file to <c>resultListBox</c> and its path to <c>CVdirs</c>.
/// </summary>
/// <param name="p_sPath">Directory to search. If it cannot be listed, the
/// <c>InvalidDirectory</c> form is shown and the method returns.</param>
private void SearchForDocFiles(string p_sPath)
{
    FileInfo[] Files;
    try
    {
        dir = new DirectoryInfo(@p_sPath);
        Files = dir.GetFiles("*.doc");
    }
    catch
    {
        // Directory missing or inaccessible: tell the user and stop.
        InvalidDirectory dirForm = new InvalidDirectory();
        dirForm.Show();
        return;
    }

    foreach (FileInfo file in Files)
    {
        // Use the fully qualified path. The previous code concatenated dir.ToString() and
        // file.ToString() without a separator, which yields a wrong path whenever the
        // directory string lacks a trailing separator, and ToString() output is not a
        // reliable path in the first place.
        string fileName = file.FullName;
        try
        {
            TextExtractor extractor = new TextExtractor(fileName);
            string text = extractor.ExtractText();
            string textToLower = text.ToLower();

            if (StringContains(textToLower, searchWords))
            {
                resultListBox.Items.Add(file.ToString());
                CVdirs.Add(fileName);
            }
        }
        catch
        {
            // A single unreadable file must not abort the whole scan.
            Console.WriteLine("could not open file: " + fileName);
        }
    }
}
/// <summary>
/// Click handler: stops playback, lets the user pick a text/.doc/.docx file, loads it into
/// <c>MyText</c>, normalises its whitespace and splits it into <c>words</c> on <c>mychar</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    timer1.Stop();
    button5.Text = "P L A Y";

    using (OpenFileDialog dlg = new OpenFileDialog())
    {
        dlg.Filter = "Text files(*.txt)|*.txt|Doc files(*.doc)|*.doc|Docx files(*.docx)|*.docx|All files(*.*)|*.*";
        if (dlg.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        try
        {
            // Case-insensitive check: ".DOC"/".DOCX" must go through the Word extractor too,
            // otherwise the binary file would be read as plain text.
            string ext = System.IO.Path.GetExtension(dlg.FileName);
            bool isWordDocument =
                string.Equals(ext, ".doc", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(ext, ".docx", StringComparison.OrdinalIgnoreCase);

            if (isWordDocument)
            {
                Code7248.word_reader.TextExtractor extractor = new TextExtractor(dlg.FileName);
                MyText.Text = extractor.ExtractText();
            }
            else
            {
                MyText.Text = File.ReadAllText(dlg.FileName);
            }

            // Each step is written back through MyText.Text exactly as before, so whatever
            // the control does to the text on assignment is preserved.
            MyText.Text = MyText.Text.Replace("\t", " ");
            MyText.Text = MyText.Text.Replace(Environment.NewLine, "\n");
            if (mychar == '\n')
            {
                MyText.Text = MyText.Text.Replace("\n", " ");
            }

            // Turn remaining line breaks into the split character.
            string nolstrTextine = MyText.Text.Replace("\n", System.Convert.ToString(mychar));
            i = 0;
            if (mychar == '\n')
            {
                // When splitting on '\n', every '.' also ends a chunk.
                nolstrTextine = nolstrTextine.Replace(".", ". " + mychar);
            }

            words = nolstrTextine.Split(new char[] { mychar }, StringSplitOptions.RemoveEmptyEntries);
            progressBar1.Maximum = words.Length;

            timer1.Stop();
            button5.Text = "P L A Y";
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
        }
    }
}