예제 #1
0
        /// <summary>
        /// Extracts the text of the document at <paramref name="path"/> and prints it to the console.
        /// </summary>
        /// <param name="path">Path passed to the <c>TextExtractor</c> constructor.</param>
        private void readFileContent(string path)
        {
            var reader = new TextExtractor(path);

            Console.WriteLine(reader.ExtractText());
        }
예제 #2
0
        /// <summary>
        /// Reads a .doc file and hands its extracted text to <c>AddTextToList</c>.
        /// </summary>
        /// <param name="file">Path of the Word document to read.</param>
        /// <returns>The list produced by <c>AddTextToList</c> for the extracted text.</returns>
        static List <string> ReadDocFile(string file)
        {
            // Pull the document's text out first, then let the helper turn it into a list.
            string contents = new TextExtractor(file).ExtractText();

            return AddTextToList(contents);
        }
예제 #3
0
        /// <summary>
        /// Extracts the text of a Word document and passes it to <c>FileOperators.FileWrite</c>.
        /// </summary>
        /// <param name="sourceFileName">Path of the Word document to read.</param>
        /// <param name="destFileName">Destination name forwarded to <c>FileOperators.FileWrite</c>.</param>
        public static void ExecuteWordExtraction(string sourceFileName, string destFileName)
        {
            var    reader    = new TextExtractor(sourceFileName);
            string extracted = reader.ExtractText();

            FileOperators.FileWrite(destFileName, extracted);
        }
예제 #4
0
        /// <summary>
        /// Attempts to read the text of a .doc/.docx file (via <c>TextExtractor</c>) or a .txt file.
        /// </summary>
        /// <param name="file">Path of the file to read. The extension is matched case-insensitively.</param>
        /// <param name="fileText">The extracted text on success; <c>null</c> otherwise.</param>
        /// <returns>
        /// <c>true</c> when the file had a supported extension and was read without an exception;
        /// <c>false</c> for a null path, an unsupported extension, or any read failure.
        /// </returns>
        private static bool TryExtractText(string file, out string fileText)
        {
            fileText = null;

            // A Try-method must not throw on a missing path.
            if (file == null)
            {
                return(false);
            }

            bool isWord = file.EndsWith(".docx", StringComparison.OrdinalIgnoreCase) ||
                          file.EndsWith(".doc", StringComparison.OrdinalIgnoreCase);
            bool isText = file.EndsWith(".txt", StringComparison.OrdinalIgnoreCase);

            if (!isWord && !isText)
            {
                return(false);
            }

            try
            {
                fileText = isWord
                    ? new TextExtractor(file).ExtractText()
                    : File.ReadAllText(file);
                return(true);
            }
            catch (Exception)
            {
                // Best-effort by design: any failure is reported through the return value.
                fileText = null;
                return(false);
            }
        }
예제 #5
0
        /// <summary>
        /// Lets the user pick a .txt, .docx or .pdf document and loads its text into <c>txtKelime</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void btnGozat_Click(object sender, EventArgs e)
        {
            // Choose the file. The dialog is disposed when the handler finishes.
            using (OpenFileDialog openDialog = new OpenFileDialog())
            {
                openDialog.Filter = "Documents (*.txt, *.pdf, *.docx)|*.txt; *.docx; *.pdf;";
                openDialog.Title  = "Select Document";

                if (openDialog.ShowDialog() == DialogResult.Cancel)
                {
                    return;
                }

                // Compare extensions case-insensitively so "FILE.TXT" is handled like "file.txt".
                string ext = Path.GetExtension(openDialog.FileName);

                try
                {
                    if (string.Equals(ext, ".txt", StringComparison.OrdinalIgnoreCase))
                    {
                        // Plain text: read with the system default encoding; the reader (and its
                        // underlying file handle) is released even when reading throws.
                        using (StreamReader sr = new StreamReader(openDialog.FileName, Encoding.Default))
                        {
                            txtKelime.Text = sr.ReadToEnd();
                        }
                    }
                    else if (string.Equals(ext, ".docx", StringComparison.OrdinalIgnoreCase))
                    {
                        // Word document: flatten newlines and tabs to spaces, then collapse
                        // double spaces once (same single pass as before).
                        TextExtractor extractor = new TextExtractor(openDialog.FileName);
                        txtKelime.Text = extractor.ExtractText().Replace("\n", " ").Replace("\t", " ").Replace("  ", " ");
                    }
                    else if (string.Equals(ext, ".pdf", StringComparison.OrdinalIgnoreCase))
                    {
                        PdfOku pdfOku = new PdfOku();
                        txtKelime.Text = pdfOku.getPdfResult(openDialog.FileName);
                    }
                    else
                    {
                        // The dialog filter should normally prevent any other extension.
                        MessageBox.Show("Geçerli dosya seçiniz");
                    }
                }
                catch (Exception exception)
                {
                    // MessageBox.Show(text, caption): details go in the body, the short label in the title.
                    MessageBox.Show(exception.ToString(), "Error opening file");
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Extracts the text of the Word document at <paramref name="path"/> and checks it against
        /// <paramref name="s"/> using <c>Comparer.CheckTextIfMatch</c>.
        /// </summary>
        /// <param name="path">Path of the document to read.</param>
        /// <param name="s">Text passed as the first argument to <c>Comparer.CheckTextIfMatch</c>.</param>
        /// <returns>The result of <c>Comparer.CheckTextIfMatch</c>.</returns>
        public static bool ReadFileCompateText(string path, string s)
        {
            // Load the document's text, then delegate the comparison.
            string documentText = new TextExtractor(path).ExtractText();

            return Comparer.CheckTextIfMatch(s, documentText);
        }
예제 #7
0
        //string filePath;

        /// <summary>
        /// Returns the text extracted from the document at <paramref name="filePath"/>.
        /// </summary>
        /// <param name="filePath">Path of the document to read.</param>
        /// <returns>The string produced by <c>TextExtractor.ExtractText</c>.</returns>
        public string gettext(string filePath)
        {
            var reader = new TextExtractor(filePath);

            return reader.ExtractText();
        }
예제 #8
0
        /// <summary>
        /// Reads the text of the .docx document at <paramref name="path"/> using a
        /// <c>TextExtractor</c>, echoes it to the console, and returns it.
        /// </summary>
        /// <param name="path">Path of the document to read.</param>
        /// <returns>The extracted text; an empty string if the extractor yields <c>null</c>.</returns>
        string ReadDocx(string path)
        {
            TextExtractor textextractor = new TextExtractor(path);
            string        text          = textextractor.ExtractText();

            Console.WriteLine(text);

            // The previous version captured the result from an empty StringBuilder *before*
            // appending to it, so it always returned "". Return the extracted text itself.
            return(text ?? string.Empty);
        }
 /// <summary>
 /// Extracts the text of the document at <paramref name="path"/> and appends it to
 /// <c>fileContent</c>. If anything throws, an empty string is appended instead.
 /// </summary>
 /// <param name="path">Path of the document to read.</param>
 public void wordReader(string path)
 {
     try
     {
         string extracted = new TextExtractor(path).ExtractText();
         fileContent.Append(extracted);
     }
     catch (Exception)
     {
         // Failure is deliberately not surfaced to the caller.
         fileContent.Append("");
     }
 }
 /// <summary>
 /// Reports whether the document at <paramref name="fileFullPath"/> contains
 /// <paramref name="text"/>, ignoring case (ordinal comparison).
 /// </summary>
 /// <param name="fileFullPath">Path of the document to search.</param>
 /// <param name="text">Text to look for.</param>
 /// <returns><c>true</c> when the text is found; <c>false</c> when it is not or when any exception occurs.</returns>
 public static bool FindTextInDocFile(string fileFullPath, string text)
 {
     try
     {
         string contents = new TextExtractor(fileFullPath).ExtractText();
         int    position = contents.IndexOf(text, StringComparison.OrdinalIgnoreCase);

         return position >= 0;
     }
     catch (Exception)
     {
         // Any failure (unreadable file, null argument, ...) counts as "not found".
         return false;
     }
 }
예제 #11
0
        /// <summary>
        /// Opens a text-like file (.txt, .rtf, .doc, .docx) and loads its contents into <c>rtbIn</c>
        /// so they can be encrypted. Nothing is saved back to the file.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void tsmiOpen_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog openFile = new OpenFileDialog())
            {
                // Restrict the dialog to text file extensions. Every pattern needs the leading
                // "*." — the previous "*rtf" / "*docx" matched any name merely ending in those letters.
                openFile.Filter = "Все текстовые файлы|*.txt;*.rtf;*.doc;*.docx|Файлы .txt|*.txt|Файлы .rtf|*.rtf|Файлы .doc|*.doc|Файлы .docx|*.docx";

                if (openFile.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                // Extract the extension; lower-case it so ".TXT" and ".txt" take the same branch.
                string type = Path.GetExtension(openFile.FileName).ToLowerInvariant();

                try
                {
                    switch (type)
                    {
                    case ".txt":
                        rtbIn.Text = File.ReadAllText(openFile.FileName, Encoding.Default);
                        break;

                    case ".rtf":
                        rtbIn.LoadFile(openFile.FileName, RichTextBoxStreamType.RichText);
                        break;

                    case ".doc":      // Word documents are read with the Code7248.word_reader
                    case ".docx":     // library rather than Interop.Word.
                        TextExtractor extractor = new TextExtractor(openFile.FileName);
                        rtbIn.Text = extractor.ExtractText();
                        break;
                    }
                }
                catch
                {
                    // Same user-facing message for every load failure, as before.
                    MessageBox.Show("Ошибка загрузки");
                }
            }
        }
예제 #12
0
        /// <summary>
        /// Reads a DOC or DOCX file and extracts the words it contains.
        /// Requires: the file path has a ".doc" or ".docx" extension.
        /// </summary>
        /// <param name="filenameWithPath">Path of the DOC or DOCX document, including the filename.</param>
        /// <exception cref="PlatformNotSupportedException">
        /// Raised by the contract precondition when the extension is neither ".doc" nor ".docx".
        /// </exception>
        /// <returns>
        /// A dictionary with a single entry: the key is <paramref name="filenameWithPath"/> and the
        /// value is the list of words found in the document.
        /// </returns>
        internal static Dictionary <string, List <string> > readDocFiles(string filenameWithPath)
        {
            // The precondition must stay the first statement of the method.
            Contract.Requires <PlatformNotSupportedException>(System.IO.Path.GetExtension(filenameWithPath).Equals(".doc") ||
                                                              System.IO.Path.GetExtension(filenameWithPath).Equals(".docx"));

            TextExtractor extractor = new TextExtractor(filenameWithPath);
            string        temp      = extractor.ExtractText().Trim();

            // Split on each whitespace character individually. The previous separator "\t\r\n"
            // was one three-character string, so words separated by a lone tab or line break
            // were never split apart.
            char[]        separators = { '\t', '\r', '\n', ' ' };
            List <string> result     = new List <string>(temp.Split(separators, StringSplitOptions.RemoveEmptyEntries));

            Dictionary <string, List <string> > listresult = new Dictionary <string, List <string> >();
            listresult.Add(filenameWithPath, result);
            return(listresult);
        }
예제 #13
0
        /// <summary>
        /// Builds a <c>DocumentVersionReturnModel</c> from <paramref name="documentVersion"/>,
        /// filling <c>Text</c> with the text extracted from the file at its <c>filePath</c>.
        /// </summary>
        /// <param name="documentVersion">The version record to convert.</param>
        /// <returns>A new model carrying the version's metadata and extracted text.</returns>
        public DocumentVersionReturnModel Create(DocumentVersion documentVersion)
        {
            var reader = new TextExtractor(documentVersion.filePath);

            var model = new DocumentVersionReturnModel
            {
                Id = documentVersion.Id,
                Text = reader.ExtractText(),
                //FilePath = documentVersion.filePath,
                DocumentId = documentVersion.DocumentId,
                ModifiedBy = documentVersion.ModifiedBy,
                CreationDate = documentVersion.CreationDate,
                VersionNumber = documentVersion.VersionNumber,
            };

            return model;
        }
예제 #14
0
 /*
  * Reads a .doc file.
  * Usage pattern from (code7248, 2012).
  * @param string file (file path)
  * @return string[] the array built by SetArray, or null when reading fails
  */
 private static string[] ReadDoc(string file)
 {
     try
     {
         // Extract the document text and let SetArray convert it.
         string contents = new TextExtractor(file).ExtractText();
         return SetArray(contents, file);
     }
     catch (Exception err)
     {
         MessageBox.Show("Error: " + err);
         return null; // the caller receives null on failure
     }
 }
예제 #15
0
        /// <summary>
        /// Scans the directory <paramref name="p_sPath"/> for files matching "*.doc" and records
        /// every file whose lower-cased text satisfies <c>StringContains</c> against <c>searchWords</c>.
        /// Matches are added to <c>resultListBox</c> (file name) and <c>CVdirs</c> (combined path).
        /// </summary>
        /// <param name="p_sPath">Directory to search.</param>
        private void SearchForDocFiles(string p_sPath)
        {
            FileInfo[] docFiles;
            try
            {
                dir      = new DirectoryInfo(p_sPath);
                docFiles = dir.GetFiles("*.doc");
            }
            catch
            {
                // Invalid or inaccessible directory: show the dedicated form and stop.
                InvalidDirectory dirForm = new InvalidDirectory();
                dirForm.Show();
                return;
            }

            foreach (FileInfo file in docFiles)
            {
                // Build the path with Path.Combine: plain concatenation of dir.ToString() and
                // file.ToString() dropped the separator whenever p_sPath had no trailing slash,
                // and the ToString() results differ between runtimes.
                string fileName = Path.Combine(p_sPath, file.Name);
                try
                {
                    TextExtractor extractor   = new TextExtractor(fileName);
                    string        textToLower = extractor.ExtractText().ToLower();

                    if (StringContains(textToLower, searchWords))
                    {
                        resultListBox.Items.Add(file.Name);
                        CVdirs.Add(fileName);
                    }
                }
                catch
                {
                    // A single unreadable file must not abort the whole scan.
                    Console.WriteLine("could not open file: " + fileName);
                }
            }
        }
예제 #16
0
        /// <summary>
        /// Stops playback, lets the user pick a text or Word file, loads it into <c>MyText</c>,
        /// and splits the content into <c>words</c> using the delimiter <c>mychar</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            timer1.Stop();
            button5.Text = "P L A Y";

            using (OpenFileDialog dlg = new OpenFileDialog())
            {
                dlg.Filter = "Text files(*.txt)|*.txt|Doc files(*.doc)|*.doc|Docx files(*.docx)|*.docx|All files(*.*)|*.*";

                if (dlg.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                try
                {
                    string ext = System.IO.Path.GetExtension(dlg.FileName);

                    // Word documents go through TextExtractor; everything else is read as plain text.
                    if (string.Equals(ext, ".doc", StringComparison.OrdinalIgnoreCase) ||
                        string.Equals(ext, ".docx", StringComparison.OrdinalIgnoreCase))
                    {
                        TextExtractor extractor = new TextExtractor(dlg.FileName);
                        MyText.Text = extractor.ExtractText();
                    }
                    else
                    {
                        MyText.Text = File.ReadAllText(dlg.FileName);
                    }

                    // Normalise whitespace. Each step is written back to the control and re-read,
                    // exactly as before, in case the control adjusts the text it stores.
                    MyText.Text = MyText.Text.Replace("\t", " ");
                    MyText.Text = MyText.Text.Replace(Environment.NewLine, "\n");
                    if (mychar == '\n')
                    {
                        MyText.Text = MyText.Text.Replace("\n", " ");
                    }

                    // Turn remaining line breaks into the delimiter. When the delimiter is a
                    // line break, additionally break after every '.'.
                    string delimited = MyText.Text.Replace("\n", System.Convert.ToString(mychar));
                    i = 0;
                    if (mychar == '\n')
                    {
                        delimited = delimited.Replace(".", ". " + mychar);
                    }

                    words = delimited.Split(new char[] { mychar }, StringSplitOptions.RemoveEmptyEntries);

                    progressBar1.Maximum = words.Length;

                    // NOTE(review): repeats the reset done at the top of the handler; kept unchanged.
                    timer1.Stop();
                    button5.Text = "P L A Y";
                }
                catch (Exception ex)
                {
                    MessageBox.Show(ex.Message);
                }
            }
        }