Exemplo n.º 1
0
        /// <summary>
        /// Extracts searchable text from a file using IFilterTextReader's <c>FilterReader</c>.
        /// Every non-empty line is trimmed and cleaned: commas are dropped (a comma followed by
        /// white space becomes a single space), every character other than a-z, A-Z, digits,
        /// apostrophes, colons and white space is replaced by a space, and runs of white space
        /// are collapsed to a single space. Each cleaned line is appended, followed by one space.
        /// </summary>
        /// <param name="log">Logger that receives an error entry when extraction throws.</param>
        /// <param name="inputFile">File name handed to <c>FilterReader</c>.</param>
        /// <param name="prefix">
        /// Optional text to prepend to the result - such as document filename, metadata properties,
        /// anything else to include in search text.
        /// </param>
        /// <returns>
        /// <paramref name="prefix"/> followed by the cleaned text. If reading fails (for example,
        /// old Office document; or unknown document type), the exception message is logged and
        /// whatever was accumulated up to that point is returned. The result is null only when
        /// <paramref name="prefix"/> is null and no line was extracted.
        /// </returns>
        private static String ExtractTextFromFile(ILogger log, String inputFile, String prefix = null)
        {
            // A StringBuilder avoids re-copying the whole accumulated text for every line,
            // which is what repeated String concatenation does on large documents.
            var cleanedText = new System.Text.StringBuilder(prefix);

            // Tracks whether any line was appended so the "null prefix, no text" case can still return null.
            bool appendedAny = false;

            try
            {
                using (var reader = new FilterReader(inputFile, string.Empty, new FilterReaderOptions()))
                {
                    String line;

                    while ((line = reader.ReadLine()) != null)
                    {
                        line = line.Trim();
                        if (String.IsNullOrEmpty(line))
                        {
                            continue;
                        }

                        // Comma + white space becomes one space; any remaining comma is removed outright.
                        line = System.Text.RegularExpressions.Regex.Replace(line, @"[,]\s+", " ");
                        line = System.Text.RegularExpressions.Regex.Replace(line, @"[,]", "");

                        // Everything except letters a-z/A-Z, digits, apostrophes, colons and white space becomes a space.
                        line = System.Text.RegularExpressions.Regex.Replace(line, @"[^a-zA-Z'\d\s:]", " ");

                        // Collapse white space runs (including those introduced above) to a single space.
                        line = System.Text.RegularExpressions.Regex.Replace(line, @"\s+", " ");

                        cleanedText.Append(line).Append(' ');
                        appendedAny = true;
                    }
                } // end reader
            }
            catch (Exception ex)
            {
                // Best effort: keep whatever text was gathered before the failure.
                log.LogError("ExtractTextFromFile: " + ex.Message);
            }

            // Preserve the original contract: with no prefix and no extracted text the result is null, not "".
            if (prefix == null && !appendedAny)
            {
                return null;
            }

            return cleanedText.ToString();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Attempts to read the whole content of <paramref name="file"/> through a
        /// <c>FilterReader</c>. The extracted text is discarded; the call only exercises the read.
        /// Any exception - including a failure to open the file - is written to the console
        /// instead of being propagated.
        /// </summary>
        /// <param name="file">File to open and read; its extension is passed to the reader.</param>
        private static void TryReadFile(FileInfo file)
        {
            try
            {
                // Opening the stream inside the try keeps IO failures (missing file, sharing
                // violation, access denied) within this method's catch-all, like every other error.
                // The using statements release the reader first and then the stream, on every path.
                using (var stream = file.OpenRead())
                using (var reader = new FilterReader(stream, file.Extension, new FilterReaderOptions()))
                {
                    reader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Click handler for the select button. Shows an open-file dialog and, when the user
        /// confirms a file, reads it line by line through a <c>FilterReader</c> configured from
        /// the form's option controls, appending every line to <c>FilterTextBox</c> and finally
        /// the elapsed time. An exception raised while processing is rendered into
        /// <c>FilterTextBox</c> instead of being propagated.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event arguments (not used).</param>
        private void SelectButton_Click(object sender, EventArgs e)
        {
            // OpenFileDialog is a disposable component; the using block releases it as soon as
            // the handler is done instead of leaving it to the finalizer.
            using (var openFileDialog1 = new OpenFileDialog
            {
                // ReSharper disable once LocalizableElement
                Filter      = "Alle files (*.*)|*.*",
                FilterIndex = 1,
                Multiselect = false
            })
            {
                // Nothing to process unless the user clicked OK.
                if (openFileDialog1.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                FileLabel.Text                     = openFileDialog1.FileName;
                FindTextButton.Enabled             = true;
                TextToFindTextBox.Enabled          = true;
                FindWithRegexButton.Enabled        = true;
                TextToFindWithRegexTextBox.Enabled = true;

                try
                {
                    DisableInput();

                    FilterTextBox.AppendText("*** Processing file '" + openFileDialog1.FileName + "' ***" + Environment.NewLine + Environment.NewLine);
                    Application.DoEvents();
                    var stopWatch = new Stopwatch();

                    // Map the combo box selection onto the reader's timeout mode;
                    // any other index (including no selection) keeps NoTimeout.
                    var timeoutOption = FilterReaderTimeout.NoTimeout;

                    switch (TimeoutOptionsComboBox.SelectedIndex)
                    {
                        case 0:
                            timeoutOption = FilterReaderTimeout.NoTimeout;
                            break;

                        case 1:
                            timeoutOption = FilterReaderTimeout.TimeoutOnly;
                            break;

                        case 2:
                            timeoutOption = FilterReaderTimeout.TimeoutWithException;
                            break;
                    }

                    // int.Parse throws on non-numeric input; that is caught below and shown in the text box.
                    var options = new FilterReaderOptions()
                    {
                        DisableEmbeddedContent = DisableEmbeddedContentCheckBox.Checked,
                        IncludeProperties      = IncludePropertiesCheckBox.Checked,
                        ReadIntoMemory         = ReadIntoMemoryCheckBox.Checked,
                        ReaderTimeout          = timeoutOption,
                        Timeout                = int.Parse(TimeoutTextBox.Text)
                    };

                    using (var reader = new FilterReader(openFileDialog1.FileName, string.Empty, options))
                    {
                        stopWatch.Start();
                        string line;

                        // NOTE(review): every extracted line is also written to this temp file, but the
                        // file is never read or deleted within this handler - confirm it is still needed.
                        string tempFileName = Path.GetTempFileName();

                        while ((line = reader.ReadLine()) != null)
                        {
                            FilterTextBox.AppendText(line + Environment.NewLine);

                            // Pump the message loop so the form repaints while a long file is processed.
                            Application.DoEvents();
                            System.IO.File.AppendAllLines(tempFileName, new[] { line });
                        }

                        stopWatch.Stop();
                        FilterTextBox.AppendText(Environment.NewLine + "*** DONE IN " + stopWatch.Elapsed + " ***" + Environment.NewLine);
                        Application.DoEvents();
                    }
                }
                catch (Exception exception)
                {
                    DisableInput();
                    FilterTextBox.Text = exception.StackTrace + Environment.NewLine + GetInnerException(exception);
                }
                finally
                {
                    // Runs on success and on failure, so the form is usable again either way.
                    EnableInput();
                }
            }
        }