Ejemplo n.º 1
0
        /// <summary>
        /// Search for matches in .PDF files.
        /// </summary>
        /// <param name="fileName">The name of the file.</param>
        /// <param name="searchTerms">The terms to search.</param>
        /// <param name="matcher">The matcher object to determine search criteria.</param>
        /// <returns>The matched lines containing the search terms.</returns>
        public override List <MatchedLine> Search(string fileName, IEnumerable <string> searchTerms, Matcher matcher)
        {
            List <MatchedLine> matchedLines = new List <MatchedLine>();
            string             tempDirPath  = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, fileName + TempExtractDirectoryName);

            try
            {
                Directory.CreateDirectory(tempDirPath);
                SharpCompress.Archives.IArchive archive = null;

                if (fileName.ToUpper().EndsWith(".ODS") && SharpCompress.Archives.Zip.ZipArchive.IsZipFile(fileName))
                {
                    archive = SharpCompress.Archives.Zip.ZipArchive.Open(fileName);
                }

                if (archive != null)
                {
                    IReader reader = archive.ExtractAllEntries();
                    while (reader.MoveToNextEntry())
                    {
                        if (!reader.Entry.IsDirectory && reader.Entry.Key == "content.xml")
                        {
                            // Ignore symbolic links as these are captured by the original target.
                            if (string.IsNullOrWhiteSpace(reader.Entry.LinkTarget) && !reader.Entry.Key.Any(c => DisallowedCharactersByOperatingSystem.Any(dc => dc == c)))
                            {
                                try
                                {
                                    reader.WriteEntryToDirectory(tempDirPath, new ExtractionOptions()
                                    {
                                        ExtractFullPath = true, Overwrite = true
                                    });
                                    string fullFilePath = System.IO.Path.Combine(tempDirPath, reader.Entry.Key.Replace(@"/", @"\"));
                                    matchedLines = this.GetMatchesFromOdsContentXml(fileName, fullFilePath, searchTerms, matcher);
                                }
                                catch (PathTooLongException ptlex)
                                {
                                    throw new PathTooLongException(string.Format("{0} {1} {2} {3} - {4}", Resources.Strings.ErrorAccessingEntry, reader.Entry.Key, Resources.Strings.InArchive, fileName, ptlex.Message));
                                }
                            }
                        }
                    }

                    if (matcher.CancellationTokenSource.Token.IsCancellationRequested)
                    {
                        matchedLines.Clear();
                    }

                    archive.Dispose();
                }
            }
            finally
            {
                this.RemoveTempDirectory(tempDirPath);
            }

            return(matchedLines);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Search for matches in .ODP files.
        /// </summary>
        /// <param name="fileName">The name of the file.</param>
        /// <param name="searchTerms">The terms to search.</param>
        /// <param name="matcher">The matcher object to determine search criteria.</param>
        /// <returns>The matched lines containing the search terms.</returns>
        public override List <MatchedLine> Search(string fileName, IEnumerable <string> searchTerms, Matcher matcher)
        {
            List <MatchedLine> matchedLines = new List <MatchedLine>();
            int    matchCounter             = 0;
            string tempDirPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, fileName + TempExtractDirectoryName);

            try
            {
                Directory.CreateDirectory(tempDirPath);
                SharpCompress.Archives.IArchive archive = null;

                if (fileName.ToUpper().EndsWith(".ODP") && SharpCompress.Archives.Zip.ZipArchive.IsZipFile(fileName))
                {
                    archive = SharpCompress.Archives.Zip.ZipArchive.Open(fileName);
                }

                if (archive != null)
                {
                    IReader reader = archive.ExtractAllEntries();
                    while (reader.MoveToNextEntry())
                    {
                        if (!reader.Entry.IsDirectory && reader.Entry.Key == "content.xml")
                        {
                            // Ignore symbolic links as these are captured by the original target.
                            if (string.IsNullOrWhiteSpace(reader.Entry.LinkTarget) && !reader.Entry.Key.Any(c => DisallowedCharactersByOperatingSystem.Any(dc => dc == c)))
                            {
                                try
                                {
                                    reader.WriteEntryToDirectory(tempDirPath, new ExtractionOptions()
                                    {
                                        ExtractFullPath = true, Overwrite = true
                                    });
                                    string        fullFilePath        = System.IO.Path.Combine(tempDirPath, reader.Entry.Key.Replace(@"/", @"\"));
                                    StringBuilder presentationAllText = new StringBuilder();

                                    foreach (XElement element in XDocument.Load(fullFilePath, LoadOptions.None).Descendants().Where(d => d.Name.LocalName == "page"))
                                    {
                                        string slideName = element.Attributes().Where(sn => sn.Name.LocalName == "name").Select(sn => sn.Value).FirstOrDefault();
                                        int    slideNumber;

                                        // Search based on keyword "Slide", not the resources translation.
                                        if (int.TryParse(slideName.Replace("Slide", string.Empty), out slideNumber))
                                        {
                                            string slideAllText = string.Join(Environment.NewLine, element.Descendants().Where(sc => sc.Name.LocalName == "span").Select(sc => sc.Value));

                                            if (!matcher.RegularExpressionOptions.HasFlag(RegexOptions.Multiline))
                                            {
                                                foreach (string searchTerm in searchTerms)
                                                {
                                                    MatchCollection matches = Regex.Matches(slideAllText, searchTerm, matcher.RegularExpressionOptions);            // Use this match for getting the locations of the match.

                                                    if (matches.Count > 0)
                                                    {
                                                        foreach (Match match in matches)
                                                        {
                                                            int    startIndex = match.Index >= IndexBoundary ? match.Index - IndexBoundary : 0;
                                                            int    endIndex   = (slideAllText.Length >= match.Index + match.Length + IndexBoundary) ? match.Index + match.Length + IndexBoundary : slideAllText.Length;
                                                            string matchLine  = slideAllText.Substring(startIndex, endIndex - startIndex);

                                                            while (matchLine.StartsWith("\r") || matchLine.StartsWith("\n"))
                                                            {
                                                                matchLine = matchLine.Substring(1, matchLine.Length - 1);                       // Remove lines starting with the newline character.
                                                            }

                                                            while ((matchLine.EndsWith("\r") || matchLine.EndsWith("\n")) && matchLine.Length > 2)
                                                            {
                                                                matchLine = matchLine.Substring(0, matchLine.Length - 1);                       // Remove lines ending with the newline character.
                                                            }

                                                            Match searchMatch = Regex.Match(matchLine, searchTerm, matcher.RegularExpressionOptions);          // Use this match for the result highlight, based on additional characters being selected before and after the match.
                                                            matchedLines.Add(new MatchedLine
                                                            {
                                                                MatchId    = matchCounter++,
                                                                Content    = string.Format("{0} {1}:\t{2}", Resources.Strings.Slide, slideNumber.ToString(), matchLine),
                                                                SearchTerm = searchTerm,
                                                                FileName   = fileName,
                                                                LineNumber = 1,
                                                                StartIndex = searchMatch.Index + Resources.Strings.Slide.Length + 3 + slideNumber.ToString().Length,
                                                                Length     = searchMatch.Length
                                                            });
                                                        }
                                                    }
                                                }
                                            }
                                            else
                                            {
                                                presentationAllText.AppendLine(slideAllText);
                                            }
                                        }
                                    }

                                    if (matcher.RegularExpressionOptions.HasFlag(RegexOptions.Multiline))
                                    {
                                        matchedLines = matcher.GetMatch(new string[] { string.Join(Environment.NewLine, presentationAllText.ToString()) }, searchTerms, Strings.Slide);
                                    }
                                }
                                catch (PathTooLongException ptlex)
                                {
                                    throw new PathTooLongException(string.Format("{0} {1} {2} {3} - {4}", Resources.Strings.ErrorAccessingEntry, reader.Entry.Key, Resources.Strings.InArchive, fileName, ptlex.Message));
                                }
                            }
                        }
                    }

                    if (matcher.CancellationTokenSource.Token.IsCancellationRequested)
                    {
                        matchedLines.Clear();
                    }

                    archive.Dispose();
                }
            }
            finally
            {
                this.RemoveTempDirectory(tempDirPath);
            }

            return(matchedLines);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Search for matches in zipped archive files.
        /// </summary>
        /// <param name="fileName">The name of the file.</param>
        /// <param name="searchTerms">The terms to search.</param>
        /// <param name="tempDirPath">The temporary extract directory.</param>
        /// <param name="archive">The archive to be searched.</param>
        /// <param name="matcher">The matcher object to determine search criteria.</param>
        /// <returns>The matched lines containing the search terms.</returns>
        private List <MatchedLine> GetMatchedLinesInZipArchive(string fileName, IEnumerable <string> searchTerms, string tempDirPath, SharpCompress.Archives.IArchive archive, Matcher matcher)
        {
            List <MatchedLine> matchedLines = new List <MatchedLine>();

            try
            {
                IReader reader = archive.ExtractAllEntries();
                while (reader.MoveToNextEntry())
                {
                    if (!reader.Entry.IsDirectory)
                    {
                        // Ignore symbolic links as these are captured by the original target.
                        if (string.IsNullOrWhiteSpace(reader.Entry.LinkTarget) && !reader.Entry.Key.Any(c => DisallowedCharactersByOperatingSystem.Any(dc => dc == c)))
                        {
                            try
                            {
                                reader.WriteEntryToDirectory(tempDirPath, new ExtractionOptions()
                                {
                                    ExtractFullPath = true, Overwrite = true
                                });
                                string fullFilePath = Path.Combine(tempDirPath, reader.Entry.Key.Replace(@"/", @"\"));
                                matchedLines.AddRange(FileSearchHandlerFactory.Search(fullFilePath, searchTerms, matcher));

                                if (matchedLines != null && matchedLines.Count > 0)
                                {
                                    // Want the exact path of the file - without the .extract part.
                                    string dirNameToDisplay = fullFilePath.Replace(TempExtractDirectoryName, string.Empty);
                                    matchedLines.Where(ml => string.IsNullOrEmpty(ml.FileName) || ml.FileName.Contains(TempExtractDirectoryName)).ToList()
                                    .ForEach(ml => ml.FileName = dirNameToDisplay);
                                }
                            }
                            catch (PathTooLongException ptlex)
                            {
                                throw new PathTooLongException(string.Format("{0} {1} {2} {3} - {4}", Resources.Strings.ErrorAccessingEntry, reader.Entry.Key, Resources.Strings.InArchive, fileName, ptlex.Message));
                            }
                        }
                    }
                }

                if (matcher.CancellationTokenSource.Token.IsCancellationRequested)
                {
                    matchedLines.Clear();
                }
            }
            catch (ArgumentNullException ane)
            {
                if (ane.Message.Contains("Value cannot be null") && fileName.EndsWith(".gz", StringComparison.OrdinalIgnoreCase))
                {
                    matchedLines = this.DecompressGZipStream(fileName, searchTerms, matcher);
                }
                else if (ane.Message.Contains("String reference not set to an instance of a String."))
                {
                    throw new NotSupportedException(string.Format("{0} {1}. {2}", Resources.Strings.ErrorAccessingFile, fileName, Resources.Strings.FileEncrypted));
                }
                else
                {
                    throw;
                }
            }

            return(matchedLines);
        }