예제 #1
0
        /// <summary>
        /// Determines whether the file at <paramref name="path"/> is accepted by at
        /// least one of the supported format checks.
        /// </summary>
        /// <param name="path">Path of the file to test.</param>
        /// <returns>
        /// <c>true</c> if any format check accepts the file; otherwise <c>false</c>.
        /// </returns>
        public bool IsReadable(string path)
        {
            // The checks are evaluated in this order and stop at the first one
            // that accepts the file (short-circuit evaluation).
            return OdfReader.IsValidFile(path)
                || IsReadablePdf(path)
                || BinaryReader.IsValidFile(path)
                || OpenSDKReader.IsValidFile(path)
                || OutlookReader.IsValidFile(path);
        }
예제 #2
0
        /// <summary>
        /// Reads the contents of the file at <c>_InternalFilePath</c> using whichever
        /// format readers accept it.
        /// </summary>
        /// <remarks>
        /// <c>CheckInternalState()</c> is called before anything else. The format checks
        /// are independent of each other (they are not chained with <c>else</c>): if more
        /// than one check accepts the file, the result of the last matching reader wins.
        /// For Outlook files, when <c>IncludeEmbeddedFiles</c> is set, the contents of
        /// every readable embedded file are appended to the message contents.
        /// NOTE(review): <c>IsReadable</c> also accepts files via
        /// <c>OdfReader.IsValidFile</c>, but there is no ODF branch here; such a file
        /// yields an empty string unless another check also accepts it. Confirm whether
        /// that is intentional.
        /// </remarks>
        /// <returns>
        /// The extracted contents, or an empty string if no format check accepts the file.
        /// </returns>
        public string ReadContents()
        {
            CheckInternalState();

            string contents = string.Empty;

            if (IsReadablePdf(_InternalFilePath))
            {
                contents = ReadPdfContents();
            }

            if (BinaryReader.IsValidFile(_InternalFilePath))
            {
                IBinaryReader reader = BinaryReader.GetNew(_InternalFilePath);
                contents = reader.ReadContents();
            }

            if (OpenSDKReader.IsValidFile(_InternalFilePath))
            {
                IOpenSDKReader reader = OpenSDKReader.GetNew(_InternalFilePath);
                contents = reader.ReadContents();
            }

            if (OutlookReader.IsValidFile(_InternalFilePath))
            {
                IOutlookReader reader = OutlookReader.GetNew(_InternalFilePath);
                contents = reader.ReadContents();

                if (IncludeEmbeddedFiles)
                {
                    StringBuilder builder = new StringBuilder(contents);

                    foreach (string tempEmbeddedFile in reader.GetEmbeddedFiles())
                    {
                        // NOTE: per the original author's note, GetEmbeddedFiles creates
                        // and returns temp files for the attachments, if they exist, so
                        // this method is responsible for deleting them when done.
                        try
                        {
                            IFileReader embeddedFileReader = new FileReader();
                            if (embeddedFileReader.IsReadable(tempEmbeddedFile))
                            {
                                embeddedFileReader.Open(tempEmbeddedFile);
                                builder.Append(embeddedFileReader.ReadContents());
                            }
                        }
                        finally
                        {
                            // Delete the temp file even when reading it throws, so a
                            // failed attachment does not leave the file behind.
                            try
                            {
                                File.Delete(tempEmbeddedFile);
                            }
                            catch (Exception)
                            {
                                // Deliberately best-effort: a failed cleanup must not
                                // mask the read result or the original exception.
                            }
                        }
                    }

                    contents = builder.ToString();
                }
            }

            return contents;
        }