示例#1
0
        /// <summary>
        /// Returns a new IOpenSDKReader that can read the contents
        /// of the provided format document.  The reader is selected
        /// from the file extension of <paramref name="path"/>, checked
        /// against the Word, PowerPoint and Excel extension lists in
        /// that order.  If the file format is not supported, then an
        /// InvalidDataException will be thrown.  It is possible to do
        /// a pre-check using the IsValidFile method.
        /// </summary>
        /// <param name="path">
        /// Path of the document to read.  Its extension selects the
        /// reader and the path itself is handed to that reader's
        /// constructor.
        /// </param>
        /// <returns>
        /// A new WordReader, PowerPointReader or ExcelReader created
        /// for <paramref name="path"/>.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="path"/> is null.
        /// </exception>
        /// <exception cref="System.IO.InvalidDataException">
        /// The extension of <paramref name="path"/> is not in any of
        /// the supported extension lists.
        /// </exception>
        public static IOpenSDKReader GetNew(string path)
        {
            if (path == null)
            {
                // Path.GetExtension(null) returns null, which previously
                // surfaced as an unhelpful NullReferenceException.
                throw new System.ArgumentNullException("path");
            }

            // Upper-case with the invariant culture so the lookup does not
            // depend on the current thread culture (e.g. Turkish 'i' casing).
            // NOTE(review): presumably the extension lists hold upper-case
            // entries such as ".DOCX" -- confirm against their declarations.
            string extension = Path.GetExtension(path).ToUpperInvariant();

            if (_WordOpenSDKExtensions.Contains(extension))
            {
                return new WordReader(path);
            }

            if (_PowerPointOpenSDKExtensions.Contains(extension))
            {
                return new PowerPointReader(path);
            }

            if (_ExcelOpenSDKExtensions.Contains(extension))
            {
                return new ExcelReader(path);
            }

            throw new System.IO.InvalidDataException(NOT_VALID_FILE_MESSAGE);
        }
示例#2
0
        /// <summary>
        /// Reads the text contents of the file at _InternalFilePath by
        /// delegating to whichever format reader accepts it (PDF, binary,
        /// OpenSDK or Outlook).  For Outlook files, when
        /// IncludeEmbeddedFiles is set, the contents of each readable
        /// embedded file are appended to the message contents.
        /// </summary>
        /// <returns>
        /// The contents that were read, or an empty string when none of
        /// the format checks accept the file.
        /// </returns>
        public string ReadContents()
        {
            CheckInternalState();

            string contents = string.Empty;

            // The format checks below are independent rather than an
            // if/else chain: should more than one of them accept the file,
            // the result of the last matching reader is the one returned.

            if (IsReadablePdf(_InternalFilePath))
            {
                contents = ReadPdfContents();
            }

            if (BinaryReader.IsValidFile(_InternalFilePath))
            {
                IBinaryReader reader = BinaryReader.GetNew(_InternalFilePath);
                contents = reader.ReadContents();
            }

            if (OpenSDKReader.IsValidFile(_InternalFilePath))
            {
                IOpenSDKReader reader = OpenSDKReader.GetNew(_InternalFilePath);
                contents = reader.ReadContents();
            }

            if (OutlookReader.IsValidFile(_InternalFilePath))
            {
                IOutlookReader reader = OutlookReader.GetNew(_InternalFilePath);
                contents = reader.ReadContents();

                if (IncludeEmbeddedFiles)
                {
                    StringBuilder builder = new StringBuilder(contents);

                    foreach (string tempEmbeddedFile in reader.GetEmbeddedFiles())
                    {
                        //NOTE: the GetEmbeddedFiles method of OutlookDataReader creates and
                        //returns temp files for the attachments, if they exist, therefore
                        //the caller method is responsible for disposing of them here when
                        //done

                        try
                        {
                            IFileReader embeddedFileReader = new FileReader();
                            if (embeddedFileReader.IsReadable(tempEmbeddedFile))
                            {
                                embeddedFileReader.Open(tempEmbeddedFile);
                                builder.Append(embeddedFileReader.ReadContents());
                            }
                        }
                        finally
                        {
                            // Delete the temp file even when reading it threw,
                            // so a failing attachment does not leak its file.
                            try
                            {
                                File.Delete(tempEmbeddedFile);
                            }
                            catch (Exception)
                            {
                                // Deliberate best-effort cleanup: a temp file
                                // that cannot be removed must not fail the read
                                // or mask an exception raised above.
                            }
                        }
                    }

                    contents = builder.ToString();
                }
            }

            return contents;
        }