Пример #1
0
        /// <summary>
        /// Saves the response to a temporary file, opens it as an XPS document and
        /// collects the <c>UnicodeString</c> of every <c>Glyphs</c> element found on
        /// each fixed page into the document's word list.
        /// </summary>
        /// <param name="webresponse">Response whose body is written to the temporary file.</param>
        /// <returns>
        /// <c>true</c> when <c>All</c> holds text after processing (the description is
        /// then derived from <c>WordsOnly</c>); otherwise <c>false</c>.
        /// </returns>
        /// <remarks>
        /// Extraction is best effort: any failure while reading the XPS package is
        /// swallowed and no text is committed. The temporary file is removed whether
        /// or not extraction succeeded.
        /// Background reading kept from the original header:
        /// .NET System.IO.Compression and zip files
        /// http://blogs.msdn.com/dotnetinterop/archive/2006/04/05/.NET-System.IO.Compression-and-zip-files.aspx
        /// </remarks>
        public override bool GetResponse(System.Net.HttpWebResponse webresponse)
        {
            string filename = System.IO.Path.Combine(
                          Preferences.DownloadedTempFilePath
                        , (System.IO.Path.GetFileName(this.Uri.LocalPath)));
            this.Title = System.IO.Path.GetFileNameWithoutExtension(filename);

            SaveDownloadedFile(webresponse, filename);
            try
            {
                // Seed with whatever was already collected so the result matches appending to the field.
                System.Text.StringBuilder words = new System.Text.StringBuilder(_WordsOnly);

                XpsDocument xpsDoc = new XpsDocument(filename, System.IO.FileAccess.Read);
                try
                {
                    FixedDocumentSequence docSeq = xpsDoc.GetFixedDocumentSequence();
                    for (int pageNum = 0; pageNum < docSeq.DocumentPaginator.PageCount; pageNum++)
                    {
                        DocumentPage docPage = docSeq.DocumentPaginator.GetPage(pageNum);

                        // Skip a page whose visual is not a FixedPage instead of abandoning the whole document.
                        FixedPage fixedPage = docPage.Visual as FixedPage;
                        if (fixedPage == null)
                        {
                            continue;
                        }

                        foreach (System.Windows.UIElement uie in fixedPage.Children)
                        {
                            System.Windows.Documents.Glyphs glyphs = uie as System.Windows.Documents.Glyphs;
                            if (glyphs != null)
                            {
                                words.Append(' ').Append(glyphs.UnicodeString);
                            }
                        }
                    }
                }
                finally
                {
                    // Release the package so the temporary file is no longer held open when it is deleted.
                    xpsDoc.Close();
                }

                _WordsOnly = words.ToString();
                this.All = _WordsOnly;
            }
            catch (Exception)
            {
                // Best effort: an unreadable XPS file simply yields no text.
                //                ProgressEvent(this, new ProgressEventArgs(2, "IFilter failed on " + this.Uri + " " + e.Message + ""));
            }
            finally
            {
                try
                {
                    System.IO.File.Delete(filename);    // clean up, even when extraction failed
                }
                catch (Exception)
                {
                    // A temp file that cannot be removed must not turn into a failed response.
                }
            }

            if (string.IsNullOrEmpty(this.All))
            {
                return false;
            }

            this.Description = base.GetDescriptionFromWordsOnly(WordsOnly);
            return true;
        }
        /// <summary>
        /// Construct a Document instance suited to the content type of a response.
        /// </summary>
        /// <param name="uri">Address of the resource; also used to derive the file extension.</param>
        /// <param name="contentType">HTTP response whose <c>ContentType</c> header selects the document class.</param>
        /// <returns>
        /// A Document subclass chosen from the MIME type (and, for
        /// "application/octet-stream", the file extension). When nothing matches, an
        /// <c>IgnoreDocument</c> is returned. <c>MimeType</c> is set on the result in
        /// every case.
        /// </returns>
        /// <remarks>
        /// In future, rather than being hardcoded switch statement, this method could
        /// use a 'provider' model where MIME-types and/or extensions are defined
        /// in the .config file, along with the assembly/class to use to process
        /// that type...
        /// </remarks>
        public static Document New(Uri uri, System.Net.HttpWebResponse contentType)
        {
            Document newDoc    = new IgnoreDocument(uri);
            // Invariant lower-casing: the case labels are ASCII and must match regardless
            // of the current culture (e.g. the Turkish dotless 'i').
            string   mimeType  = ParseMimeType(contentType.ContentType).ToLowerInvariant();
            string   extension = ParseExtension(uri.AbsoluteUri).ToLowerInvariant();

            switch (mimeType)
            {
            case "text/css":
                break;

            case "application/x-msdownload":
                break;

            case "application/octet-stream":        // ZIP file or something unknown... give some a try
                switch (extension)
                {
                case ".docx":
                    newDoc = new DocxDocument(uri);
                    break;

                case ".xlsx":
                    newDoc = new XlsxDocument(uri);
                    break;

                case ".pptx":
                    newDoc = new PptxDocument(uri);
                    break;

                case ".pdf":
                    newDoc = new PdfDocument(uri);
                    break;

#if NET35
                case ".xps":
                    newDoc = new XpsDocument(uri);
                    break;
#endif
                }
                break;

            // docx
            case "application/vnd.ms-word.document.12":
            case "application/vnd.openxmlformats-officedocument.wordprocessingml":
            case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                newDoc = new DocxDocument(uri);
                break;

            // pptx
            case "application/vnd.openxmlformats-officedocument.presentationml":
            case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
                newDoc = new PptxDocument(uri);
                break;

            // xlsx
            case "application/vnd.openxmlformats-officedocument.spreadsheetml":
            case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
                newDoc = new XlsxDocument(uri);
                break;

            case "application/pdf":                                                 // pdf; changed from FilterDocument in v7
                newDoc = new PdfDocument(uri);
                break;

            case "application/vnd.ms-powerpoint":                                   // ppt
            case "application/msword":                                              // doc
                newDoc = new FilterDocument(uri);
                break;

            case "text/plain":
                newDoc = new TextDocument(uri);
                break;

            case "text/xml":
            case "application/xml":
                newDoc = new HtmlDocument(uri);     // TODO: XmlDocument parser
                break;

            case "application/rss+xml":
            case "application/rdf+xml":
            case "application/atom+xml":
                newDoc = new HtmlDocument(uri);     // TODO: RssDocument parser
                break;

            case "application/xhtml+xml":
                newDoc = new HtmlDocument(uri);     // TODO: XhtmlDocument parser
                break;

            case "text/html":
                newDoc = new HtmlDocument(uri);     // [v6] clarify code, suggested by "MADCookie2"
                break;

            case "image/jpeg":
                newDoc = new JpegDocument(uri);     // [v6] now parse image EXIF data
                break;

            default:
                // none of the above matched... fall back on a substring of the MIME type
                if (mimeType.IndexOf("html", StringComparison.Ordinal) >= 0)
                {       // some HTML variant
                    newDoc = new HtmlDocument(uri);
                }
                else if (mimeType.IndexOf("text", StringComparison.Ordinal) >= 0)
                {       // If we got 'text' data (not images)
                    newDoc = new TextDocument(uri);
                }
                break;
            } // switch; if not set, defaults to IgnoreDocument
            newDoc.MimeType = mimeType;

            return newDoc;
        }
Пример #3
0
        /// <summary>
        /// Construct a Document instance that matches the MIME type of a response.
        /// </summary>
        /// <param name="uri">Location of the resource; its extension is consulted for generic binary responses.</param>
        /// <param name="contentType">HTTP response whose <c>ContentType</c> header drives the selection.</param>
        /// <returns>
        /// The Document subclass selected by the switch below, or an
        /// <c>IgnoreDocument</c> when no case applies. The parsed MIME type is always
        /// stored in <c>MimeType</c> on the returned instance.
        /// </returns>
        /// <remarks>
        /// In future, rather than being hardcoded switch statement, this method could
        /// use a 'provider' model where MIME-types and/or extensions are defined
        /// in the .config file, along with the assembly/class to use to process
        /// that type...
        /// </remarks>
        public static Document New(Uri uri, System.Net.HttpWebResponse contentType)
        {
            Document newDoc = new IgnoreDocument(uri);
            // Culture-invariant lower-casing keeps the ASCII case labels matching under any locale.
            string mimeType = ParseMimeType(contentType.ContentType).ToLowerInvariant();
            string extension = ParseExtension(uri.AbsoluteUri).ToLowerInvariant();
            switch (mimeType)
            {
                case "text/css":
                    break;
                case "application/x-msdownload":
                    break;
                case "application/octet-stream":    // ZIP file or something unknown... give some a try
                    switch (extension)
                    {
                        case ".docx":
                            newDoc = new DocxDocument(uri);
                            break;
                        case ".xlsx":
                            newDoc = new XlsxDocument(uri);
                            break;
                        case ".pptx":
                            newDoc = new PptxDocument(uri);
                            break;
                        case ".pdf":
                            newDoc = new PdfDocument(uri);
                            break;
            #if NET35
                        case ".xps":
                            newDoc = new XpsDocument(uri);
                            break;
            #endif
                    }
                    break;
                                                                                     // docx
                case "application/vnd.ms-word.document.12":
                case "application/vnd.openxmlformats-officedocument.wordprocessingml":
                case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                    newDoc = new DocxDocument(uri);
                    break;
                                                                                    // pptx
                case "application/vnd.openxmlformats-officedocument.presentationml":
                case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
                    newDoc = new PptxDocument(uri);
                    break;
                                                                                    // xlsx
                case "application/vnd.openxmlformats-officedocument.spreadsheetml":
                case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
                    newDoc = new XlsxDocument(uri);
                    break;
                case "application/pdf":                                             // pdf; changed from FilterDocument in v7
                    newDoc = new PdfDocument(uri);
                    break;
                case "application/vnd.ms-powerpoint":                               // ppt
                case "application/msword":                                          // doc
                    newDoc = new FilterDocument(uri);
                    break;
                case "text/plain":
                    newDoc = new TextDocument(uri);
                    break;
                case "text/xml":
                case "application/xml":
                    newDoc = new HtmlDocument(uri); // TODO: XmlDocument parser
                    break;
                case "application/rss+xml":
                case "application/rdf+xml":
                case "application/atom+xml":
                    newDoc = new HtmlDocument(uri); // TODO: RssDocument parser
                    break;
                case "application/xhtml+xml":
                    newDoc = new HtmlDocument(uri); // TODO: XhtmlDocument parser
                    break;
                case "text/html":
                    newDoc = new HtmlDocument(uri); // [v6] clarify code, suggested by "MADCookie2"
                    break;
                case "image/jpeg":
                    newDoc = new JpegDocument(uri); // [v6] now parse image EXIF data
                    break;
                default:
                    // none of the above matched... look for a telling substring instead
                    if (mimeType.IndexOf("html", StringComparison.Ordinal) >= 0)
                    {   // some HTML variant
                        newDoc = new HtmlDocument(uri);
                    }
                    else if (mimeType.IndexOf("text", StringComparison.Ordinal) >= 0)
                    {   // If we got 'text' data (not images)
                        newDoc = new TextDocument(uri);
                    }
                    break;
            } // switch; if not set, defaults to IgnoreDocument
            newDoc.MimeType = mimeType;

            return newDoc;
        }