Beispiel #1
0
        /// <summary>
        /// Creates the <see cref="Document"/> subclass that matches the MIME type of a
        /// downloaded resource (and, for generic binary responses, its file extension).
        /// </summary>
        /// <remarks>
        /// Responses that match none of the known types are returned as an
        /// <see cref="IgnoreDocument"/>.
        /// In future, rather than being a hardcoded switch statement, this method could
        /// use a 'provider' model where MIME-types and/or extensions are defined
        /// in the .config file, along with the assembly/class to use to process
        /// that type...
        /// </remarks>
        /// <param name="uri">Address of the resource; passed to the document constructor and used to determine the file extension.</param>
        /// <param name="contentType">The HTTP response whose <c>ContentType</c> header selects the document type.</param>
        /// <returns>The new document, with <c>MimeType</c> set to the lower-cased MIME type.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="uri"/> or <paramref name="contentType"/> is null.</exception>
        public static Document New(Uri uri, System.Net.HttpWebResponse contentType)
        {
            if (uri == null)
            {
                throw new ArgumentNullException("uri");
            }
            if (contentType == null)
            {
                throw new ArgumentNullException("contentType");
            }

            // Start with the 'ignore' document; it is only replaced when a known type is found.
            Document newDoc = new IgnoreDocument(uri);

            // Lower-case with the invariant culture: a culture-sensitive ToLower() can map
            // 'I' to a dotless i (e.g. under Turkish culture), which would make
            // "APPLICATION/PDF" miss the ASCII case labels below.
            string mimeType  = ParseMimeType(contentType.ContentType).ToLowerInvariant();
            string extension = (ParseExtension(uri.AbsoluteUri) ?? string.Empty).ToLowerInvariant();

            switch (mimeType)
            {
            // Deliberately not indexed: stays an IgnoreDocument.
            case "text/css":
            case "application/x-msdownload":
                break;

            case "application/octet-stream":        // ZIP file or something unknown... try the extension
                switch (extension)
                {
                case ".docx":
                    newDoc = new DocxDocument(uri);
                    break;

                case ".xlsx":
                    newDoc = new XlsxDocument(uri);
                    break;

                case ".pptx":
                    newDoc = new PptxDocument(uri);
                    break;
                }
                break;

            // docx
            case "application/vnd.ms-word.document.12":
            case "application/vnd.openxmlformats-officedocument.wordprocessingml":
            case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                newDoc = new DocxDocument(uri);
                break;

            // pptx
            case "application/vnd.openxmlformats-officedocument.presentationml":
            case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
                newDoc = new PptxDocument(uri);
                break;

            // xlsx
            case "application/vnd.openxmlformats-officedocument.spreadsheetml":
            case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
                newDoc = new XlsxDocument(uri);
                break;

            case "application/vnd.ms-powerpoint":                                   // ppt
            case "application/pdf":                                                 // pdf
            case "application/msword":                                              // doc
                newDoc = new FilterDocument(uri);
                break;

            case "text/plain":
                newDoc = new TextDocument(uri);
                break;

            case "text/xml":
            case "application/xml":
                newDoc = new HtmlDocument(uri);     // TODO: XmlDocument parser
                break;

            case "application/rss+xml":
            case "application/rdf+xml":
            case "application/atom+xml":
                newDoc = new HtmlDocument(uri);     // TODO: RssDocument parser
                break;

            case "application/xhtml+xml":
                newDoc = new HtmlDocument(uri);     // TODO: XhtmlDocument parser
                break;

            case "text/html":
            default:
                // None of the exact names above matched (or it is text/html):
                // fall back to a substring test on the MIME type.
                if (mimeType.IndexOf("html", StringComparison.Ordinal) >= 0)
                {       // some flavour of HTML
                    newDoc = new HtmlDocument(uri);
                }
                else if (mimeType.IndexOf("text", StringComparison.Ordinal) >= 0)
                {       // some other 'text' data (not images)
                    newDoc = new TextDocument(uri);
                }
                break;
            } // switch; if not set, newDoc is still the IgnoreDocument

            newDoc.MimeType = mimeType;

            return newDoc;
        }
        /// <summary>
        /// Creates the <see cref="Document"/> subclass that matches the MIME type of a
        /// downloaded resource (and, for generic binary responses, its file extension).
        /// </summary>
        /// <remarks>
        /// Responses that match none of the known types are returned as an
        /// <see cref="IgnoreDocument"/>.
        /// In future, rather than being a hardcoded switch statement, this method could
        /// use a 'provider' model where MIME-types and/or extensions are defined
        /// in the .config file, along with the assembly/class to use to process
        /// that type...
        /// </remarks>
        /// <param name="uri">Address of the resource; passed to the document constructor and used to determine the file extension.</param>
        /// <param name="contentType">The HTTP response whose <c>ContentType</c> header selects the document type.</param>
        /// <returns>The new document, with <c>MimeType</c> set to the lower-cased MIME type.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="uri"/> or <paramref name="contentType"/> is null.</exception>
        public static Document New(Uri uri, System.Net.HttpWebResponse contentType)
        {
            if (uri == null)
            {
                throw new ArgumentNullException("uri");
            }
            if (contentType == null)
            {
                throw new ArgumentNullException("contentType");
            }

            // Start with the 'ignore' document; it is only replaced when a known type is found.
            Document newDoc = new IgnoreDocument(uri);

            // Lower-case with the invariant culture: a culture-sensitive ToLower() can map
            // 'I' to a dotless i (e.g. under Turkish culture), which would make
            // "APPLICATION/PDF" or ".XLSX" miss the ASCII case labels below.
            string mimeType = ParseMimeType(contentType.ContentType).ToLowerInvariant();
            string extension = ParseExtension(uri.AbsoluteUri).ToLowerInvariant();

            switch (mimeType)
            {
                // Deliberately not indexed: stays an IgnoreDocument.
                case "text/css":
                case "application/x-msdownload":
                    break;

                case "application/octet-stream":    // ZIP file or something unknown... try the extension
                    switch (extension)
                    {
                        case ".docx":
                            newDoc = new DocxDocument(uri);
                            break;
                        case ".xlsx":
                            newDoc = new XlsxDocument(uri);
                            break;
                        case ".pptx":
                            newDoc = new PptxDocument(uri);
                            break;
                        case ".pdf":
                            newDoc = new PdfDocument(uri);
                            break;
#if NET35
                        case ".xps":
                            newDoc = new XpsDocument(uri);
                            break;
#endif
                    }
                    break;

                // docx
                case "application/vnd.ms-word.document.12":
                case "application/vnd.openxmlformats-officedocument.wordprocessingml":
                case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                    newDoc = new DocxDocument(uri);
                    break;

                // pptx
                case "application/vnd.openxmlformats-officedocument.presentationml":
                case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
                    newDoc = new PptxDocument(uri);
                    break;

                // xlsx
                case "application/vnd.openxmlformats-officedocument.spreadsheetml":
                case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
                    newDoc = new XlsxDocument(uri);
                    break;

                case "application/pdf":                                             // pdf; changed from FilterDocument in v7
                    newDoc = new PdfDocument(uri);
                    break;

                case "application/vnd.ms-powerpoint":                               // ppt
                case "application/msword":                                          // doc
                    newDoc = new FilterDocument(uri);
                    break;

                case "text/plain":
                    newDoc = new TextDocument(uri);
                    break;

                case "text/xml":
                case "application/xml":
                    newDoc = new HtmlDocument(uri); // TODO: XmlDocument parser
                    break;

                case "application/rss+xml":
                case "application/rdf+xml":
                case "application/atom+xml":
                    newDoc = new HtmlDocument(uri); // TODO: RssDocument parser
                    break;

                case "application/xhtml+xml":
                    newDoc = new HtmlDocument(uri); // TODO: XhtmlDocument parser
                    break;

                case "text/html":
                    newDoc = new HtmlDocument(uri); // [v6] clarify code, suggested by "MADCookie2"
                    break;

                case "image/jpeg":
                    newDoc = new JpegDocument(uri); // [v6] now parse image EXIF data
                    break;

                default:
                    // None of the exact names above matched: fall back to a
                    // substring test on the MIME type.
                    if (mimeType.IndexOf("html", StringComparison.Ordinal) >= 0)
                    {   // some flavour of HTML
                        newDoc = new HtmlDocument(uri);
                    }
                    else if (mimeType.IndexOf("text", StringComparison.Ordinal) >= 0)
                    {   // some other 'text' data (not images)
                        newDoc = new TextDocument(uri);
                    }
                    break;
            } // switch; if not set, defaults to IgnoreDocument

            newDoc.MimeType = mimeType;

            return newDoc;
        }