/// <summary>
/// Construct the <see cref="Document"/> subclass that matches the MIME type of a response
/// </summary>
/// <remarks>
/// In future, rather than being hardcoded switch statement, this method could
/// use a 'provider' model where MIME-types and/or extensions are defined
/// in the .config file, along with the assembly/class to use to process
/// that type...
/// </remarks>
/// <param name="uri">
/// Location of the content. Its file extension is only consulted when the
/// server reports the generic application/octet-stream type.
/// </param>
/// <param name="contentType">Response whose Content-Type header selects the Document type</param>
/// <returns>
/// A DocxDocument, XlsxDocument, PptxDocument, FilterDocument, TextDocument or
/// HtmlDocument chosen from the MIME type, or an IgnoreDocument when no case
/// applies. The returned instance has MimeType set to the lower-cased MIME type.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="uri"/> or <paramref name="contentType"/> is null
/// </exception>
public static Document New(Uri uri, System.Net.HttpWebResponse contentType)
{
    if (uri == null)
    {
        throw new ArgumentNullException("uri");
    }
    if (contentType == null)
    {
        throw new ArgumentNullException("contentType");
    }

    // Anything that no case below claims stays an IgnoreDocument
    Document newDoc = new IgnoreDocument(uri);

    // Invariant lower-casing: culture-sensitive ToLower() maps 'I' to a dotless 'ı'
    // under Turkish cultures, which would stop "APPLICATION/PDF" matching its case label
    string mimeType = ParseMimeType(contentType.ContentType).ToLowerInvariant();

    // Lower-cased so that ".DOCX" is treated the same as ".docx"; a null extension
    // is mapped to "" which simply matches no case label
    string extension = (ParseExtension(uri.AbsoluteUri) ?? string.Empty).ToLowerInvariant();

    switch (mimeType)
    {
        case "text/css":
        case "application/x-msdownload":
            // deliberately left as IgnoreDocument
            break;

        case "application/octet-stream":
            // ZIP file or something unknown... fall back on the file extension
            switch (extension)
            {
                case ".docx":
                    newDoc = new DocxDocument(uri);
                    break;
                case ".xlsx":
                    newDoc = new XlsxDocument(uri);
                    break;
                case ".pptx":
                    newDoc = new PptxDocument(uri);
                    break;
            }
            break;

        // docx
        case "application/vnd.ms-word.document.12":
        case "application/vnd.openxmlformats-officedocument.wordprocessingml":
        case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            newDoc = new DocxDocument(uri);
            break;

        // pptx
        case "application/vnd.openxmlformats-officedocument.presentationml":
        case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
            newDoc = new PptxDocument(uri);
            break;

        // xlsx
        case "application/vnd.openxmlformats-officedocument.spreadsheetml":
        case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            newDoc = new XlsxDocument(uri);
            break;

        case "application/vnd.ms-powerpoint":   // ppt
        case "application/pdf":                 // pdf
        case "application/msword":              // doc
            newDoc = new FilterDocument(uri);
            break;

        case "text/plain":
            newDoc = new TextDocument(uri);
            break;

        case "text/xml":
        case "application/xml":
            newDoc = new HtmlDocument(uri); // TODO: XmlDocument parser
            break;

        case "application/rss+xml":
        case "application/rdf+xml":
        case "application/atom+xml":
            newDoc = new HtmlDocument(uri); // TODO: RssDocument parser
            break;

        case "application/xhtml+xml":
            newDoc = new HtmlDocument(uri); // TODO: XhtmlDocument parser
            break;

        case "text/html":
        default:
            // "text/html" itself, or none of the above matched: sniff the type name
            if (mimeType.IndexOf("html", StringComparison.Ordinal) >= 0)
            {
                // some flavour of HTML
                newDoc = new HtmlDocument(uri);
            }
            else if (mimeType.IndexOf("text", StringComparison.Ordinal) >= 0)
            {
                // If we got 'text' data (not images)
                newDoc = new TextDocument(uri);
            }
            break;
    } // switch; if not set, stays IgnoreDocument

    newDoc.MimeType = mimeType;
    return newDoc;
}
/// <summary>
/// Construct the <see cref="Document"/> subclass that matches the MIME type of a response
/// </summary>
/// <remarks>
/// In future, rather than being hardcoded switch statement, this method could
/// use a 'provider' model where MIME-types and/or extensions are defined
/// in the .config file, along with the assembly/class to use to process
/// that type...
/// </remarks>
/// <param name="uri">
/// Location of the content. Its file extension is only consulted when the
/// server reports the generic application/octet-stream type.
/// </param>
/// <param name="contentType">Response whose Content-Type header selects the Document type</param>
/// <returns>
/// A DocxDocument, XlsxDocument, PptxDocument, PdfDocument, XpsDocument (NET35 builds
/// only), FilterDocument, TextDocument, HtmlDocument or JpegDocument chosen from the
/// MIME type, or an IgnoreDocument when no case applies. The returned instance has
/// MimeType set to the lower-cased MIME type.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="uri"/> or <paramref name="contentType"/> is null
/// </exception>
public static Document New(Uri uri, System.Net.HttpWebResponse contentType)
{
    if (uri == null)
    {
        throw new ArgumentNullException("uri");
    }
    if (contentType == null)
    {
        throw new ArgumentNullException("contentType");
    }

    // Anything that no case below claims stays an IgnoreDocument
    Document newDoc = new IgnoreDocument(uri);

    // Invariant lower-casing: culture-sensitive ToLower() maps 'I' to a dotless 'ı'
    // under Turkish cultures, which would stop "APPLICATION/PDF" or ".XLSX" matching
    string mimeType = ParseMimeType(contentType.ContentType).ToLowerInvariant();
    string extension = ParseExtension(uri.AbsoluteUri).ToLowerInvariant();

    switch (mimeType)
    {
        case "text/css":
        case "application/x-msdownload":
            // deliberately left as IgnoreDocument
            break;

        case "application/octet-stream":
            // ZIP file or something unknown... fall back on the file extension
            switch (extension)
            {
                case ".docx":
                    newDoc = new DocxDocument(uri);
                    break;
                case ".xlsx":
                    newDoc = new XlsxDocument(uri);
                    break;
                case ".pptx":
                    newDoc = new PptxDocument(uri);
                    break;
                case ".pdf":
                    newDoc = new PdfDocument(uri);
                    break;
#if NET35
                case ".xps":
                    newDoc = new XpsDocument(uri);
                    break;
#endif
            }
            break;

        // docx
        case "application/vnd.ms-word.document.12":
        case "application/vnd.openxmlformats-officedocument.wordprocessingml":
        case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            newDoc = new DocxDocument(uri);
            break;

        // pptx
        case "application/vnd.openxmlformats-officedocument.presentationml":
        case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
            newDoc = new PptxDocument(uri);
            break;

        // xlsx
        case "application/vnd.openxmlformats-officedocument.spreadsheetml":
        case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            newDoc = new XlsxDocument(uri);
            break;

        case "application/pdf": // pdf; changed from FilterDocument in v7
            newDoc = new PdfDocument(uri);
            break;

        case "application/vnd.ms-powerpoint":   // ppt
        case "application/msword":              // doc
            newDoc = new FilterDocument(uri);
            break;

        case "text/plain":
            newDoc = new TextDocument(uri);
            break;

        case "text/xml":
        case "application/xml":
            newDoc = new HtmlDocument(uri); // TODO: XmlDocument parser
            break;

        case "application/rss+xml":
        case "application/rdf+xml":
        case "application/atom+xml":
            newDoc = new HtmlDocument(uri); // TODO: RssDocument parser
            break;

        case "application/xhtml+xml":
            newDoc = new HtmlDocument(uri); // TODO: XhtmlDocument parser
            break;

        case "text/html":
            newDoc = new HtmlDocument(uri); // [v6] clarify code, suggested by "MADCookie2"
            break;

        case "image/jpeg":
            newDoc = new JpegDocument(uri); // [v6] now parse image EXIF data
            break;

        default:
            // none of the above matched... sniff the type name
            if (mimeType.IndexOf("html", StringComparison.Ordinal) >= 0)
            {
                // some flavour of HTML
                newDoc = new HtmlDocument(uri);
            }
            else if (mimeType.IndexOf("text", StringComparison.Ordinal) >= 0)
            {
                // If we got 'text' data (not images)
                newDoc = new TextDocument(uri);
            }
            break;
    } // switch; if not set, defaults to IgnoreDocument

    newDoc.MimeType = mimeType;
    return newDoc;
}