/// <summary>
/// Saves the downloaded XPS file to the temp folder, collects the
/// <c>UnicodeString</c> of every <c>Glyphs</c> element found directly on each
/// page, and stores that text as the content of this document.
/// </summary>
/// <remarks>
/// .NET System.IO.Compression and zip files
/// http://blogs.msdn.com/dotnetinterop/archive/2006/04/05/.NET-System.IO.Compression-and-zip-files.aspx
/// </remarks>
/// <param name="webresponse">Response that is written to disk by SaveDownloadedFile.</param>
/// <returns>
/// true when some text was extracted (Description is then populated);
/// false when nothing could be extracted.
/// </returns>
public override bool GetResponse(System.Net.HttpWebResponse webresponse)
{
    string filename = System.IO.Path.Combine(
        Preferences.DownloadedTempFilePath,
        System.IO.Path.GetFileName(this.Uri.LocalPath));
    this.Title = System.IO.Path.GetFileNameWithoutExtension(filename);

    SaveDownloadedFile(webresponse, filename);

    try
    {
        XpsDocument xpsDoc = new XpsDocument(filename, System.IO.FileAccess.Read);
        try
        {
            FixedDocumentSequence docSeq = xpsDoc.GetFixedDocumentSequence();
            for (int pageNum = 0; pageNum < docSeq.DocumentPaginator.PageCount; pageNum++)
            {
                DocumentPage docPage = docSeq.DocumentPaginator.GetPage(pageNum);
                foreach (System.Windows.UIElement uie in ((FixedPage)docPage.Visual).Children)
                {
                    // Only Glyphs elements carry text; everything else is skipped.
                    System.Windows.Documents.Glyphs glyphs = uie as System.Windows.Documents.Glyphs;
                    if (glyphs != null)
                    {
                        _WordsOnly += " " + glyphs.UnicodeString;
                    }
                }
            }
            this.All = _WordsOnly;
        }
        finally
        {
            // Release the package so the file handle is freed before the
            // temp file is deleted below.
            xpsDoc.Close();
        }
    }
    catch (Exception ex)
    {
        // Extraction is best-effort: a document that cannot be parsed is
        // reported as "no content" rather than aborting the caller.
        System.Diagnostics.Debug.WriteLine(
            "XPS text extraction failed on " + this.Uri + ": " + ex.Message);
    }
    finally
    {
        // Clean up the temp file whether or not parsing succeeded; a failed
        // delete must not mask the extraction result.
        try
        {
            System.IO.File.Delete(filename);
        }
        catch (System.IO.IOException)
        {
        }
        catch (UnauthorizedAccessException)
        {
        }
    }

    // Treat null the same as empty so a failed extraction is not reported as success.
    if (!string.IsNullOrEmpty(this.All))
    {
        this.Description = base.GetDescriptionFromWordsOnly(WordsOnly);
        return true;
    }
    return false;
}
/// <summary>
/// Construct a Document instance
/// </summary>
/// <remarks>
/// In future, rather than being hardcoded switch statement, this method could
/// use a 'provider' model where MIME-types and/or extensions are defined
/// in the .config file, along with the assembly/class to use to process
/// that type...
/// </remarks>
/// <param name="uri">Location of the resource; its extension is used when the MIME type is application/octet-stream.</param>
/// <param name="contentType">The web response; only its ContentType header is inspected here.</param>
/// <returns>
/// A Document subclass chosen from the MIME type (or extension), with its
/// MimeType property set; an IgnoreDocument when nothing matches.
/// </returns>
public static Document New(Uri uri, System.Net.HttpWebResponse contentType)
{
    Document newDoc = new IgnoreDocument(uri);

    // Invariant lower-casing: the values are compared against fixed ASCII
    // case labels, so the current culture must not influence the result.
    string mimeType = ParseMimeType(contentType.ContentType).ToLowerInvariant();
    string extension = ParseExtension(uri.AbsoluteUri).ToLowerInvariant();

    switch (mimeType)
    {
        case "text/css":
            break;
        case "application/x-msdownload":
            break;
        case "application/octet-stream":
            // ZIP file or something unknown... give some a try
            switch (extension)
            {
                case ".docx":
                    newDoc = new DocxDocument(uri);
                    break;
                case ".xlsx":
                    newDoc = new XlsxDocument(uri);
                    break;
                case ".pptx":
                    newDoc = new PptxDocument(uri);
                    break;
                case ".pdf":
                    newDoc = new PdfDocument(uri);
                    break;
#if NET35
                case ".xps":
                    newDoc = new XpsDocument(uri);
                    break;
#endif
            }
            break;
        // docx
        case "application/vnd.ms-word.document.12":
        case "application/vnd.openxmlformats-officedocument.wordprocessingml":
        case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            newDoc = new DocxDocument(uri);
            break;
        // pptx
        case "application/vnd.openxmlformats-officedocument.presentationml":
        case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
            newDoc = new PptxDocument(uri);
            break;
        // xlsx
        case "application/vnd.openxmlformats-officedocument.spreadsheetml":
        case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            newDoc = new XlsxDocument(uri);
            break;
        case "application/pdf": // pdf; changed from FilterDocument in v7
            newDoc = new PdfDocument(uri);
            break;
        case "application/vnd.ms-powerpoint": // ppt
        case "application/msword": // doc
            newDoc = new FilterDocument(uri);
            break;
        case "text/plain":
            newDoc = new TextDocument(uri);
            break;
        case "text/xml":
        case "application/xml":
            newDoc = new HtmlDocument(uri); // TODO: XmlDocument parser
            break;
        case "application/rss+xml":
        case "application/rdf+xml":
        case "application/atom+xml":
            newDoc = new HtmlDocument(uri); // TODO: RssDocument parser
            break;
        case "application/xhtml+xml":
            newDoc = new HtmlDocument(uri); // TODO: XhtmlDocument parser
            break;
        case "text/html":
            newDoc = new HtmlDocument(uri); // [v6] clarify code, suggested by "MADCookie2"
            break;
        case "image/jpeg":
            newDoc = new JpegDocument(uri); // [v6] now parse image EXIF data
            break;
        default:
            // none of the above matched...
            if (mimeType.IndexOf("html", StringComparison.Ordinal) >= 0)
            {
                // Some other HTML-flavoured type
                newDoc = new HtmlDocument(uri);
            }
            else if (mimeType.IndexOf("text", StringComparison.Ordinal) >= 0)
            {
                // If we got 'text' data (not images)
                newDoc = new TextDocument(uri);
            }
            break;
    } // switch; if not set, defaults to IgnoreDocument

    newDoc.MimeType = mimeType;
    return newDoc;
}
/// <summary>
/// Construct a Document instance
/// </summary>
/// <remarks>
/// In future, rather than being hardcoded switch statement, this method could
/// use a 'provider' model where MIME-types and/or extensions are defined
/// in the .config file, along with the assembly/class to use to process
/// that type...
/// </remarks>
/// <param name="uri">Location of the resource; its extension is used when the MIME type is application/octet-stream.</param>
/// <param name="contentType">The web response; only its ContentType header is inspected here.</param>
/// <returns>
/// A Document subclass chosen from the MIME type (or extension), with its
/// MimeType property set; an IgnoreDocument when nothing matches.
/// </returns>
public static Document New(Uri uri, System.Net.HttpWebResponse contentType)
{
    Document newDoc = new IgnoreDocument(uri);

    // Invariant lower-casing: the values are compared against fixed ASCII
    // case labels, so the current culture must not influence the result.
    string mimeType = ParseMimeType(contentType.ContentType).ToLowerInvariant();
    string extension = ParseExtension(uri.AbsoluteUri).ToLowerInvariant();

    switch (mimeType)
    {
        case "text/css":
            break;
        case "application/x-msdownload":
            break;
        case "application/octet-stream":
            // ZIP file or something unknown... give some a try
            switch (extension)
            {
                case ".docx":
                    newDoc = new DocxDocument(uri);
                    break;
                case ".xlsx":
                    newDoc = new XlsxDocument(uri);
                    break;
                case ".pptx":
                    newDoc = new PptxDocument(uri);
                    break;
                case ".pdf":
                    newDoc = new PdfDocument(uri);
                    break;
#if NET35
                case ".xps":
                    newDoc = new XpsDocument(uri);
                    break;
#endif
            }
            break;
        // docx
        case "application/vnd.ms-word.document.12":
        case "application/vnd.openxmlformats-officedocument.wordprocessingml":
        case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            newDoc = new DocxDocument(uri);
            break;
        // pptx
        case "application/vnd.openxmlformats-officedocument.presentationml":
        case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
            newDoc = new PptxDocument(uri);
            break;
        // xlsx
        case "application/vnd.openxmlformats-officedocument.spreadsheetml":
        case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            newDoc = new XlsxDocument(uri);
            break;
        case "application/pdf": // pdf; changed from FilterDocument in v7
            newDoc = new PdfDocument(uri);
            break;
        case "application/vnd.ms-powerpoint": // ppt
        case "application/msword": // doc
            newDoc = new FilterDocument(uri);
            break;
        case "text/plain":
            newDoc = new TextDocument(uri);
            break;
        case "text/xml":
        case "application/xml":
            newDoc = new HtmlDocument(uri); // TODO: XmlDocument parser
            break;
        case "application/rss+xml":
        case "application/rdf+xml":
        case "application/atom+xml":
            newDoc = new HtmlDocument(uri); // TODO: RssDocument parser
            break;
        case "application/xhtml+xml":
            newDoc = new HtmlDocument(uri); // TODO: XhtmlDocument parser
            break;
        case "text/html":
            newDoc = new HtmlDocument(uri); // [v6] clarify code, suggested by "MADCookie2"
            break;
        case "image/jpeg":
            newDoc = new JpegDocument(uri); // [v6] now parse image EXIF data
            break;
        default:
            // none of the above matched...
            if (mimeType.IndexOf("html", StringComparison.Ordinal) >= 0)
            {
                // Some other HTML-flavoured type
                newDoc = new HtmlDocument(uri);
            }
            else if (mimeType.IndexOf("text", StringComparison.Ordinal) >= 0)
            {
                // If we got 'text' data (not images)
                newDoc = new TextDocument(uri);
            }
            break;
    } // switch; if not set, defaults to IgnoreDocument

    newDoc.MimeType = mimeType;
    return newDoc;
}