/// <summary>
/// Builds a summary parser for a book supplied either as a plain XML stream
/// or, when that cannot be read, as an archive whose first file holds the XML.
/// </summary>
/// <param name="source">
/// Stream containing the book. It is rewound (Position = 0) when the
/// plain-XML attempt fails, so it must support seeking in that case.
/// </param>
/// <exception cref="DataException">
/// The loaded document has no root element, or the root has no "xmlns" attribute.
/// </exception>
public Fb2SummaryParser(Stream source)
{
    try
    {
        // First attempt: the stream is the XML document itself.
        XDocument plainDocument = source.GetXmlDocument();
        _root = plainDocument.Root;
    }
    catch
    {
        // Any failure above triggers the fallback: treat the stream as an
        // archive and load the XML from the first file inside it.
        source.Position = 0;
        var archive = ZipContainer.Unzip(source);
        source = archive.Files.First().Stream;
        source.Position = 0;

        XDocument unpackedDocument = source.GetXmlDocument();
        _root = unpackedDocument.Root;
    }

    // A missing root and a missing namespace attribute are reported identically.
    XAttribute namespaceAttribute = (_root == null) ? null : _root.Attribute("xmlns");
    if (namespaceAttribute == null)
    {
        throw new DataException("Can't load book.");
    }

    _ns = namespaceAttribute.Value;
}
/// <summary>
/// Chooses the encoding to use for the text <paramref name="t"/> according to
/// the container's <c>AlternateEncodingUsage</c> setting.
/// </summary>
/// <param name="container">Container supplying the default/alternate encodings and the usage option.</param>
/// <param name="t">Text to be encoded; may be null.</param>
/// <returns>
/// The alternate encoding for <c>ZipOption.Always</c>, the default encoding for
/// <c>ZipOption.Never</c>; otherwise the default encoding when <paramref name="t"/>
/// is null or survives an encode/decode round trip unchanged, else the alternate encoding.
/// </returns>
private static System.Text.Encoding GetEncoding(ZipContainer container, string t)
{
    var usage = container.AlternateEncodingUsage;
    if (usage == ZipOption.Always)
    {
        return container.AlternateEncoding;
    }

    if (usage == ZipOption.Never)
    {
        return container.DefaultEncoding;
    }

    // AsNecessary is in force: use the default encoding only if it is lossless for t.
    System.Text.Encoding primary = container.DefaultEncoding;
    if (t == null)
    {
        return primary;
    }

    byte[] encoded = primary.GetBytes(t);
    string roundTripped = primary.GetString(encoded, 0, encoded.Length);

    return roundTripped.Equals(t) ? primary : container.AlternateEncoding;
}
/// <summary>
/// Stores the archive and OPF context supplied by the caller for later use by this helper.
/// </summary>
/// <param name="zip">Archive container, stored in <c>_zip</c>.</param>
/// <param name="opfns">OPF XML namespace, stored in <c>_opfns</c>.</param>
/// <param name="opfRoot">Root element of the OPF document, stored in <c>_opfRoot</c>.</param>
/// <param name="oebps">Stored in <c>_oebps</c>; presumably the folder of the OPF file inside the archive (confirm with the caller).</param>
public EpubCoverHelper(ZipContainer zip, XNamespace opfns, XElement opfRoot, string oebps)
{
    // Plain field capture; no validation is performed here.
    _oebps = oebps;
    _opfRoot = opfRoot;
    _opfns = opfns;
    _zip = zip;
}
/// <summary>
/// Prints the metadata of every entity stored in a ZIP archive to the console.
/// </summary>
/// <param name="folderName">Name of the ZIP archive; resolved to a path via <c>Common.GetFilePath</c>.</param>
public static void EnumerateAllArchivedFiles(string folderName)
{
    //ExStart:EnumerateAllArchivedFiles
    // Resolve the archive location.
    string archivePath = Common.GetFilePath(folderName);

    // Open the archive; the container is disposed when the block exits.
    using (var container = new ZipContainer(archivePath))
    {
        int index = 0;
        while (index < container.Entities.Count)
        {
            var entity = container.Entities[index];

            // Basic entity information.
            Console.WriteLine("Name: " + entity.Name);
            Console.WriteLine("Path: " + entity.Path.ToString());
            Console.WriteLine("Media type: " + entity.MediaType);

            // Metadata added by the release 17.10 changes.
            Console.WriteLine("Date:" + entity.Date);
            Console.WriteLine("Size:" + entity.Size);
            Console.WriteLine("CRC:" + entity[MetadataNames.Crc]);

            index++;
        }
    }
    //ExEnd:EnumerateAllArchivedFiles
}
/// <summary>
/// Reads one <c>ZipEntry</c> from the container's read stream. The entry
/// content is neither decompressed nor decrypted here: only the header
/// metadata is read and the stream is advanced past the entry data.
/// </summary>
/// <param name="zc">the ZipContainer this entry belongs to.</param>
/// <param name="first">
/// true if this is the first entry being read from the stream.
/// </param>
/// <returns>
/// the <c>ZipEntry</c> read from the stream, or null when no entry header could be read.
/// </returns>
internal static ZipEntry ReadEntry(ZipContainer zc, bool first)
{
    ZipFile owner = zc.ZipFile;
    Stream input = zc.ReadStream;
    System.Text.Encoding headerEncoding = zc.AlternateEncoding;

    ZipEntry result = new ZipEntry
    {
        _Source = ZipEntrySource.ZipFile,
        _container = zc,
        _archiveStream = input
    };

    bool hasOwner = owner != null;
    if (hasOwner)
    {
        owner.OnReadEntry(true, null);
    }

    if (first)
    {
        HandlePK00Prefix(input);
    }

    // Read the entry header, including any encryption header. This leaves
    // the stream positioned at the end of those headers.
    bool headerRead = ReadHeader(result, headerEncoding);
    if (!headerRead)
    {
        return null;
    }

    // Remember where the file data starts (change for workitem 8098).
    result.__FileDataPosition = result.ArchiveStream.Position;

    // Skip the data without reading it; it is read on Extract().
    // The compressed size covers the (maybe compressed, maybe encrypted)
    // file data, the encryption trailer if any, and the bit 3 descriptor if any.
    input.Seek(result._CompressedFileDataSize + result._LengthOfTrailer, SeekOrigin.Current);

    // workitem 10178
    Crisis.Ionic.Zip.SharedUtilities.Workaround_Ladybug318918(input);

    // workitem 5306
    // http://www.codeplex.com/DotNetZip/WorkItem/View.aspx?WorkItemId=5306
    HandleUnexpectedDataDescriptor(result);

    if (hasOwner)
    {
        owner.OnReadBytes(result);
        owner.OnReadEntry(false, result);
    }

    return result;
}
/// <summary>
/// Shared initialization: captures the input stream and sets the initial reader state.
/// </summary>
/// <param name="stream">Stream to read from; it must report <c>CanRead</c>.</param>
/// <param name="leaveOpen">Stored in <c>_leaveUnderlyingStreamOpen</c>.</param>
/// <exception cref="ZipException">The stream is not readable.</exception>
private void _Init(Stream stream, bool leaveOpen)
{
    _inputStream = stream;

    bool readable = _inputStream.CanRead;
    if (readable == false)
    {
        throw new ZipException("The stream must be readable.");
    }

    _container = new ZipContainer(this);

    // Provisional encoding: code page IBM437.
    System.Text.Encoding ibm437 = System.Text.Encoding.GetEncoding("IBM437");
    _provisionalAlternateEncoding = ibm437;

    _leaveUnderlyingStreamOpen = leaveOpen;
    _findRequired = true;
}
/// <summary>
/// Creates a token parser over an already-loaded OPF document and its archive.
/// </summary>
/// <param name="opf">OPF document; its root element must exist.</param>
/// <param name="opfPath">Path of the OPF document, stored in <c>_opfPath</c>.</param>
/// <param name="zip">Archive container, stored in <c>_zip</c>.</param>
/// <param name="css">Stylesheet object, stored in <c>_css</c>.</param>
/// <param name="anchors">Anchor map, stored in <c>_anchors</c>.</param>
/// <exception cref="DataException">The OPF document has no root element.</exception>
public EpubTokenParser(XDocument opf, EpubPath opfPath, ZipContainer zip, CSS css, Dictionary<string, int> anchors)
{
    // Capture the collaborators first.
    _opf = opf;
    _opfPath = opfPath;
    _zip = zip;
    _css = css;
    _anchors = anchors;

    // Fixed OPF namespace used for element lookups.
    _opfns = XNamespace.Get("http://www.idpf.org/2007/opf");

    _opfRoot = _opf.Root;
    if (_opfRoot != null)
    {
        return;
    }

    throw new DataException("Invalid epub meta info.");
}
/// <summary>
/// Shared initialization: captures the input stream, sets the initial reader
/// state and records a display name for the stream.
/// </summary>
/// <param name="stream">Stream to read from; it must report <c>CanRead</c>.</param>
/// <param name="leaveOpen">Stored in <c>_leaveUnderlyingStreamOpen</c>.</param>
/// <param name="name">Name recorded in <c>_name</c>; "(stream)" is used when null.</param>
/// <exception cref="ZipException">The stream is not readable.</exception>
private void _Init(Stream stream, bool leaveOpen, string name)
{
    _inputStream = stream;

    bool readable = _inputStream.CanRead;
    if (readable == false)
    {
        throw new ZipException("The stream must be readable.");
    }

    _container = new ZipContainer(this);

    // Silverlight builds use UTF-8; all other builds use code page IBM437.
#if SILVERLIGHT
    _provisionalAlternateEncoding = System.Text.Encoding.UTF8;
#else
    _provisionalAlternateEncoding = System.Text.Encoding.GetEncoding("IBM437");
#endif

    _leaveUnderlyingStreamOpen = leaveOpen;
    _findRequired = true;

    if (name == null)
    {
        _name = "(stream)";
    }
    else
    {
        _name = name;
    }
}
/// <summary>
/// Reads one <c>ZipEntry</c> from the container's read stream. Encrypted
/// entries are not decrypted and nothing is decompressed at this point;
/// only the header metadata is read and the entry data is skipped.
/// </summary>
/// <param name="zc">the ZipContainer this entry belongs to.</param>
/// <param name="first">true if this is the first entry being read from the stream.</param>
/// <returns>
/// the <c>ZipEntry</c> read from the stream, or null when no entry header could be read.
/// </returns>
internal static ZipEntry ReadEntry(ZipContainer zc, bool first)
{
    ZipFile owner = zc.ZipFile;
    Stream input = zc.ReadStream;
    System.Text.Encoding headerEncoding = zc.ProvisionalAlternateEncoding;

    ZipEntry result = new ZipEntry
    {
        _Source = ZipEntrySource.ZipFile,
        _container = zc,
        _archiveStream = input
    };

    bool hasOwner = owner != null;
    if (hasOwner)
    {
        owner.OnReadEntry(true, null);
    }

    if (first)
    {
        HandlePK00Prefix(input);
    }

    // Read the entry header, including any encryption header. This leaves
    // the stream positioned at the end of those headers.
    bool headerRead = ReadHeader(result, headerEncoding);
    if (!headerRead)
    {
        return null;
    }

    // Remember where the file data starts (change for workitem 8098).
    result.__FileDataPosition = result.ArchiveStream.Position;

    // Skip the data without reading it; it is read on Extract().
    // The compressed size covers the (maybe compressed, maybe encrypted)
    // file data, the encryption trailer if any, and the bit 3 descriptor if any.
    input.Seek(result._CompressedFileDataSize + result._LengthOfTrailer, SeekOrigin.Current);

    // workitem 5306
    // http://www.codeplex.com/DotNetZip/WorkItem/View.aspx?WorkItemId=5306
    HandleUnexpectedDataDescriptor(result);

    if (hasOwner)
    {
        owner.OnReadBytes(result);
        owner.OnReadEntry(false, result);
    }

    return result;
}
/// <summary>
/// Extracts and prints the text of every entity stored in a ZIP archive.
/// </summary>
/// <param name="folderName">Name of the ZIP archive; resolved to a path via <c>Common.getFilePath</c>.</param>
public static void ReadConcreteFile(string folderName)
{
    //ExStart:ReadConcreteFile
    // Resolve the archive location.
    string folderPath = Common.getFilePath(folderName);
    ExtractorFactory extractorFactory = new ExtractorFactory();

    // Open the archive; the container is disposed when the block exits.
    using (var container = new ZipContainer(folderPath))
    {
        // Visit every entity in the archive.
        for (int i = 0; i < container.Entities.Count; i++)
        {
            // CreateTextExtractor may return null when the entity's document
            // type is not supported. A using statement tolerates a null
            // resource, but the body must not dereference it.
            using (TextExtractor extractor = extractorFactory.CreateTextExtractor(container.Entities[i].OpenStream()))
            {
                if (extractor == null)
                {
                    // Report and move on instead of failing with a NullReferenceException.
                    Console.WriteLine("Document type isn't supported");
                    continue;
                }

                // Display the extracted text.
                Console.WriteLine(extractor.ExtractAll());
            }
        }
    }
    //ExEnd:ReadConcreteFile
}
/// <summary>
/// Looks up the "META-INF\container.xml" entity inside a ZIP archive and prints its text.
/// </summary>
/// <param name="folderName">Name of the ZIP archive; resolved to a path via <c>Common.GetFilePath</c>.</param>
/// <exception cref="GroupDocsTextException">The entity is not present in the archive.</exception>
public static void RetrieveEntity(string folderName)
{
    //ExStart:RetrieveEntity_17.12
    string archivePath = Common.GetFilePath(folderName);
    ExtractorFactory extractorFactory = new ExtractorFactory();

    using (var container = new ZipContainer(archivePath))
    {
        Container.Entity containerEntry = container.GetEntity("META-INF\\container.xml");
        if (containerEntry == null)
        {
            // The archive does not contain the requested entity.
            throw new GroupDocsTextException("File not found");
        }

        // The factory yields null for unsupported document types; a using
        // statement skips Dispose for a null resource, so no explicit
        // cleanup check is needed.
        using (TextExtractor extractor = extractorFactory.CreateTextExtractor(containerEntry.OpenStream()))
        {
            if (extractor == null)
            {
                Console.WriteLine("Document type isn't supported");
            }
            else
            {
                Console.WriteLine(extractor.ExtractAll());
            }
        }
    }
    //ExEnd:RetrieveEntity_17.12
}
/// <summary>
/// Opens an EPUB archive, locates its OPF package document through
/// "META-INF/container.xml" and prepares the state used for summary extraction.
/// </summary>
/// <param name="source">Stream containing the EPUB archive.</param>
/// <exception cref="DataException">
/// The container document has no root, declares no <c>rootfile</c> element,
/// the <c>rootfile</c> has no "full-path" attribute, or the OPF document has no root.
/// </exception>
public EpubSummaryParser(Stream source)
{
    _zip = ZipContainer.Unzip(source);

    XDocument xmlDocument = _zip.GetFileStream("META-INF/container.xml").GetXmlDocument();
    XElement root = xmlDocument.Root;
    if (root == null)
    {
        throw new DataException(InvalidEpubMetaInfo);
    }

    // The container namespace is optional; fall back to no namespace.
    XAttribute attribute = root.Attribute("xmlns");
    XNamespace xmlns = (attribute != null) ? XNamespace.Get(attribute.Value) : XNamespace.None;

    // A container without any <rootfile> is malformed metadata. Report it with
    // the same DataException as the other checks rather than letting First()
    // raise an unrelated InvalidOperationException.
    XElement rootFile = xmlDocument.Descendants(xmlns + "rootfile").FirstOrDefault();
    XAttribute fullPath = (rootFile == null) ? null : rootFile.Attribute("full-path");
    if (fullPath == null)
    {
        throw new DataException(InvalidEpubMetaInfo);
    }

    string path = fullPath.Value;
    _opfPath = path;

    _opf = _zip.GetFileStream(path).GetXmlDocument();
    _opfRoot = _opf.Root;
    if (_opfRoot == null)
    {
        throw new DataException(InvalidEpubMetaInfo);
    }

    _oebps = GetPath(path);
    _opfns = XNamespace.Get("http://www.idpf.org/2007/opf");
    _opfdc = XNamespace.Get("http://purl.org/dc/elements/1.1/");
    _coverHelper = new EpubCoverHelper(_zip, _opfns, _opfRoot, _oebps);
}
/// <summary>
/// Builds the bytes of the end-of-central-directory record (the regular,
/// non-ZIP64 footer), including the optional archive comment.
/// </summary>
/// <param name="StartOfCentralDirectory">Offset at which the central directory starts.</param>
/// <param name="EndOfCentralDirectory">Offset at which the central directory ends.</param>
/// <param name="zip64">ZIP64 option; <c>Always</c> forces the entry-count fields to 0xFFFF.</param>
/// <param name="entryCount">Number of entries in the central directory.</param>
/// <param name="comment">Archive comment; may be null or empty.</param>
/// <param name="container">Container used to select the comment encoding.</param>
/// <returns>A buffer of 22 bytes plus the encoded comment (at most 65535 comment bytes).</returns>
private static byte[] GenCentralDirectoryFooter(long StartOfCentralDirectory, long EndOfCentralDirectory, Zip64Option zip64, int entryCount, string comment, ZipContainer container)
{
    // Size of the record without the comment, and the largest value the
    // two-byte comment-length field can hold.
    const int FixedRecordLength = 22;
    const int MaxCommentLength = 0xFFFF;

    System.Text.Encoding encoding = GetEncoding(container, comment);

    byte[] block = null;
    int commentLength = 0;
    if ((comment != null) && (comment.Length != 0))
    {
        block = encoding.GetBytes(comment);

        // The length is kept as an int and clamped to the field maximum.
        // Narrowing to a signed 16-bit value would turn lengths above 32767
        // negative and corrupt both the buffer size and the length field.
        // NOTE: clamping cuts at a byte boundary, so an over-long comment in a
        // multi-byte encoding may end in a partial character.
        commentLength = Math.Min(block.Length, MaxCommentLength);
    }

    byte[] bytes = new byte[FixedRecordLength + commentLength];
    int i = 0;

    // signature
    byte[] sig = BitConverter.GetBytes(ZipConstants.EndOfCentralDirectorySignature);
    Array.Copy(sig, 0, bytes, i, 4);
    i += 4;

    // number of this disk
    // (this number may change later)
    bytes[i++] = 0;
    bytes[i++] = 0;

    // number of the disk with the start of the central directory
    // (this number may change later)
    bytes[i++] = 0;
    bytes[i++] = 0;

    // handle ZIP64 extensions for the end-of-central-directory
    if (entryCount >= 0xFFFF || zip64 == Zip64Option.Always)
    {
        // the ZIP64 version: both entry-count fields are filled with 0xFF
        for (int k = 0; k < 4; k++)
        {
            bytes[i++] = 0xFF;
        }
    }
    else
    {
        // the standard version.
        // total number of entries in the central dir on this disk
        bytes[i++] = (byte)(entryCount & 0x00FF);
        bytes[i++] = (byte)((entryCount & 0xFF00) >> 8);

        // total number of entries in the central directory
        bytes[i++] = (byte)(entryCount & 0x00FF);
        bytes[i++] = (byte)((entryCount & 0xFF00) >> 8);
    }

    // size of the central directory
    Int64 SizeOfCentralDirectory = EndOfCentralDirectory - StartOfCentralDirectory;

    if (SizeOfCentralDirectory >= 0xFFFFFFFF || StartOfCentralDirectory >= 0xFFFFFFFF)
    {
        // The actual data is in the ZIP64 central directory structure
        for (int k = 0; k < 8; k++)
        {
            bytes[i++] = 0xFF;
        }
    }
    else
    {
        // size of the central directory (we just get the low 4 bytes)
        bytes[i++] = (byte)(SizeOfCentralDirectory & 0x000000FF);
        bytes[i++] = (byte)((SizeOfCentralDirectory & 0x0000FF00) >> 8);
        bytes[i++] = (byte)((SizeOfCentralDirectory & 0x00FF0000) >> 16);
        bytes[i++] = (byte)((SizeOfCentralDirectory & 0xFF000000) >> 24);

        // offset of the start of the central directory (we just get the low 4 bytes)
        bytes[i++] = (byte)(StartOfCentralDirectory & 0x000000FF);
        bytes[i++] = (byte)((StartOfCentralDirectory & 0x0000FF00) >> 8);
        bytes[i++] = (byte)((StartOfCentralDirectory & 0x00FF0000) >> 16);
        bytes[i++] = (byte)((StartOfCentralDirectory & 0xFF000000) >> 24);
    }

    // zip archive comment: two-byte little-endian length (zero when there is
    // no comment), followed by the comment bytes themselves.
    bytes[i++] = (byte)(commentLength & 0x00FF);
    bytes[i++] = (byte)((commentLength & 0xFF00) >> 8);

    if (commentLength != 0)
    {
        Array.Copy(block, 0, bytes, i, commentLength);
        i += commentLength;
    }

    return bytes;
}
/// <summary>
/// Writes the central directory to <paramref name="s"/>: one directory entry
/// for each entry flagged <c>IncludedInMostRecentSave</c>, then (when needed)
/// the ZIP64 end-of-central-directory data, and finally the regular footer.
/// </summary>
/// <param name="s">
/// Output stream. When it is a <c>ZipSegmentedStream</c>, contiguous-write mode
/// is switched on for the duration of the call; when it is a
/// <c>CountingStream</c>, its <c>ComputedPosition</c> is used instead of <c>Position</c>.
/// </param>
/// <param name="entries">Entries to consider for the directory.</param>
/// <param name="numSegments">Passed through to <c>GenZip64EndOfCentralDirectory</c>.</param>
/// <param name="zip64">ZIP64 policy for this save.</param>
/// <param name="comment">Archive comment, passed to <c>GenCentralDirectoryFooter</c>.</param>
/// <param name="container">Container, passed to <c>GenCentralDirectoryFooter</c>.</param>
/// <returns>true when ZIP64 end-of-central-directory data was required and written.</returns>
/// <exception cref="ZipException">
/// ZIP64 data is required but <paramref name="zip64"/> is <c>Zip64Option.Never</c>.
/// </exception>
public static bool WriteCentralDirectoryStructure(Stream s, ICollection <ZipEntry> entries, uint numSegments, Zip64Option zip64, String comment, ZipContainer container)
{
    var zss = s as ZipSegmentedStream;
    if (zss != null)
    {
        zss.ContiguousWrite = true;
    }

    // write to a memory stream in order to keep the
    // CDR contiguous
    Int64 aLength = 0;
    using (var ms = new MemoryStream())
    {
        foreach (ZipEntry e in entries)
        {
            if (e.IncludedInMostRecentSave)
            {
                // this writes a ZipDirEntry corresponding to the ZipEntry
                e.WriteCentralDirectoryEntry(ms);
            }
        }
        var a = ms.ToArray();
        s.Write(a, 0, a.Length);
        aLength = a.Length;
    }

    // We need to keep track of the Start and
    // Finish of the Central Directory Structure.

    // Cannot always use WriteStream.Length or Position; some streams do
    // not support these. (eg, ASP.NET Response.OutputStream) In those
    // cases we have a CountingStream.

    // Also, we cannot just set Start as s.Position before the write, and Finish
    // as s.Position after the write. In a split zip, the write may actually
    // flip to the next segment. In that case, Start will be zero. But we
    // don't know that until after we know the size of the thing to write. So the
    // answer is to compute the directory, then ask the ZipSegmentedStream which
    // segment that directory would fall in, if it were written. Then, include
    // that data into the directory, and finally, write the directory to the
    // output stream.

    var output = s as CountingStream;
    long Finish = (output != null) ? output.ComputedPosition : s.Position; // BytesWritten
    // Start is derived from the number of directory bytes just written.
    long Start = Finish - aLength;

    // need to know which segment the EOCD record starts in
    UInt32 startSegment = (zss != null) ? zss.CurrentSegment : 0;

    Int64 SizeOfCentralDirectory = Finish - Start;

    int countOfEntries = CountEntries(entries);

    // ZIP64 data is needed when forced, or when the entry count, directory
    // size or directory offset reaches the limits checked below.
    bool needZip64CentralDirectory = zip64 == Zip64Option.Always || countOfEntries >= 0xFFFF || SizeOfCentralDirectory > 0xFFFFFFFF || Start > 0xFFFFFFFF;

    byte[] a2 = null;

    // emit ZIP64 extensions as required
    if (needZip64CentralDirectory)
    {
        if (zip64 == Zip64Option.Never)
        {
#if NETCF || CORECLR
            throw new ZipException("The archive requires a ZIP64 Central Directory. Consider enabling ZIP64 extensions.");
#else
            // Inspect the calling frame so the message names the property of
            // the type that invoked this method.
            System.Diagnostics.StackFrame sf = new System.Diagnostics.StackFrame(1);
            if (sf.GetMethod().DeclaringType == typeof(ZipFile))
            {
                throw new ZipException("The archive requires a ZIP64 Central Directory. Consider setting the ZipFile.UseZip64WhenSaving property.");
            }
            else
            {
                throw new ZipException("The archive requires a ZIP64 Central Directory. Consider setting the ZipOutputStream.EnableZip64 property.");
            }
#endif
        }

        var a = GenZip64EndOfCentralDirectory(Start, Finish, countOfEntries, numSegments);
        a2 = GenCentralDirectoryFooter(Start, Finish, zip64, countOfEntries, comment, container);
        if (startSegment != 0)
        {
            // Patch the segment (disk) numbers into the ZIP64 bytes, using the
            // segment in which both trailing blocks together would be written.
            UInt32 thisSegment = zss.ComputeSegment(a.Length + a2.Length);
            int i = 16;
            // number of this disk
            Array.Copy(BitConverter.GetBytes(thisSegment), 0, a, i, 4);
            i += 4;
            // number of the disk with the start of the central directory
            //Array.Copy(BitConverter.GetBytes(startSegment), 0, a, i, 4);
            Array.Copy(BitConverter.GetBytes(thisSegment), 0, a, i, 4);

            i = 60;
            // offset 60
            // number of the disk with the start of the zip64 eocd
            Array.Copy(BitConverter.GetBytes(thisSegment), 0, a, i, 4);
            i += 4;
            i += 8;

            // offset 72
            // total number of disks
            Array.Copy(BitConverter.GetBytes(thisSegment), 0, a, i, 4);
        }
        s.Write(a, 0, a.Length);
    }
    else
    {
        a2 = GenCentralDirectoryFooter(Start, Finish, zip64, countOfEntries, comment, container);
    }

    // now, the regular footer
    if (startSegment != 0)
    {
        // The assumption is the central directory is never split across
        // segment boundaries.

        UInt16 thisSegment = (UInt16)zss.ComputeSegment(a2.Length);
        int i = 4;
        // number of this disk
        Array.Copy(BitConverter.GetBytes(thisSegment), 0, a2, i, 2);
        i += 2;
        // number of the disk with the start of the central directory
        //Array.Copy(BitConverter.GetBytes((UInt16)startSegment), 0, a2, i, 2);
        Array.Copy(BitConverter.GetBytes(thisSegment), 0, a2, i, 2);
        i += 2;
    }

    s.Write(a2, 0, a2.Length);

    // reset the contiguous write property if necessary
    if (zss != null)
    {
        zss.ContiguousWrite = false;
    }

    return(needZip64CentralDirectory);
}
/// <summary>
/// Builds the table of contents of <paramref name="zf"/> by reading each entry
/// from the archive stream in order, then reads the central directory to
/// pick up per-entry comments and directory flags.
/// </summary>
/// <param name="zf">The zip file instance to populate.</param>
private static void ReadIntoInstance_Orig(ZipFile zf)
{
    zf.OnReadStarted();
    zf._entries = new System.Collections.Generic.Dictionary<String, ZipEntry>();

    ZipEntry e;
    if (zf.Verbose)
    {
        if (zf.Name == null)
        {
            zf.StatusMessageTextWriter.WriteLine("Reading zip from stream...");
        }
        else
        {
            zf.StatusMessageTextWriter.WriteLine("Reading zip {0}...", zf.Name);
        }
    }

    // work item 6647:  PK00 (packed to removable disk)
    bool firstEntry = true;
    ZipContainer zc = new ZipContainer(zf);
    while ((e = ZipEntry.ReadEntry(zc, firstEntry)) != null)
    {
        if (zf.Verbose)
        {
            zf.StatusMessageTextWriter.WriteLine("  {0}", e.FileName);
        }

        zf._entries.Add(e.FileName, e);
        firstEntry = false;
    }

    // read the zipfile's central directory structure here.
    // workitem 9912
    // But, because it may be corrupted, ignore errors.
    try
    {
        ZipEntry de;
        while ((de = ZipEntry.ReadDirEntry(zf)) != null)
        {
            // Housekeeping: Since ZipFile exposes ZipEntry elements in the enumerator,
            // we need to copy the comment that we grab from the ZipDirEntry
            // into the ZipEntry, so the application can access the comment.
            // Also since ZipEntry is used to Write zip files, we need to copy the
            // file attributes to the ZipEntry as appropriate.

            // TryGetValue instead of the indexer: the indexer throws for a
            // directory entry with no matching local entry, which made the
            // null guard unreachable and aborted the rest of the directory
            // scan (and the footer read) through the catch below.
            ZipEntry e1;
            if (zf._entries.TryGetValue(de.FileName, out e1) && e1 != null)
            {
                e1._Comment = de.Comment;
                if (de.IsDirectory)
                {
                    e1.MarkAsDirectory();
                }
            }
        }

        // workitem 8299
        if (zf._locEndOfCDS > 0)
        {
            zf.ReadStream.Seek(zf._locEndOfCDS, SeekOrigin.Begin);
        }

        ReadCentralDirectoryFooter(zf);

        if (zf.Verbose && !String.IsNullOrEmpty(zf.Comment))
        {
            zf.StatusMessageTextWriter.WriteLine("Zip file Comment: {0}", zf.Comment);
        }
    }
    catch
    {
        // Deliberate best effort: a corrupted central directory must not
        // prevent use of the entries already read above.
    }

    zf.OnReadCompleted();
}
/// <summary>
/// Shared initialization: captures the input stream and sets the initial reader state.
/// </summary>
/// <param name="stream">Stream to read from; it must report <c>CanRead</c>.</param>
/// <param name="leaveOpen">Stored in <c>_leaveUnderlyingStreamOpen</c>.</param>
/// <exception cref="ZipException">The stream is not readable.</exception>
private void _Init(Stream stream, bool leaveOpen)
{
    _inputStream = stream;

    // Reject streams that cannot be read before any further setup.
    if (_inputStream.CanRead == false)
    {
        throw new ZipException("The stream must be readable.");
    }

    _container = new ZipContainer(this);

    // Provisional encoding: code page IBM437.
    const string provisionalCodePage = "IBM437";
    _provisionalAlternateEncoding = System.Text.Encoding.GetEncoding(provisionalCodePage);

    _leaveUnderlyingStreamOpen = leaveOpen;
    _findRequired = true;
}
/// <summary>
/// Builds the table of contents of <paramref name="zf"/> by reading each entry
/// from the archive stream in order, then reads the central directory to
/// pick up per-entry comments and directory flags.
/// </summary>
/// <param name="zf">The zip file instance to populate.</param>
private static void ReadIntoInstance_Orig(ZipFile zf)
{
    zf.OnReadStarted();
    zf._entries = new System.Collections.Generic.Dictionary<String, ZipEntry>();

    ZipEntry e;
    if (zf.Verbose)
    {
        if (zf.Name == null)
        {
            zf.StatusMessageTextWriter.WriteLine("Reading zip from stream...");
        }
        else
        {
            zf.StatusMessageTextWriter.WriteLine("Reading zip {0}...", zf.Name);
        }
    }

    // work item 6647:  PK00 (packed to removable disk)
    bool firstEntry = true;
    ZipContainer zc = new ZipContainer(zf);
    while ((e = ZipEntry.ReadEntry(zc, firstEntry)) != null)
    {
        if (zf.Verbose)
        {
            zf.StatusMessageTextWriter.WriteLine("  {0}", e.FileName);
        }

        zf._entries.Add(e.FileName, e);
        firstEntry = false;
    }

    // read the zipfile's central directory structure here.
    // workitem 9912
    // But, because it may be corrupted, ignore errors.
    try
    {
        ZipEntry de;
        // in lieu of hashset, use a dictionary
        var previouslySeen = new Dictionary<String, Object>();
        while ((de = ZipEntry.ReadDirEntry(zf, previouslySeen)) != null)
        {
            // Housekeeping: Since ZipFile exposes ZipEntry elements in the enumerator,
            // we need to copy the comment that we grab from the ZipDirEntry
            // into the ZipEntry, so the application can access the comment.
            // Also since ZipEntry is used to Write zip files, we need to copy the
            // file attributes to the ZipEntry as appropriate.

            // TryGetValue instead of the indexer: the indexer throws
            // KeyNotFoundException for a directory entry with no matching
            // local entry. That exception is not one of the types ignored
            // below, so it would escape and defeat the "ignore errors" intent.
            ZipEntry e1;
            if (zf._entries.TryGetValue(de.FileName, out e1) && e1 != null)
            {
                e1._Comment = de.Comment;
                if (de.IsDirectory)
                {
                    e1.MarkAsDirectory();
                }
            }

            previouslySeen.Add(de.FileName, null); // to prevent dupes
        }

        // workitem 8299
        if (zf._locEndOfCDS > 0)
        {
            zf.ReadStream.Seek(zf._locEndOfCDS, SeekOrigin.Begin);
        }

        ReadCentralDirectoryFooter(zf);

        if (zf.Verbose && !String.IsNullOrEmpty(zf.Comment))
        {
            zf.StatusMessageTextWriter.WriteLine("Zip file Comment: {0}", zf.Comment);
        }
    }
    catch (ZipException)
    {
        // Deliberately ignored: the central directory may be corrupted.
    }
    catch (IOException)
    {
        // Deliberately ignored: the central directory may be unreadable.
    }

    zf.OnReadCompleted();
}
/// <summary>
/// Extracts the text of an uploaded file line by line and returns it as JSON.
/// Password-protected files are opened with the supplied password; ".zip"
/// files have the text of every contained entity extracted.
/// </summary>
/// <param name="fileName">Name of a file inside the uploads folder.</param>
/// <param name="password">Optional password for protected documents.</param>
/// <returns>
/// A JSON array of the extracted lines. When extraction fails, the array
/// ends with the error message instead of the remaining lines.
/// </returns>
public ActionResult ExtractText([FromBody] string fileName, string password = null)
{
    //ExStart:ExtractText
    ExtractorFactory factory = new ExtractorFactory();

    // fileName comes from the request: keep only its final path component so
    // it cannot point outside the uploads folder (e.g. via "..\" segments).
    string path = Server.MapPath("../App_Data//Uploads//" + Path.GetFileName(fileName));
    string ext = Path.GetExtension(path);
    bool isOneNote = string.Equals(ext, ".one", StringComparison.OrdinalIgnoreCase);
    bool isZip = string.Equals(ext, ".zip", StringComparison.OrdinalIgnoreCase);

    List<string> extractedText = new List<string>();
    try
    {
        if (!string.IsNullOrWhiteSpace(password))
        {
            // Password-protected file.
            var loadOptions = new LoadOptions();
            loadOptions.Password = password;

            if (isOneNote)
            {
                using (var extractor = new NoteTextExtractor(path, loadOptions))
                {
                    AppendExtractedLines(extractor.ExtractLine, extractedText);
                }
            }
            else
            {
                // Disposed here; previously this extractor was never released.
                using (var protectedDocument = new WordsTextExtractor(path, loadOptions))
                {
                    AppendExtractedLines(protectedDocument.ExtractLine, extractedText);
                }
            }
        }
        else if (isZip)
        {
            using (var container = new ZipContainer(path))
            {
                for (int i = 0; i < container.Entities.Count; i++)
                {
                    using (TextExtractor extractor = factory.CreateTextExtractor(container.Entities[i].OpenStream()))
                    {
                        if (extractor == null)
                        {
                            // Unsupported entity: report it and continue with the rest.
                            extractedText.Add("Document type isn't supported");
                            continue;
                        }

                        AppendExtractedLines(extractor.ExtractLine, extractedText);
                    }
                }
            }
        }
        else
        {
            using (TextExtractor extractor = factory.CreateTextExtractor(path))
            {
                if (extractor == null)
                {
                    extractedText.Add("Document type isn't supported");
                }
                else
                {
                    try
                    {
                        AppendExtractedLines(extractor.ExtractLine, extractedText);
                    }
                    catch (Exception)
                    {
                        // A failure while reading a OneNote file without a
                        // password is reported as a password problem. Any
                        // other failure goes to the outer handler, which
                        // records its message. Reading stops in both cases,
                        // so a failing extractor can no longer be retried forever.
                        if (!isOneNote)
                        {
                            throw;
                        }

                        extractedText.Add("Invalid password");
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        extractedText.Add(ex.Message);
    }

    return Json(extractedText, JsonRequestBehavior.AllowGet);
}

/// <summary>
/// Calls <paramref name="readLine"/> until it returns null, appending every
/// non-null line to <paramref name="sink"/>.
/// </summary>
private static void AppendExtractedLines(Func<string> readLine, ICollection<string> sink)
{
    string line;
    while ((line = readLine()) != null)
    {
        sink.Add(line);
    }
}