// ZipInputStream.cs
//
// ------------------------------------------------------------------
//
// Copyright (c) 2009-2010 Dino Chiesa.
// All rights reserved.
//
// This code module is part of DotNetZip, a zipfile class library.
//
// ------------------------------------------------------------------
//
// This code is licensed under the Microsoft Public License.
// See the file License.txt for the license details.
// More info on: http://dotnetzip.codeplex.com
//
// ------------------------------------------------------------------
//
// last saved (in emacs):
// Time-stamp: <2011-July-31 14:48:30>
//
// ------------------------------------------------------------------
//
// This module defines the ZipInputStream class, which is a stream metaphor for
// reading zip files. This class does not depend on Ionic.Zip.ZipFile, but rather
// stands alongside it as an alternative "container" for ZipEntry, when reading zips.
//
// It adds one interesting method to the normal "stream" interface: GetNextEntry.
//
// ------------------------------------------------------------------
//
using System;
using System.Threading;
using System.Collections.Generic;
using System.IO;
using Ionic.Zip;
using OfficeOpenXml.Packaging.Ionic.Zip;
using OfficeOpenXml.Packaging.Ionic.Crc;
namespace Ionic.Zip
{
///
/// Provides a stream metaphor for reading zip files.
///
///
///
///
/// This class provides an alternative programming model for reading zip files to
/// the one enabled by the ZipFile class. Use this when reading zip
/// files, as an alternative to the ZipFile class, when you would
/// like to use a Stream class to read the file.
///
///
///
/// Some application designs require a readable stream for input. This stream can
/// be used to read a zip file, and extract entries.
///
///
///
/// Both the ZipInputStream class and the ZipFile class can be used
/// to read and extract zip files. Both of them support many of the common zip
/// features, including Unicode, different compression levels, and ZIP64. The
/// programming models differ. For example, when extracting entries via calls to
/// the GetNextEntry() and Read() methods on the
/// ZipInputStream class, the caller is responsible for creating the file,
/// writing the bytes into the file, setting the attributes on the file, and
/// setting the created, last modified, and last accessed timestamps on the
/// file. All of these things are done automatically by a call to ZipEntry.Extract(). For this reason, the
/// ZipInputStream is generally recommended for when your application wants
/// to extract the data, without storing that data into a file.
///
///
///
/// Aside from the obvious differences in programming model, there are some
/// differences in capability between the ZipFile class and the
/// ZipInputStream class.
///
///
///
/// -
/// ZipFile can be used to create or update zip files, or read and
/// extract zip files. ZipInputStream can be used only to read and
/// extract zip files. If you want to use a stream to create zip files, check
/// out the ZipOutputStream class.
///
///
/// -
/// ZipInputStream cannot read segmented or spanned
/// zip files.
///
///
/// -
/// ZipInputStream will not read Zip file comments.
///
///
/// -
/// When reading larger files, ZipInputStream will always underperform
/// ZipFile. This is because the ZipInputStream does a full scan on the
/// zip file, while the ZipFile class reads the central directory of the
/// zip file.
///
///
///
///
///
internal class ZipInputStream : Stream
{
///
/// Create a ZipInputStream, wrapping it around an existing stream.
///
///
///
///
///
/// While the ZipFile class is generally easier
/// to use, this class provides an alternative to those
/// applications that want to read from a zipfile directly,
/// using a Stream.
///
///
///
/// Both the ZipInputStream class and the ZipFile class can be used
/// to read and extract zip files. Both of them support many of the common zip
/// features, including Unicode, different compression levels, and ZIP64. The
/// programming models differ. For example, when extracting entries via calls to
/// the GetNextEntry() and Read() methods on the
/// ZipInputStream class, the caller is responsible for creating the file,
/// writing the bytes into the file, setting the attributes on the file, and
/// setting the created, last modified, and last accessed timestamps on the
/// file. All of these things are done automatically by a call to ZipEntry.Extract(). For this reason, the
/// ZipInputStream is generally recommended for when your application wants
/// to extract the data, without storing that data into a file.
///
///
///
/// Aside from the obvious differences in programming model, there are some
/// differences in capability between the ZipFile class and the
/// ZipInputStream class.
///
///
///
/// -
/// ZipFile can be used to create or update zip files, or read and extract
/// zip files. ZipInputStream can be used only to read and extract zip
/// files. If you want to use a stream to create zip files, check out the ZipOutputStream class.
///
///
/// -
/// ZipInputStream cannot read segmented or spanned
/// zip files.
///
///
/// -
/// ZipInputStream will not read Zip file comments.
///
///
/// -
/// When reading larger files, ZipInputStream will always underperform
/// ZipFile. This is because the ZipInputStream does a full scan on the
/// zip file, while the ZipFile class reads the central directory of the
/// zip file.
///
///
///
///
///
///
///
/// The stream to read. It must be readable. This stream will be closed at
/// the time the ZipInputStream is closed.
///
///
///
///
/// This example shows how to read a zip file, and extract entries, using the
/// ZipInputStream class.
///
///
/// private void Unzip()
/// {
/// byte[] buffer= new byte[2048];
/// int n;
/// using (var raw = File.Open(inputFileName, FileMode.Open, FileAccess.Read))
/// {
/// using (var input= new ZipInputStream(raw))
/// {
/// ZipEntry e;
/// while (( e = input.GetNextEntry()) != null)
/// {
/// if (e.IsDirectory) continue;
/// string outputPath = Path.Combine(extractDir, e.FileName);
/// using (var output = File.Open(outputPath, FileMode.Create, FileAccess.ReadWrite))
/// {
/// while ((n= input.Read(buffer, 0, buffer.Length)) > 0)
/// {
/// output.Write(buffer,0,n);
/// }
/// }
/// }
/// }
/// }
/// }
///
///
///
/// Private Sub UnZip()
/// Dim inputFileName As String = "MyArchive.zip"
/// Dim extractDir As String = "extract"
/// Dim buffer As Byte() = New Byte(2048) {}
/// Using raw As FileStream = File.Open(inputFileName, FileMode.Open, FileAccess.Read)
/// Using input As ZipInputStream = New ZipInputStream(raw)
/// Dim e As ZipEntry
/// Do While (Not e = input.GetNextEntry Is Nothing)
/// If Not e.IsDirectory Then
/// Using output As FileStream = File.Open(Path.Combine(extractDir, e.FileName), _
/// FileMode.Create, FileAccess.ReadWrite)
/// Dim n As Integer
/// Do While (n = input.Read(buffer, 0, buffer.Length) > 0)
/// output.Write(buffer, 0, n)
/// Loop
/// End Using
/// End If
/// Loop
/// End Using
/// End Using
/// End Sub
///
///
public ZipInputStream(Stream stream)
    : this(stream, false)
{
    // Chains to the (Stream, bool) constructor with leaveOpen = false, so the
    // wrapped stream is disposed together with this instance.
}
///
/// Create a ZipInputStream, given the name of an existing zip file.
///
///
///
///
///
/// This constructor opens a FileStream for the given zipfile, and
/// wraps a ZipInputStream around that. See the documentation for the
/// ZipInputStream(Stream) constructor for full details.
///
///
///
/// While the ZipFile class is generally easier
/// to use, this class provides an alternative to those
/// applications that want to read from a zipfile directly,
/// using a Stream.
///
///
///
///
///
/// The name of the filesystem file to read.
///
///
///
///
/// This example shows how to read a zip file, and extract entries, using the
/// ZipInputStream class.
///
///
/// private void Unzip()
/// {
/// byte[] buffer= new byte[2048];
/// int n;
/// using (var input= new ZipInputStream(inputFileName))
/// {
/// ZipEntry e;
/// while (( e = input.GetNextEntry()) != null)
/// {
/// if (e.IsDirectory) continue;
/// string outputPath = Path.Combine(extractDir, e.FileName);
/// using (var output = File.Open(outputPath, FileMode.Create, FileAccess.ReadWrite))
/// {
/// while ((n= input.Read(buffer, 0, buffer.Length)) > 0)
/// {
/// output.Write(buffer,0,n);
/// }
/// }
/// }
/// }
/// }
///
///
///
/// Private Sub UnZip()
/// Dim inputFileName As String = "MyArchive.zip"
/// Dim extractDir As String = "extract"
/// Dim buffer As Byte() = New Byte(2048) {}
/// Using input As ZipInputStream = New ZipInputStream(inputFileName)
/// Dim e As ZipEntry
/// Do While (Not e = input.GetNextEntry Is Nothing)
/// If Not e.IsDirectory Then
/// Using output As FileStream = File.Open(Path.Combine(extractDir, e.FileName), _
/// FileMode.Create, FileAccess.ReadWrite)
/// Dim n As Integer
/// Do While (n = input.Read(buffer, 0, buffer.Length) > 0)
/// output.Write(buffer, 0, n)
/// Loop
/// End Using
/// End If
/// Loop
/// End Using
/// End Sub
///
///
public ZipInputStream(String fileName)
{
    // Open the zip file read-only, allowing other readers to share it.
    Stream stream = File.Open(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
    try
    {
        // leaveOpen is false: this instance owns the FileStream it just created.
        _Init(stream, false, fileName);
    }
    catch
    {
        // The caller never receives a reference to the FileStream, so if
        // initialization fails it must be released here or the file handle leaks.
#if NETCF
        stream.Close();
#else
        stream.Dispose();
#endif
        throw;
    }
}
/// <summary>
/// Create a <c>ZipInputStream</c> around an existing stream, explicitly stating
/// whether the wrapped stream should stay open once this instance is closed.
/// </summary>
///
/// <remarks>
/// See the documentation for the <c>ZipInputStream(Stream)</c> constructor for a
/// discussion of the class, and an example of how to use the class.
/// </remarks>
///
/// <param name="stream">
/// The stream to read from. It must be readable.
/// </param>
///
/// <param name="leaveOpen">
/// true if the application would like the stream to remain open after the
/// <c>ZipInputStream</c> has been closed.
/// </param>
public ZipInputStream(Stream stream, bool leaveOpen)
{
    // A caller-supplied stream has no file name associated with it.
    const string noName = null;
    _Init(stream, leaveOpen, noName);
}
/// <summary>
/// Shared constructor logic: validates the stream and sets the initial state.
/// </summary>
/// <param name="stream">The stream to read the zip data from; must be non-null and readable.</param>
/// <param name="leaveOpen">true to leave <paramref name="stream"/> open when this instance is disposed.</param>
/// <param name="name">Name reported by ToString(); "(stream)" is used when this is null.</param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="ZipException"><paramref name="stream"/> is not readable.</exception>
private void _Init(Stream stream, bool leaveOpen, string name)
{
    // Validate before touching any instance state, so a bad argument fails
    // with a descriptive exception rather than a NullReferenceException.
    if (stream == null)
        throw new ArgumentNullException("stream");
    if (!stream.CanRead)
        throw new ZipException("The stream must be readable.");

    _inputStream = stream;
    _container = new ZipContainer(this);
    _provisionalAlternateEncoding = System.Text.Encoding.GetEncoding("IBM437");
    _leaveUnderlyingStreamOpen = leaveOpen;
    // The first GetNextEntry() call has to scan for an entry signature.
    _findRequired = true;
    _name = name ?? "(stream)";
}
/// <summary>Provides a string representation of the instance.</summary>
/// <remarks>
/// This can be useful for debugging purposes. The text contains the name
/// given at construction (or "(stream)") and the leaveOpen setting.
/// </remarks>
/// <returns>a string representation of the instance.</returns>
public override String ToString()
{
    // The previous format string ended with one closing parenthesis too many.
    return String.Format("ZipInputStream::{0}(leaveOpen({1}))", _name, _leaveUnderlyingStreamOpen);
}
///
/// The text encoding to use when reading entries into the zip archive, for
/// those entries whose filenames or comments cannot be encoded with the
/// default (IBM437) encoding.
///
///
///
///
/// In its
/// zip specification, PKWare describes two options for encoding
/// filenames and comments: using IBM437 or UTF-8. But, some archiving tools
/// or libraries do not follow the specification, and instead encode
/// characters using the system default code page. For example, WinRAR when
/// run on a machine in Shanghai may encode filenames with the Big-5 Chinese
/// (950) code page. This behavior is contrary to the Zip specification, but
/// it occurs anyway.
///
///
///
/// When using DotNetZip to read zip archives that use something other than
/// UTF-8 or IBM437, set this property to specify the code page to use when
/// reading encoded filenames and comments for each ZipEntry in the zip
/// file.
///
///
///
/// This property is "provisional". When the entry in the zip archive is not
/// explicitly marked as using UTF-8, then IBM437 is used to decode filenames
/// and comments. If a loss of data would result from using IBM437 -
/// specifically when encoding and decoding is not reflexive - the codepage
/// specified here is used. It is possible, therefore, to have a given entry
/// with a Comment encoded in IBM437 and a FileName encoded with
/// the specified "provisional" codepage.
///
///
///
/// When a zip file uses an arbitrary, non-UTF8 code page for encoding, there
/// is no standard way for the reader application - whether DotNetZip, WinZip,
/// WinRar, or something else - to know which codepage has been used for the
/// entries. Readers of zip files are not able to inspect the zip file and
/// determine the codepage that was used for the entries contained within it.
/// It is left to the application or user to determine the necessary codepage
/// when reading zip files encoded this way. If you use an incorrect codepage
/// when reading a zipfile, you will get entries with filenames that are
/// incorrect, and the incorrect filenames may even contain characters that
/// are not legal for use within filenames in Windows. Extracting entries with
/// illegal characters in the filenames will lead to exceptions. It's too bad,
/// but this is just the way things are with code pages in zip files. Caveat
/// Emptor.
///
///
///
public System.Text.Encoding ProvisionalAlternateEncoding
{
    // Plain accessor pair over the backing field; no validation is applied.
    get { return _provisionalAlternateEncoding; }
    set { _provisionalAlternateEncoding = value; }
}
///
/// Size of the work buffer to use for the ZLIB codec during decompression.
///
///
///
/// Setting this affects the performance and memory efficiency of compression
/// and decompression. For larger files, setting this to a larger size may
/// improve performance, but the exact numbers vary depending on available
/// memory, and a bunch of other variables. I don't have good firm
/// recommendations on how to set it. You'll have to test it yourself. Or
/// just leave it alone and accept the default.
///
public int CodecBufferSize { get; set; }
///
/// Sets the password to be used on the ZipInputStream instance.
///
///
///
///
///
/// When reading a zip archive, this password is used to read and decrypt the
/// entries that are encrypted within the zip file. When entries within a zip
/// file use different passwords, set the appropriate password for the entry
/// before the first call to Read() for each entry.
///
///
///
/// When reading an entry that is not encrypted, the value of this property is
/// ignored.
///
///
///
///
///
///
/// This example uses the ZipInputStream to read and extract entries from a
/// zip file, using a potentially different password for each entry.
///
///
/// byte[] buffer= new byte[2048];
/// int n;
/// using (var raw = File.Open(_inputFileName, FileMode.Open, FileAccess.Read ))
/// {
/// using (var input= new ZipInputStream(raw))
/// {
/// ZipEntry e;
/// while (( e = input.GetNextEntry()) != null)
/// {
/// input.Password = PasswordForEntry(e.FileName);
/// if (e.IsDirectory) continue;
/// string outputPath = Path.Combine(_extractDir, e.FileName);
/// using (var output = File.Open(outputPath, FileMode.Create, FileAccess.ReadWrite))
/// {
/// while ((n= input.Read(buffer,0,buffer.Length)) > 0)
/// {
/// output.Write(buffer,0,n);
/// }
/// }
/// }
/// }
/// }
///
///
///
public String Password
{
    set
    {
        if (!_closed)
        {
            // Stored for the next SetupStream() call, which hands it to the entry reader.
            _Password = value;
            return;
        }

        // Using the instance after it has been closed is an error; record that
        // an exception is in flight before raising it.
        _exceptionPending = true;
        throw new System.InvalidOperationException("The stream has been closed.");
    }
}
private void SetupStream()
{
    // Open a reader over the current entry (using the configured password)
    // and note how many bytes it will yield.
    var entryReader = _currentEntry.InternalOpenReader(_Password);
    _crcStream = entryReader;
    _LeftToRead = entryReader.Length;

    // Setup is done until GetNextEntry() selects another entry.
    _needSetup = false;
}
// Exposes the wrapped input stream to other types in this assembly.
internal Stream ReadStream
{
    get { return _inputStream; }
}
///
/// Read the data from the stream into the buffer.
///
///
///
///
/// The data for the zipentry will be decrypted and uncompressed, as
/// necessary, before being copied into the buffer.
///
///
///
/// You must set the Password property before calling
/// Read() the first time for an encrypted entry. To determine if an
/// entry is encrypted and requires a password, check the ZipEntry.Encryption property.
///
///
///
/// The buffer to hold the data read from the stream.
/// the offset within the buffer to copy the first byte read.
/// the number of bytes to read.
/// the number of bytes read, after decryption and decompression.
public override int Read(byte[] buffer, int offset, int count)
{
    if (_closed)
    {
        _exceptionPending = true;
        throw new System.InvalidOperationException("The stream has been closed.");
    }

    // Lazily open the reader for the entry selected by GetNextEntry().
    if (_needSetup)
    {
        SetupStream();
    }

    if (_LeftToRead == 0)
    {
        return 0;
    }

    // Never ask for more than what remains of the current entry.
    int request;
    if (_LeftToRead > count)
    {
        request = count;
    }
    else
    {
        request = (int)_LeftToRead;
    }

    int bytesRead = _crcStream.Read(buffer, offset, request);
    _LeftToRead -= bytesRead;
    if (_LeftToRead != 0)
    {
        return bytesRead;
    }

    // The entry is exhausted: check its CRC, then park the underlying
    // stream at the position recorded as the end of the entry.
    _currentEntry.VerifyCrcAfterExtract(_crcStream.Crc);
    _inputStream.Seek(_endOfEntry, SeekOrigin.Begin);
    // workitem 10178
    SharedUtilities.Workaround_Ladybug318918(_inputStream);
    return bytesRead;
}
///
/// Read the next entry from the zip file.
///
///
///
///
/// Call this method just before calling Read(),
/// to position the pointer in the zip file to the next entry that can be
/// read. Subsequent calls to Read(), will decrypt and decompress the
/// data in the zip file, until Read() returns 0.
///
///
///
/// Each time you call GetNextEntry(), the pointer in the wrapped
/// stream is moved to the next entry in the zip file. If you call Seek(), and thus re-position the pointer within
/// the file, you will need to call GetNextEntry() again, to insure
/// that the file pointer is positioned at the beginning of a zip entry.
///
///
///
/// This method returns the ZipEntry. Using a stream approach, you will
/// read the raw bytes for an entry in a zip file via calls to Read().
/// Alternatively, you can extract an entry into a file, or a stream, by
/// calling ZipEntry.Extract(), or one of its siblings.
///
///
///
///
///
/// The ZipEntry read. Returns null (or Nothing in VB) if there are no more
/// entries in the zip file.
///
///
public ZipEntry GetNextEntry()
{
    if (!_findRequired)
    {
        // workitem 10923
        if (_firstEntry)
        {
            // An entry has already been read: resume from the position
            // recorded just past it.
            _inputStream.Seek(_endOfEntry, SeekOrigin.Begin);
            SharedUtilities.Workaround_Ladybug318918(_inputStream);
        }
    }
    else
    {
        // Scan forward for the next entry signature; -1 means none was found.
        long signatureOffset = SharedUtilities.FindSignature(_inputStream, ZipConstants.ZipEntrySignature);
        if (signatureOffset == -1)
        {
            return null;
        }

        // Back up 4 bytes: ReadEntry assumes the file pointer is positioned
        // before the entry signature.
        _inputStream.Seek(-4, SeekOrigin.Current);
        // workitem 10178
        SharedUtilities.Workaround_Ladybug318918(_inputStream);
    }

    // Despite its name, _firstEntry is false only until an entry has been
    // read, so its negation is true for the very first ReadEntry call.
    bool noEntryReadYet = !_firstEntry;
    _currentEntry = ZipEntry.ReadEntry(_container, noEntryReadYet);

    // ReadEntry leaves the file position after all the entry data and the
    // optional bit-3 data descriptor. This is where the next entry would
    // normally start.
    _endOfEntry = _inputStream.Position;

    _findRequired = false;
    _needSetup = true;
    _firstEntry = true;
    return _currentEntry;
}
///
/// Dispose the stream.
///
///
///
///
/// This method disposes the ZipInputStream. It may also close the
/// underlying stream, depending on which constructor was used.
///
///
///
/// Typically the application will call Dispose() implicitly, via
/// a using statement in C#, or a Using statement in VB.
///
///
///
/// Application code won't call this code directly. This method may
/// be invoked in two distinct scenarios. If disposing == true, the
/// method has been called directly or indirectly by a user's code,
/// for example via the public Dispose() method. In this case, both
/// managed and unmanaged resources can be referenced and disposed.
/// If disposing == false, the method has been called by the runtime
/// from inside the object finalizer and this method should not
/// reference other objects; in that case only unmanaged resources
/// must be referenced or disposed.
///
///
///
///
/// true if the Dispose method was invoked by user code.
///
protected override void Dispose(bool disposing)
{
    // Already closed: nothing more to do.
    if (_closed) return;

    try
    {
        if (disposing) // not called from finalizer
        {
            // _exceptionPending is set when a member was used after the
            // stream was closed; in that case leave everything untouched and
            // let the exception propagate to the application.
            if (_exceptionPending) return;

            if (!_leaveUnderlyingStreamOpen)
            {
#if NETCF
                _inputStream.Close();
#else
                _inputStream.Dispose();
#endif
            }
        }
        _closed = true;
    }
    finally
    {
        // Chain to the base class so that resources held by Stream itself are
        // released too; the original override never called it.
        base.Dispose(disposing);
    }
}
/// <summary>
/// Indicates whether the stream supports reading. This implementation
/// always returns true.
/// </summary>
public override bool CanRead
{
    get
    {
        return true;
    }
}
/// <summary>
/// Indicates whether the stream supports seeking. The answer is taken from
/// the CanSeek property of the underlying (wrapped) stream.
/// </summary>
public override bool CanSeek
{
    get
    {
        return _inputStream.CanSeek;
    }
}
/// <summary>
/// Indicates whether the stream supports writing. This implementation
/// always returns false.
/// </summary>
public override bool CanWrite
{
    get
    {
        return false;
    }
}
/// <summary>
/// Gets the length of the underlying stream, as reported by that stream.
/// </summary>
public override long Length
{
    get
    {
        return _inputStream.Length;
    }
}
/// <summary>
/// Gets or sets the position of the underlying stream.
/// </summary>
/// <remarks>
/// Reading returns the underlying stream's position. Setting the position is
/// equivalent to calling <c>Seek(value, SeekOrigin.Begin)</c>.
/// </remarks>
public override long Position
{
    get
    {
        return _inputStream.Position;
    }
    set
    {
        Seek(value, SeekOrigin.Begin);
    }
}
/// <summary>
/// This is a no-op.
/// </summary>
/// <remarks>
/// A <c>ZipInputStream</c> is read-only and has nothing to flush. The method
/// deliberately does nothing rather than throwing, so that code which
/// flushes streams generically keeps working with this type.
/// </remarks>
public override void Flush()
{
    // Intentionally empty: there is no buffered output on a read-only stream.
}
/// <summary>
/// Not supported: this method always throws a NotSupportedException.
/// </summary>
/// <param name="buffer">ignored</param>
/// <param name="offset">ignored</param>
/// <param name="count">ignored</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public override void Write(byte[] buffer, int offset, int count)
{
    const string operation = "Write";
    throw new NotSupportedException(operation);
}
///
/// This method seeks in the underlying stream.
///
///
///
///
/// Call this method if you want to seek around within the zip file for random access.
///
///
///
/// Applications can intermix calls to Seek() with calls to GetNextEntry(). After a call to Seek(),
/// GetNextEntry() will get the next ZipEntry that falls after
/// the current position in the input stream. You're on your own for finding
/// out just where to seek in the stream, to get to the various entries.
///
///
///
///
/// the offset point to seek to
/// the reference point from which to seek
/// The new position
public override long Seek(long offset, SeekOrigin origin)
{
    // After an arbitrary seek the stream is no longer known to sit on an
    // entry boundary, so the next GetNextEntry() must scan for a signature.
    _findRequired = true;

    long newPosition = _inputStream.Seek(offset, origin);
    // workitem 10178
    SharedUtilities.Workaround_Ladybug318918(_inputStream);
    return newPosition;
}
/// <summary>
/// Not supported: this method always throws a NotSupportedException.
/// </summary>
/// <param name="value">ignored</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public override void SetLength(long value)
{
    NotSupportedException unsupported = new NotSupportedException();
    throw unsupported;
}
// The wrapped stream that zip data is read from; assigned in _Init.
private Stream _inputStream;
// Backing field for ProvisionalAlternateEncoding; initialized to IBM437 in _Init.
private System.Text.Encoding _provisionalAlternateEncoding;
// The entry most recently returned by GetNextEntry().
private ZipEntry _currentEntry;
// Despite the name: false until GetNextEntry() has read an entry, true afterwards.
private bool _firstEntry;
// True when SetupStream() must run before the next Read(); set by GetNextEntry().
private bool _needSetup;
// Container created around this instance in _Init and passed to ZipEntry.ReadEntry.
private ZipContainer _container;
// Reader over the current entry, obtained from InternalOpenReader in SetupStream().
private CrcCalculatorStream _crcStream;
// Number of bytes of the current entry that Read() has not yet returned.
private Int64 _LeftToRead;
// Password handed to InternalOpenReader; assigned through the Password property.
internal String _Password;
// Underlying-stream position recorded right after ReadEntry in GetNextEntry().
private Int64 _endOfEntry;
// Name shown by ToString(): the file name, or "(stream)" when none was given.
private string _name;
// When true, Dispose leaves the wrapped stream open.
private bool _leaveUnderlyingStreamOpen;
// Set once Dispose has completed; Read() and the Password setter throw afterwards.
private bool _closed;
// True when GetNextEntry() must scan for the next entry signature (initially and after Seek()).
private bool _findRequired;
// Set just before throwing because the instance was used after being closed; checked in Dispose.
private bool _exceptionPending;
}
}