001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 * 017 */ 018package org.apache.commons.compress.archivers.zip; 019 020import java.io.BufferedInputStream; 021import java.io.Closeable; 022import java.io.EOFException; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.RandomAccessFile; 027import java.util.Arrays; 028import java.util.Collections; 029import java.util.Comparator; 030import java.util.Enumeration; 031import java.util.HashMap; 032import java.util.LinkedList; 033import java.util.List; 034import java.util.Map; 035import java.util.zip.Inflater; 036import java.util.zip.InflaterInputStream; 037import java.util.zip.ZipException; 038 039import org.apache.commons.compress.utils.IOUtils; 040 041import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD; 042import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT; 043import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD; 044import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC; 045import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT; 046 047/** 048 * Replacement for 
<code>java.util.ZipFile</code>. 049 * 050 * <p>This class adds support for file name encodings other than UTF-8 051 * (which is required to work on ZIP files created by native zip tools 052 * and is able to skip a preamble like the one found in self 053 * extracting archives. Furthermore it returns instances of 054 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 055 * instead of <code>java.util.zip.ZipEntry</code>.</p> 056 * 057 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would 058 * have to reimplement all methods anyway. Like 059 * <code>java.util.ZipFile</code>, it uses RandomAccessFile under the 060 * covers and supports compressed and uncompressed entries. As of 061 * Apache Commons Compress 1.3 it also transparently supports Zip64 062 * extensions and thus individual entries and archives larger than 4 063 * GB or with more than 65536 entries.</p> 064 * 065 * <p>The method signatures mimic the ones of 066 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions: 067 * 068 * <ul> 069 * <li>There is no getName method.</li> 070 * <li>entries has been renamed to getEntries.</li> 071 * <li>getEntries and getEntry return 072 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code> 073 * instances.</li> 074 * <li>close is allowed to throw IOException.</li> 075 * </ul> 076 * 077 */ 078public class ZipFile implements Closeable { 079 private static final int HASH_SIZE = 509; 080 static final int NIBLET_MASK = 0x0f; 081 static final int BYTE_SHIFT = 8; 082 private static final int POS_0 = 0; 083 private static final int POS_1 = 1; 084 private static final int POS_2 = 2; 085 private static final int POS_3 = 3; 086 087 /** 088 * List of entries in the order they appear inside the central 089 * directory. 090 */ 091 private final List<ZipArchiveEntry> entries = 092 new LinkedList<ZipArchiveEntry>(); 093 094 /** 095 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 
096 */ 097 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = 098 new HashMap<String, LinkedList<ZipArchiveEntry>>(HASH_SIZE); 099 100 private static final class OffsetEntry { 101 private long headerOffset = -1; 102 private long dataOffset = -1; 103 } 104 105 /** 106 * The encoding to use for filenames and the file comment. 107 * 108 * <p>For a list of possible values see <a 109 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>. 110 * Defaults to UTF-8.</p> 111 */ 112 private final String encoding; 113 114 /** 115 * The zip encoding to use for filenames and the file comment. 116 */ 117 private final ZipEncoding zipEncoding; 118 119 /** 120 * File name of actual source. 121 */ 122 private final String archiveName; 123 124 /** 125 * The actual data source. 126 */ 127 private final RandomAccessFile archive; 128 129 /** 130 * Whether to look for and use Unicode extra fields. 131 */ 132 private final boolean useUnicodeExtraFields; 133 134 /** 135 * Whether the file is closed. 136 */ 137 private volatile boolean closed = true; 138 139 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 140 private final byte[] DWORD_BUF = new byte[DWORD]; 141 private final byte[] WORD_BUF = new byte[WORD]; 142 private final byte[] CFH_BUF = new byte[CFH_LEN]; 143 private final byte[] SHORT_BUF = new byte[SHORT]; 144 145 /** 146 * Opens the given file for reading, assuming "UTF8" for file names. 147 * 148 * @param f the archive. 149 * 150 * @throws IOException if an error occurs while reading the file. 151 */ 152 public ZipFile(File f) throws IOException { 153 this(f, ZipEncodingHelper.UTF8); 154 } 155 156 /** 157 * Opens the given file for reading, assuming "UTF8". 158 * 159 * @param name name of the archive. 160 * 161 * @throws IOException if an error occurs while reading the file. 
162 */ 163 public ZipFile(String name) throws IOException { 164 this(new File(name), ZipEncodingHelper.UTF8); 165 } 166 167 /** 168 * Opens the given file for reading, assuming the specified 169 * encoding for file names, scanning unicode extra fields. 170 * 171 * @param name name of the archive. 172 * @param encoding the encoding to use for file names, use null 173 * for the platform's default encoding 174 * 175 * @throws IOException if an error occurs while reading the file. 176 */ 177 public ZipFile(String name, String encoding) throws IOException { 178 this(new File(name), encoding, true); 179 } 180 181 /** 182 * Opens the given file for reading, assuming the specified 183 * encoding for file names and scanning for unicode extra fields. 184 * 185 * @param f the archive. 186 * @param encoding the encoding to use for file names, use null 187 * for the platform's default encoding 188 * 189 * @throws IOException if an error occurs while reading the file. 190 */ 191 public ZipFile(File f, String encoding) throws IOException { 192 this(f, encoding, true); 193 } 194 195 /** 196 * Opens the given file for reading, assuming the specified 197 * encoding for file names. 198 * 199 * @param f the archive. 200 * @param encoding the encoding to use for file names, use null 201 * for the platform's default encoding 202 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 203 * Extra Fields (if present) to set the file names. 204 * 205 * @throws IOException if an error occurs while reading the file. 
206 */ 207 public ZipFile(File f, String encoding, boolean useUnicodeExtraFields) 208 throws IOException { 209 this.archiveName = f.getAbsolutePath(); 210 this.encoding = encoding; 211 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 212 this.useUnicodeExtraFields = useUnicodeExtraFields; 213 archive = new RandomAccessFile(f, "r"); 214 boolean success = false; 215 try { 216 Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = 217 populateFromCentralDirectory(); 218 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 219 success = true; 220 } finally { 221 closed = !success; 222 if (!success) { 223 IOUtils.closeQuietly(archive); 224 } 225 } 226 } 227 228 /** 229 * The encoding to use for filenames and the file comment. 230 * 231 * @return null if using the platform's default character encoding. 232 */ 233 public String getEncoding() { 234 return encoding; 235 } 236 237 /** 238 * Closes the archive. 239 * @throws IOException if an error occurs closing the archive. 240 */ 241 public void close() throws IOException { 242 // this flag is only written here and read in finalize() which 243 // can never be run in parallel. 244 // no synchronization needed. 245 closed = true; 246 247 archive.close(); 248 } 249 250 /** 251 * close a zipfile quietly; throw no io fault, do nothing 252 * on a null parameter 253 * @param zipfile file to close, can be null 254 */ 255 public static void closeQuietly(ZipFile zipfile) { 256 IOUtils.closeQuietly(zipfile); 257 } 258 259 /** 260 * Returns all entries. 261 * 262 * <p>Entries will be returned in the same order they appear 263 * within the archive's central directory.</p> 264 * 265 * @return all entries as {@link ZipArchiveEntry} instances 266 */ 267 public Enumeration<ZipArchiveEntry> getEntries() { 268 return Collections.enumeration(entries); 269 } 270 271 /** 272 * Returns all entries in physical order. 
273 * 274 * <p>Entries will be returned in the same order their contents 275 * appear within the archive.</p> 276 * 277 * @return all entries as {@link ZipArchiveEntry} instances 278 * 279 * @since 1.1 280 */ 281 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 282 ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]); 283 Arrays.sort(allEntries, OFFSET_COMPARATOR); 284 return Collections.enumeration(Arrays.asList(allEntries)); 285 } 286 287 /** 288 * Returns a named entry - or {@code null} if no entry by 289 * that name exists. 290 * 291 * <p>If multiple entries with the same name exist the first entry 292 * in the archive's central directory by that name is 293 * returned.</p> 294 * 295 * @param name name of the entry. 296 * @return the ZipArchiveEntry corresponding to the given name - or 297 * {@code null} if not present. 298 */ 299 public ZipArchiveEntry getEntry(String name) { 300 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 301 return entriesOfThatName != null ? entriesOfThatName.getFirst() : null; 302 } 303 304 /** 305 * Returns all named entries in the same order they appear within 306 * the archive's central directory. 307 * 308 * @param name name of the entry. 309 * @return the Iterable<ZipArchiveEntry> corresponding to the 310 * given name 311 * @since 1.6 312 */ 313 public Iterable<ZipArchiveEntry> getEntries(String name) { 314 List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 315 return entriesOfThatName != null ? entriesOfThatName 316 : Collections.<ZipArchiveEntry>emptyList(); 317 } 318 319 /** 320 * Returns all named entries in the same order their contents 321 * appear within the archive. 322 * 323 * @param name name of the entry. 
324 * @return the Iterable<ZipArchiveEntry> corresponding to the 325 * given name 326 * @since 1.6 327 */ 328 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(String name) { 329 ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0]; 330 if (nameMap.containsKey(name)) { 331 entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName); 332 Arrays.sort(entriesOfThatName, OFFSET_COMPARATOR); 333 } 334 return Arrays.asList(entriesOfThatName); 335 } 336 337 /** 338 * Whether this class is able to read the given entry. 339 * 340 * <p>May return false if it is set up to use encryption or a 341 * compression method that hasn't been implemented yet.</p> 342 * @since 1.1 343 */ 344 public boolean canReadEntryData(ZipArchiveEntry ze) { 345 return ZipUtil.canHandleEntryData(ze); 346 } 347 348 /** 349 * Expose the raw stream of the archive entry (compressed form) 350 * <p/> 351 * This method does not relate to how/if we understand the payload in the 352 * stream, since we really only intend to move it on to somewhere else. 353 * 354 * @param ze The entry to get the stream for 355 * @return The raw input stream containing (possibly) compressed data. 356 */ 357 private InputStream getRawInputStream(ZipArchiveEntry ze) { 358 if (!(ze instanceof Entry)) { 359 return null; 360 } 361 OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry(); 362 long start = offsetEntry.dataOffset; 363 return new BoundedInputStream(start, ze.getCompressedSize()); 364 } 365 366 367 /** 368 * Transfer selected entries from this zipfile to a given #ZipArchiveOutputStream. 369 * Compression and all other attributes will be as in this file. 370 * This method transfers entries based on the central directory of the zip file. 
371 * 372 * @param target The zipArchiveOutputStream to write the entries to 373 * @param predicate A predicate that selects which entries to write 374 */ 375 public void copyRawEntries(ZipArchiveOutputStream target, ZipArchiveEntryPredicate predicate) 376 throws IOException { 377 Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 378 while (src.hasMoreElements()) { 379 ZipArchiveEntry entry = src.nextElement(); 380 if (predicate.test( entry)) { 381 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 382 } 383 } 384 } 385 386 /** 387 * Returns an InputStream for reading the contents of the given entry. 388 * 389 * @param ze the entry to get the stream for. 390 * @return a stream to read the entry from. 391 * @throws IOException if unable to create an input stream from the zipentry 392 * @throws ZipException if the zipentry uses an unsupported feature 393 */ 394 public InputStream getInputStream(ZipArchiveEntry ze) 395 throws IOException, ZipException { 396 if (!(ze instanceof Entry)) { 397 return null; 398 } 399 // cast valididty is checked just above 400 OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry(); 401 ZipUtil.checkRequestedFeatures(ze); 402 long start = offsetEntry.dataOffset; 403 BoundedInputStream bis = 404 new BoundedInputStream(start, ze.getCompressedSize()); 405 switch (ZipMethod.getMethodByCode(ze.getMethod())) { 406 case STORED: 407 return bis; 408 case UNSHRINKING: 409 return new UnshrinkingInputStream(bis); 410 case IMPLODING: 411 return new ExplodingInputStream(ze.getGeneralPurposeBit().getSlidingDictionarySize(), 412 ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), new BufferedInputStream(bis)); 413 case DEFLATED: 414 bis.addDummy(); 415 final Inflater inflater = new Inflater(true); 416 return new InflaterInputStream(bis, inflater) { 417 @Override 418 public void close() throws IOException { 419 super.close(); 420 inflater.end(); 421 } 422 }; 423 default: 424 throw new ZipException("Found unsupported compression 
method " 425 + ze.getMethod()); 426 } 427 } 428 429 /** 430 * <p> 431 * Convenience method to return the entry's content as a String if isUnixSymlink() 432 * returns true for it, otherwise returns null. 433 * </p> 434 * 435 * <p>This method assumes the symbolic link's file name uses the 436 * same encoding that as been specified for this ZipFile.</p> 437 * 438 * @param entry ZipArchiveEntry object that represents the symbolic link 439 * @return entry's content as a String 440 * @throws IOException problem with content's input stream 441 * @since 1.5 442 */ 443 public String getUnixSymlink(ZipArchiveEntry entry) throws IOException { 444 if (entry != null && entry.isUnixSymlink()) { 445 InputStream in = null; 446 try { 447 in = getInputStream(entry); 448 byte[] symlinkBytes = IOUtils.toByteArray(in); 449 return zipEncoding.decode(symlinkBytes); 450 } finally { 451 if (in != null) { 452 in.close(); 453 } 454 } 455 } else { 456 return null; 457 } 458 } 459 460 /** 461 * Ensures that the close method of this zipfile is called when 462 * there are no more references to it. 463 * @see #close() 464 */ 465 @Override 466 protected void finalize() throws Throwable { 467 try { 468 if (!closed) { 469 System.err.println("Cleaning up unclosed ZipFile for archive " 470 + archiveName); 471 close(); 472 } 473 } finally { 474 super.finalize(); 475 } 476 } 477 478 /** 479 * Length of a "central directory" entry structure without file 480 * name, extra fields or comment. 
481 */ 482 private static final int CFH_LEN = 483 /* version made by */ SHORT 484 /* version needed to extract */ + SHORT 485 /* general purpose bit flag */ + SHORT 486 /* compression method */ + SHORT 487 /* last mod file time */ + SHORT 488 /* last mod file date */ + SHORT 489 /* crc-32 */ + WORD 490 /* compressed size */ + WORD 491 /* uncompressed size */ + WORD 492 /* filename length */ + SHORT 493 /* extra field length */ + SHORT 494 /* file comment length */ + SHORT 495 /* disk number start */ + SHORT 496 /* internal file attributes */ + SHORT 497 /* external file attributes */ + WORD 498 /* relative offset of local header */ + WORD; 499 500 private static final long CFH_SIG = 501 ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 502 503 /** 504 * Reads the central directory of the given archive and populates 505 * the internal tables with ZipArchiveEntry instances. 506 * 507 * <p>The ZipArchiveEntrys will know all data that can be obtained from 508 * the central directory alone, but not the data that requires the 509 * local file header or additional data to be read.</p> 510 * 511 * @return a map of zipentries that didn't have the language 512 * encoding flag set when read. 
513 */ 514 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() 515 throws IOException { 516 HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = 517 new HashMap<ZipArchiveEntry, NameAndComment>(); 518 519 positionAtCentralDirectory(); 520 521 archive.readFully(WORD_BUF); 522 long sig = ZipLong.getValue(WORD_BUF); 523 524 if (sig != CFH_SIG && startsWithLocalFileHeader()) { 525 throw new IOException("central directory is empty, can't expand" 526 + " corrupt archive."); 527 } 528 529 while (sig == CFH_SIG) { 530 readCentralDirectoryEntry(noUTF8Flag); 531 archive.readFully(WORD_BUF); 532 sig = ZipLong.getValue(WORD_BUF); 533 } 534 return noUTF8Flag; 535 } 536 537 /** 538 * Reads an individual entry of the central directory, creats an 539 * ZipArchiveEntry from it and adds it to the global maps. 540 * 541 * @param noUTF8Flag map used to collect entries that don't have 542 * their UTF-8 flag set and whose name will be set by data read 543 * from the local file header later. The current entry may be 544 * added to this map. 545 */ 546 private void 547 readCentralDirectoryEntry(Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) 548 throws IOException { 549 archive.readFully(CFH_BUF); 550 int off = 0; 551 OffsetEntry offset = new OffsetEntry(); 552 Entry ze = new Entry(offset); 553 554 int versionMadeBy = ZipShort.getValue(CFH_BUF, off); 555 off += SHORT; 556 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK); 557 558 off += SHORT; // skip version info 559 560 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(CFH_BUF, off); 561 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 562 final ZipEncoding entryEncoding = 563 hasUTF8Flag ? 
ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 564 ze.setGeneralPurposeBit(gpFlag); 565 566 off += SHORT; 567 568 ze.setMethod(ZipShort.getValue(CFH_BUF, off)); 569 off += SHORT; 570 571 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(CFH_BUF, off)); 572 ze.setTime(time); 573 off += WORD; 574 575 ze.setCrc(ZipLong.getValue(CFH_BUF, off)); 576 off += WORD; 577 578 ze.setCompressedSize(ZipLong.getValue(CFH_BUF, off)); 579 off += WORD; 580 581 ze.setSize(ZipLong.getValue(CFH_BUF, off)); 582 off += WORD; 583 584 int fileNameLen = ZipShort.getValue(CFH_BUF, off); 585 off += SHORT; 586 587 int extraLen = ZipShort.getValue(CFH_BUF, off); 588 off += SHORT; 589 590 int commentLen = ZipShort.getValue(CFH_BUF, off); 591 off += SHORT; 592 593 int diskStart = ZipShort.getValue(CFH_BUF, off); 594 off += SHORT; 595 596 ze.setInternalAttributes(ZipShort.getValue(CFH_BUF, off)); 597 off += SHORT; 598 599 ze.setExternalAttributes(ZipLong.getValue(CFH_BUF, off)); 600 off += WORD; 601 602 byte[] fileName = new byte[fileNameLen]; 603 archive.readFully(fileName); 604 ze.setName(entryEncoding.decode(fileName), fileName); 605 606 // LFH offset, 607 offset.headerOffset = ZipLong.getValue(CFH_BUF, off); 608 // data offset will be filled later 609 entries.add(ze); 610 611 byte[] cdExtraData = new byte[extraLen]; 612 archive.readFully(cdExtraData); 613 ze.setCentralDirectoryExtra(cdExtraData); 614 615 setSizesAndOffsetFromZip64Extra(ze, offset, diskStart); 616 617 byte[] comment = new byte[commentLen]; 618 archive.readFully(comment); 619 ze.setComment(entryEncoding.decode(comment)); 620 621 if (!hasUTF8Flag && useUnicodeExtraFields) { 622 noUTF8Flag.put(ze, new NameAndComment(fileName, comment)); 623 } 624 } 625 626 /** 627 * If the entry holds a Zip64 extended information extra field, 628 * read sizes from there if the entry's sizes are set to 629 * 0xFFFFFFFFF, do the same for the offset of the local file 630 * header. 
631 * 632 * <p>Ensures the Zip64 extra either knows both compressed and 633 * uncompressed size or neither of both as the internal logic in 634 * ExtraFieldUtils forces the field to create local header data 635 * even if they are never used - and here a field with only one 636 * size would be invalid.</p> 637 */ 638 private void setSizesAndOffsetFromZip64Extra(ZipArchiveEntry ze, 639 OffsetEntry offset, 640 int diskStart) 641 throws IOException { 642 Zip64ExtendedInformationExtraField z64 = 643 (Zip64ExtendedInformationExtraField) 644 ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 645 if (z64 != null) { 646 boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; 647 boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; 648 boolean hasRelativeHeaderOffset = 649 offset.headerOffset == ZIP64_MAGIC; 650 z64.reparseCentralDirectoryData(hasUncompressedSize, 651 hasCompressedSize, 652 hasRelativeHeaderOffset, 653 diskStart == ZIP64_MAGIC_SHORT); 654 655 if (hasUncompressedSize) { 656 ze.setSize(z64.getSize().getLongValue()); 657 } else if (hasCompressedSize) { 658 z64.setSize(new ZipEightByteInteger(ze.getSize())); 659 } 660 661 if (hasCompressedSize) { 662 ze.setCompressedSize(z64.getCompressedSize().getLongValue()); 663 } else if (hasUncompressedSize) { 664 z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); 665 } 666 667 if (hasRelativeHeaderOffset) { 668 offset.headerOffset = 669 z64.getRelativeHeaderOffset().getLongValue(); 670 } 671 } 672 } 673 674 /** 675 * Length of the "End of central directory record" - which is 676 * supposed to be the last structure of the archive - without file 677 * comment. 
678 */ 679 static final int MIN_EOCD_SIZE = 680 /* end of central dir signature */ WORD 681 /* number of this disk */ + SHORT 682 /* number of the disk with the */ 683 /* start of the central directory */ + SHORT 684 /* total number of entries in */ 685 /* the central dir on this disk */ + SHORT 686 /* total number of entries in */ 687 /* the central dir */ + SHORT 688 /* size of the central directory */ + WORD 689 /* offset of start of central */ 690 /* directory with respect to */ 691 /* the starting disk number */ + WORD 692 /* zipfile comment length */ + SHORT; 693 694 /** 695 * Maximum length of the "End of central directory record" with a 696 * file comment. 697 */ 698 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 699 /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT; 700 701 /** 702 * Offset of the field that holds the location of the first 703 * central directory entry inside the "End of central directory 704 * record" relative to the start of the "End of central directory 705 * record". 706 */ 707 private static final int CFD_LOCATOR_OFFSET = 708 /* end of central dir signature */ WORD 709 /* number of this disk */ + SHORT 710 /* number of the disk with the */ 711 /* start of the central directory */ + SHORT 712 /* total number of entries in */ 713 /* the central dir on this disk */ + SHORT 714 /* total number of entries in */ 715 /* the central dir */ + SHORT 716 /* size of the central directory */ + WORD; 717 718 /** 719 * Length of the "Zip64 end of central directory locator" - which 720 * should be right in front of the "end of central directory 721 * record" if one is present at all. 
722 */ 723 private static final int ZIP64_EOCDL_LENGTH = 724 /* zip64 end of central dir locator sig */ WORD 725 /* number of the disk with the start */ 726 /* start of the zip64 end of */ 727 /* central directory */ + WORD 728 /* relative offset of the zip64 */ 729 /* end of central directory record */ + DWORD 730 /* total number of disks */ + WORD; 731 732 /** 733 * Offset of the field that holds the location of the "Zip64 end 734 * of central directory record" inside the "Zip64 end of central 735 * directory locator" relative to the start of the "Zip64 end of 736 * central directory locator". 737 */ 738 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 739 /* zip64 end of central dir locator sig */ WORD 740 /* number of the disk with the start */ 741 /* start of the zip64 end of */ 742 /* central directory */ + WORD; 743 744 /** 745 * Offset of the field that holds the location of the first 746 * central directory entry inside the "Zip64 end of central 747 * directory record" relative to the start of the "Zip64 end of 748 * central directory record". 749 */ 750 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 751 /* zip64 end of central dir */ 752 /* signature */ WORD 753 /* size of zip64 end of central */ 754 /* directory record */ + DWORD 755 /* version made by */ + SHORT 756 /* version needed to extract */ + SHORT 757 /* number of this disk */ + WORD 758 /* number of the disk with the */ 759 /* start of the central directory */ + WORD 760 /* total number of entries in the */ 761 /* central directory on this disk */ + DWORD 762 /* total number of entries in the */ 763 /* central directory */ + DWORD 764 /* size of the central directory */ + DWORD; 765 766 /** 767 * Searches for either the "Zip64 end of central directory 768 * locator" or the "End of central dir record", parses 769 * it and positions the stream at the first central directory 770 * record. 
771 */ 772 private void positionAtCentralDirectory() 773 throws IOException { 774 positionAtEndOfCentralDirectoryRecord(); 775 boolean found = false; 776 boolean searchedForZip64EOCD = 777 archive.getFilePointer() > ZIP64_EOCDL_LENGTH; 778 if (searchedForZip64EOCD) { 779 archive.seek(archive.getFilePointer() - ZIP64_EOCDL_LENGTH); 780 archive.readFully(WORD_BUF); 781 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, 782 WORD_BUF); 783 } 784 if (!found) { 785 // not a ZIP64 archive 786 if (searchedForZip64EOCD) { 787 skipBytes(ZIP64_EOCDL_LENGTH - WORD); 788 } 789 positionAtCentralDirectory32(); 790 } else { 791 positionAtCentralDirectory64(); 792 } 793 } 794 795 /** 796 * Parses the "Zip64 end of central directory locator", 797 * finds the "Zip64 end of central directory record" using the 798 * parsed information, parses that and positions the stream at the 799 * first central directory record. 800 * 801 * Expects stream to be positioned right behind the "Zip64 802 * end of central directory locator"'s signature. 803 */ 804 private void positionAtCentralDirectory64() 805 throws IOException { 806 skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET 807 - WORD /* signature has already been read */); 808 archive.readFully(DWORD_BUF); 809 archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF)); 810 archive.readFully(WORD_BUF); 811 if (!Arrays.equals(WORD_BUF, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) { 812 throw new ZipException("archive's ZIP64 end of central " 813 + "directory locator is corrupt."); 814 } 815 skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET 816 - WORD /* signature has already been read */); 817 archive.readFully(DWORD_BUF); 818 archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF)); 819 } 820 821 /** 822 * Parses the "End of central dir record" and positions 823 * the stream at the first central directory record. 824 * 825 * Expects stream to be positioned at the beginning of the 826 * "End of central dir record". 
827 */ 828 private void positionAtCentralDirectory32() 829 throws IOException { 830 skipBytes(CFD_LOCATOR_OFFSET); 831 archive.readFully(WORD_BUF); 832 archive.seek(ZipLong.getValue(WORD_BUF)); 833 } 834 835 /** 836 * Searches for the and positions the stream at the start of the 837 * "End of central dir record". 838 */ 839 private void positionAtEndOfCentralDirectoryRecord() 840 throws IOException { 841 boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, 842 ZipArchiveOutputStream.EOCD_SIG); 843 if (!found) { 844 throw new ZipException("archive is not a ZIP archive"); 845 } 846 } 847 848 /** 849 * Searches the archive backwards from minDistance to maxDistance 850 * for the given signature, positions the RandomaccessFile right 851 * at the signature if it has been found. 852 */ 853 private boolean tryToLocateSignature(long minDistanceFromEnd, 854 long maxDistanceFromEnd, 855 byte[] sig) throws IOException { 856 boolean found = false; 857 long off = archive.length() - minDistanceFromEnd; 858 final long stopSearching = 859 Math.max(0L, archive.length() - maxDistanceFromEnd); 860 if (off >= 0) { 861 for (; off >= stopSearching; off--) { 862 archive.seek(off); 863 int curr = archive.read(); 864 if (curr == -1) { 865 break; 866 } 867 if (curr == sig[POS_0]) { 868 curr = archive.read(); 869 if (curr == sig[POS_1]) { 870 curr = archive.read(); 871 if (curr == sig[POS_2]) { 872 curr = archive.read(); 873 if (curr == sig[POS_3]) { 874 found = true; 875 break; 876 } 877 } 878 } 879 } 880 } 881 } 882 if (found) { 883 archive.seek(off); 884 } 885 return found; 886 } 887 888 /** 889 * Skips the given number of bytes or throws an EOFException if 890 * skipping failed. 
891 */ 892 private void skipBytes(final int count) throws IOException { 893 int totalSkipped = 0; 894 while (totalSkipped < count) { 895 int skippedNow = archive.skipBytes(count - totalSkipped); 896 if (skippedNow <= 0) { 897 throw new EOFException(); 898 } 899 totalSkipped += skippedNow; 900 } 901 } 902 903 /** 904 * Number of bytes in local file header up to the "length of 905 * filename" entry. 906 */ 907 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 908 /* local file header signature */ WORD 909 /* version needed to extract */ + SHORT 910 /* general purpose bit flag */ + SHORT 911 /* compression method */ + SHORT 912 /* last mod file time */ + SHORT 913 /* last mod file date */ + SHORT 914 /* crc-32 */ + WORD 915 /* compressed size */ + WORD 916 /* uncompressed size */ + WORD; 917 918 /** 919 * Walks through all recorded entries and adds the data available 920 * from the local file header. 921 * 922 * <p>Also records the offsets for the data to read from the 923 * entries.</p> 924 */ 925 private void resolveLocalFileHeaderData(Map<ZipArchiveEntry, NameAndComment> 926 entriesWithoutUTF8Flag) 927 throws IOException { 928 for (ZipArchiveEntry zipArchiveEntry : entries) { 929 // entries is filled in populateFromCentralDirectory and 930 // never modified 931 Entry ze = (Entry) zipArchiveEntry; 932 OffsetEntry offsetEntry = ze.getOffsetEntry(); 933 long offset = offsetEntry.headerOffset; 934 archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 935 archive.readFully(SHORT_BUF); 936 int fileNameLen = ZipShort.getValue(SHORT_BUF); 937 archive.readFully(SHORT_BUF); 938 int extraFieldLen = ZipShort.getValue(SHORT_BUF); 939 int lenToSkip = fileNameLen; 940 while (lenToSkip > 0) { 941 int skipped = archive.skipBytes(lenToSkip); 942 if (skipped <= 0) { 943 throw new IOException("failed to skip file name in" 944 + " local file header"); 945 } 946 lenToSkip -= skipped; 947 } 948 byte[] localExtraData = new byte[extraFieldLen]; 949 
archive.readFully(localExtraData); 950 ze.setExtra(localExtraData); 951 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH 952 + SHORT + SHORT + fileNameLen + extraFieldLen; 953 954 if (entriesWithoutUTF8Flag.containsKey(ze)) { 955 NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 956 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, 957 nc.comment); 958 } 959 960 String name = ze.getName(); 961 LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name); 962 if (entriesOfThatName == null) { 963 entriesOfThatName = new LinkedList<ZipArchiveEntry>(); 964 nameMap.put(name, entriesOfThatName); 965 } 966 entriesOfThatName.addLast(ze); 967 } 968 } 969 970 /** 971 * Checks whether the archive starts with a LFH. If it doesn't, 972 * it may be an empty archive. 973 */ 974 private boolean startsWithLocalFileHeader() throws IOException { 975 archive.seek(0); 976 archive.readFully(WORD_BUF); 977 return Arrays.equals(WORD_BUF, ZipArchiveOutputStream.LFH_SIG); 978 } 979 980 /** 981 * InputStream that delegates requests to the underlying 982 * RandomAccessFile, making sure that only bytes from a certain 983 * range can be read. 
984 */ 985 private class BoundedInputStream extends InputStream { 986 private long remaining; 987 private long loc; 988 private boolean addDummyByte = false; 989 990 BoundedInputStream(long start, long remaining) { 991 this.remaining = remaining; 992 loc = start; 993 } 994 995 @Override 996 public int read() throws IOException { 997 if (remaining-- <= 0) { 998 if (addDummyByte) { 999 addDummyByte = false; 1000 return 0; 1001 } 1002 return -1; 1003 } 1004 synchronized (archive) { 1005 archive.seek(loc++); 1006 return archive.read(); 1007 } 1008 } 1009 1010 @Override 1011 public int read(byte[] b, int off, int len) throws IOException { 1012 if (remaining <= 0) { 1013 if (addDummyByte) { 1014 addDummyByte = false; 1015 b[off] = 0; 1016 return 1; 1017 } 1018 return -1; 1019 } 1020 1021 if (len <= 0) { 1022 return 0; 1023 } 1024 1025 if (len > remaining) { 1026 len = (int) remaining; 1027 } 1028 int ret = -1; 1029 synchronized (archive) { 1030 archive.seek(loc); 1031 ret = archive.read(b, off, len); 1032 } 1033 if (ret > 0) { 1034 loc += ret; 1035 remaining -= ret; 1036 } 1037 return ret; 1038 } 1039 1040 /** 1041 * Inflater needs an extra dummy byte for nowrap - see 1042 * Inflater's javadocs. 1043 */ 1044 void addDummy() { 1045 addDummyByte = true; 1046 } 1047 } 1048 1049 private static final class NameAndComment { 1050 private final byte[] name; 1051 private final byte[] comment; 1052 private NameAndComment(byte[] name, byte[] comment) { 1053 this.name = name; 1054 this.comment = comment; 1055 } 1056 } 1057 1058 /** 1059 * Compares two ZipArchiveEntries based on their offset within the archive. 
*
     * <p>Entries that aren't part of the archive at all can't be
     * ordered by offset: such an entry sorts after any entry that
     * is part of the archive, and two such entries compare as
     * equal.</p>
     *
     * @since 1.1
     */
    private final Comparator<ZipArchiveEntry> OFFSET_COMPARATOR =
        new Comparator<ZipArchiveEntry>() {
        public int compare(ZipArchiveEntry e1, ZipArchiveEntry e2) {
            if (e1 == e2) {
                return 0;
            }

            final boolean known1 = e1 instanceof Entry;
            final boolean known2 = e2 instanceof Entry;
            if (!known1 || !known2) {
                // Foreign entries go last. Two foreign entries must
                // compare as equal, otherwise compare(a, b) and
                // compare(b, a) would both claim "greater".
                return known1 ? -1 : (known2 ? 1 : 0);
            }

            // compare directly instead of inspecting a difference
            final long off1 = ((Entry) e1).getOffsetEntry().headerOffset;
            final long off2 = ((Entry) e2).getOffsetEntry().headerOffset;
            return off1 < off2 ? -1 : (off1 == off2 ? 0 : 1);
        }
    };

    /**
     * Extends ZipArchiveEntry to store the offset within the archive.
     */
    private static class Entry extends ZipArchiveEntry {

        private final OffsetEntry offsetEntry;

        Entry(OffsetEntry offset) {
            this.offsetEntry = offset;
        }

        OffsetEntry getOffsetEntry() {
            return offsetEntry;
        }

        /**
         * Combines the inherited hash code with the header offset.
         */
        @Override
        public int hashCode() {
            return 3 * super.hashCode()
                + (int) (offsetEntry.headerOffset % Integer.MAX_VALUE);
        }

        /**
         * Two entries are equal if the superclass considers them
         * equal and both their header and data offsets match.
         */
        @Override
        public boolean equals(Object other) {
            if (super.equals(other)) {
                // super.equals would return false if other were not an Entry
                Entry otherEntry = (Entry) other;
                return offsetEntry.headerOffset
                        == otherEntry.offsetEntry.headerOffset
                    && offsetEntry.dataOffset
                        == otherEntry.offsetEntry.dataOffset;
            }
            return false;
        }
    }
}