LibArchive: Add Zip file parser

This is based on PKWARE's zip specification
(https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT) and will
be used in the unzip utility and eventually in the zip utility.
This commit is contained in:
Idan Horowitz 2021-03-18 22:35:14 +02:00 committed by Andreas Kling
parent a809db90dd
commit caf4bde3a9
Notes: sideshowbarker 2024-07-18 21:07:50 +09:00
3 changed files with 357 additions and 2 deletions

View File

@ -1,6 +1,7 @@
set(SOURCES
TarStream.cpp
)
TarStream.cpp
Zip.cpp
)
serenity_lib(LibArchive archive)
target_link_libraries(LibArchive LibCore)

View File

@ -0,0 +1,120 @@
/*
* Copyright (c) 2021, Idan Horowitz <idan.horowitz@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <LibArchive/Zip.h>
namespace Archive {
// Scans backwards from the end of `buffer` for the end of central directory signature.
// On success stores the offset of the signature in `offset` and returns true;
// otherwise returns false and leaves `offset` untouched.
bool Zip::find_end_of_central_directory_offset(const ReadonlyBytes& buffer, size_t& offset)
{
    // Size of the record as stored in the file: the 4 byte signature followed by the
    // packed fields. The trailing `comment` pointer only exists in memory, so it is
    // subtracted from the struct size.
    constexpr size_t record_size = sizeof(end_of_central_directory_signature) + (sizeof(EndOfCentralDirectory) - sizeof(u8*));

    // The record may be followed by a trailing comment of an arbitrary 16 bit length,
    // so every possible comment length is tried, shortest first.
    for (size_t backwards_offset = 0; backwards_offset <= UINT16_MAX; backwards_offset++) {
        // Stop as soon as a complete record can no longer fit in front of the assumed comment.
        if (buffer.size() < record_size + backwards_offset)
            return false;

        auto signature_offset = buffer.size() - record_size - backwards_offset;
        if (memcmp(buffer.data() + signature_offset, end_of_central_directory_signature, sizeof(end_of_central_directory_signature)) == 0) {
            offset = signature_offset;
            return true;
        }
    }
    return false;
}
// Validates `buffer` as a single-volume zip archive and, on success, returns a Zip
// that refers to it. Returns an empty Optional when the archive is malformed or uses
// a feature that is not supported yet (multiple volumes, encryption, data
// descriptors, compression methods other than Store/Deflate).
//
// Every offset and length taken from the archive is checked against the buffer size
// before it is used, since the input is untrusted.
Optional<Zip> Zip::try_create(const ReadonlyBytes& buffer)
{
    // Minimum number of bytes each record occupies in the file: the 4 byte signature
    // plus the packed fields (the trailing pointers only exist in memory).
    constexpr size_t end_of_central_directory_min_size = sizeof(end_of_central_directory_signature) + (sizeof(EndOfCentralDirectory) - sizeof(u8*));
    constexpr size_t central_directory_record_min_size = sizeof(central_directory_record_signature) + (sizeof(CentralDirectoryRecord) - (sizeof(u8*) * 3));
    constexpr size_t local_file_header_min_size = sizeof(local_file_header_signature) + (sizeof(LocalFileHeader) - (sizeof(u8*) * 3));

    // General purpose flag bits (APPNOTE.TXT section 4.4.4).
    constexpr u16 encrypted_flag = 1 << 0;
    constexpr u16 data_descriptor_flag = 1 << 3;

    size_t end_of_central_directory_offset;
    if (!find_end_of_central_directory_offset(buffer, end_of_central_directory_offset))
        return {};
    if (buffer.size() - end_of_central_directory_offset < end_of_central_directory_min_size)
        return {};

    EndOfCentralDirectory end_of_central_directory {};
    if (!end_of_central_directory.read(buffer.slice(end_of_central_directory_offset)))
        return {};

    if (end_of_central_directory.disk_number != 0 || end_of_central_directory.central_directory_start_disk != 0 || end_of_central_directory.disk_records_count != end_of_central_directory.total_records_count)
        return {}; // TODO: support multi-volume zip archives

    size_t member_offset = end_of_central_directory.central_directory_offset;
    for (size_t i = 0; i < end_of_central_directory.total_records_count; i++) {
        // The offset comes straight from the archive; reject it here instead of
        // handing an out-of-range start to slice().
        if (member_offset > buffer.size() || buffer.size() - member_offset < central_directory_record_min_size)
            return {};

        CentralDirectoryRecord central_directory_record {};
        if (!central_directory_record.read(buffer.slice(member_offset)))
            return {};

        // The name, extra data and comment must lie inside the buffer before any of
        // them is inspected.
        if (central_directory_record.size() > buffer.size() - member_offset)
            return {};

        if (central_directory_record.general_purpose_flags & encrypted_flag)
            return {}; // TODO: support encrypted zip members

        if (central_directory_record.general_purpose_flags & data_descriptor_flag)
            return {}; // TODO: support zip data descriptors

        if (central_directory_record.compression_method != ZipCompressionMethod::Store && central_directory_record.compression_method != ZipCompressionMethod::Deflate)
            return {}; // TODO: support obsolete zip compression methods

        // Stored members are not compressed, so both sizes have to agree.
        if (central_directory_record.compression_method == ZipCompressionMethod::Store && central_directory_record.uncompressed_size != central_directory_record.compressed_size)
            return {};

        if (central_directory_record.start_disk != 0)
            return {}; // TODO: support multi-volume zip archives

        // Member names are later handled as strings; refuse embedded NUL bytes.
        if (memchr(central_directory_record.name, 0, central_directory_record.name_length) != nullptr)
            return {};

        size_t local_file_header_offset = central_directory_record.local_file_header_offset;
        if (local_file_header_offset > buffer.size() || buffer.size() - local_file_header_offset < local_file_header_min_size)
            return {};

        LocalFileHeader local_file_header {};
        if (!local_file_header.read(buffer.slice(local_file_header_offset)))
            return {};

        // The compressed data starts after the fixed header, the name and the extra
        // data. Compare sizes without subtracting past zero so a bogus length cannot
        // wrap around and slip through.
        size_t bytes_from_header_start = buffer.size() - local_file_header_offset;
        size_t data_offset_in_header = local_file_header_min_size + local_file_header.name_length + local_file_header.extra_data_length;
        if (bytes_from_header_start < data_offset_in_header)
            return {};
        if (bytes_from_header_start - data_offset_in_header < central_directory_record.compressed_size)
            return {};

        member_offset += central_directory_record.size();
    }

    Zip zip;
    zip.m_input_data = buffer;
    zip.member_count = end_of_central_directory.total_records_count;
    zip.members_start_offset = end_of_central_directory.central_directory_offset;
    return zip;
}
// Calls `callback` once for every member, in central directory order.
// Returns false if the callback stopped the iteration with IterationDecision::Break,
// true once all members have been visited.
//
// The records are parsed again on every call; read() is wrapped in VERIFY because the
// same records were already accepted by try_create().
bool Zip::for_each_member(Function<IterationDecision(const ZipMember&)> callback)
{
    size_t member_offset = members_start_offset;
    for (size_t i = 0; i < member_count; i++) {
        CentralDirectoryRecord central_directory_record {};
        VERIFY(central_directory_record.read(m_input_data.slice(member_offset)));
        LocalFileHeader local_file_header {};
        VERIFY(local_file_header.read(m_input_data.slice(central_directory_record.local_file_header_offset)));

        ZipMember member;
        // The stored name is not NUL terminated, so build the string from pointer and
        // length directly instead of copying it into a variable-length stack array.
        // try_create() rejects names with embedded NUL bytes, so the result is the same.
        member.name = String { reinterpret_cast<const char*>(central_directory_record.name), central_directory_record.name_length };
        // Sizes and checksum are taken from the central directory record; only the
        // data pointer comes from the local file header.
        member.compressed_data = { local_file_header.compressed_data, central_directory_record.compressed_size };
        member.compression_method = static_cast<ZipCompressionMethod>(central_directory_record.compression_method);
        member.uncompressed_size = central_directory_record.uncompressed_size;
        member.crc32 = central_directory_record.crc32;
        member.is_directory = (central_directory_record.external_attributes & zip_directory_external_attribute) || member.name.ends_with('/'); // FIXME: better directory detection

        if (callback(member) == IterationDecision::Break)
            return false;

        member_offset += central_directory_record.size();
    }
    return true;
}
}

View File

@ -0,0 +1,234 @@
/*
* Copyright (c) 2021, Idan Horowitz <idan.horowitz@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/Function.h>
#include <AK/IterationDecision.h>
#include <AK/Span.h>
#include <AK/Stream.h>
#include <AK/String.h>
#include <string.h>
namespace Archive {
// NOTE: Due to the format of zip files compression is streamed and decompression is random access.
// Signature bytes that introduce the end of central directory record.
static constexpr u8 end_of_central_directory_signature[] = { 0x50, 0x4b, 0x05, 0x06 }; // 'PK\x05\x06'

// In-memory view of the end of central directory record.
//
// The integer fields are declared in the order in which they follow the signature in
// the file, and the struct is packed, so read() can copy them in one go. `comment` is
// not part of the file layout: it points into the buffer that was handed to read().
// NOTE(review): the raw copy assumes the host byte order matches the file's - confirm
// for big-endian targets.
struct [[gnu::packed]] EndOfCentralDirectory {
    u16 disk_number;
    u16 central_directory_start_disk;
    u16 disk_records_count;
    u16 total_records_count;
    u32 central_directory_size;
    u32 central_directory_offset;
    u16 comment_length;
    const u8* comment;

    // Parses the record located at the very start of `buffer`.
    // Returns false if the buffer is too short for the signature, the fixed fields and
    // the announced comment, or if the signature does not match.
    bool read(ReadonlyBytes buffer)
    {
        auto fields_size = sizeof(EndOfCentralDirectory) - sizeof(u8*);
        // The fields are copied from behind the signature, so both have to fit.
        if (buffer.size() < sizeof(end_of_central_directory_signature) + fields_size)
            return false;
        if (memcmp(buffer.data(), end_of_central_directory_signature, sizeof(end_of_central_directory_signature)) != 0)
            return false;
        memcpy(reinterpret_cast<void*>(&disk_number), buffer.data() + sizeof(end_of_central_directory_signature), fields_size);
        // Only expose a comment pointer whose announced length stays inside the buffer.
        if (buffer.size() - sizeof(end_of_central_directory_signature) - fields_size < comment_length)
            return false;
        comment = buffer.data() + sizeof(end_of_central_directory_signature) + fields_size;
        return true;
    }

    // Writes the signature, the fixed fields and (if any) the comment to `stream`.
    void write(OutputStream& stream) const
    {
        stream.write_or_error({ end_of_central_directory_signature, sizeof(end_of_central_directory_signature) });
        stream << disk_number;
        stream << central_directory_start_disk;
        stream << disk_records_count;
        stream << total_records_count;
        stream << central_directory_size;
        stream << central_directory_offset;
        stream << comment_length;
        if (comment_length > 0)
            stream.write_or_error({ comment, comment_length });
    }
};
// Signature bytes that introduce every central directory record.
static constexpr u8 central_directory_record_signature[] = { 0x50, 0x4b, 0x01, 0x02 }; // 'PK\x01\x02'

// In-memory view of one central directory record.
//
// The integer fields are declared in the order in which they follow the signature in
// the file, and the struct is packed, so read() can copy them in one go. The three
// pointers are not part of the file layout: they point into the buffer that was
// handed to read().
// NOTE(review): the raw copy assumes the host byte order matches the file's - confirm
// for big-endian targets.
struct [[gnu::packed]] CentralDirectoryRecord {
    u16 made_by_version;
    u16 minimum_version;
    u16 general_purpose_flags;
    u16 compression_method;
    u16 modification_time;
    u16 modification_date;
    u32 crc32;
    u32 compressed_size;
    u32 uncompressed_size;
    u16 name_length;
    u16 extra_data_length;
    u16 comment_length;
    u16 start_disk;
    u16 internal_attributes;
    u32 external_attributes;
    u32 local_file_header_offset;
    const u8* name;
    const u8* extra_data;
    const u8* comment;

    // Parses the record located at the very start of `buffer`.
    // Returns false if the signature does not match or if the buffer is too short for
    // the signature, the fixed fields and the announced name, extra data and comment.
    bool read(ReadonlyBytes buffer)
    {
        auto fields_size = sizeof(CentralDirectoryRecord) - (sizeof(u8*) * 3);
        // The fields are copied from behind the signature, so both have to fit.
        if (buffer.size() < sizeof(central_directory_record_signature) + fields_size)
            return false;
        if (memcmp(buffer.data(), central_directory_record_signature, sizeof(central_directory_record_signature)) != 0)
            return false;
        memcpy(reinterpret_cast<void*>(&made_by_version), buffer.data() + sizeof(central_directory_record_signature), fields_size);
        // size() now reflects the lengths that were just read; make sure the variable
        // sized parts stay inside the buffer before pointing at them.
        if (buffer.size() < size())
            return false;
        name = buffer.data() + sizeof(central_directory_record_signature) + fields_size;
        extra_data = name + name_length;
        comment = extra_data + extra_data_length;
        return true;
    }

    // Writes the signature, the fixed fields and the non-empty variable sized parts
    // (name, extra data, comment - in that order) to `stream`.
    void write(OutputStream& stream) const
    {
        stream.write_or_error({ central_directory_record_signature, sizeof(central_directory_record_signature) });
        stream << made_by_version;
        stream << minimum_version;
        stream << general_purpose_flags;
        stream << compression_method;
        stream << modification_time;
        stream << modification_date;
        stream << crc32;
        stream << compressed_size;
        stream << uncompressed_size;
        stream << name_length;
        stream << extra_data_length;
        stream << comment_length;
        stream << start_disk;
        stream << internal_attributes;
        stream << external_attributes;
        stream << local_file_header_offset;
        if (name_length > 0)
            stream.write_or_error({ name, name_length });
        if (extra_data_length > 0)
            stream.write_or_error({ extra_data, extra_data_length });
        if (comment_length > 0)
            stream.write_or_error({ comment, comment_length });
    }

    // Number of bytes this record occupies in the file: signature, fixed fields and
    // the three variable sized parts.
    [[nodiscard]] size_t size() const
    {
        return sizeof(central_directory_record_signature) + (sizeof(CentralDirectoryRecord) - (sizeof(u8*) * 3)) + name_length + extra_data_length + comment_length;
    }
};
// Bit that Zip::for_each_member() tests in CentralDirectoryRecord::external_attributes
// to decide whether a member is a directory.
// NOTE(review): presumably the MS-DOS "directory" file attribute bit - confirm.
static constexpr u32 zip_directory_external_attribute = 1 << 4;
static constexpr u8 local_file_header_signature[] = { 0x50, 0x4b, 0x03, 0x04 }; // 'PK\x03\x04'
struct [[gnu::packed]] LocalFileHeader {
u16 minimum_version;
u16 general_purpose_flags;
u16 compression_method;
u16 modification_time;
u16 modification_date;
u32 crc32;
u32 compressed_size;
u32 uncompressed_size;
u16 name_length;
u16 extra_data_length;
const u8* name;
const u8* extra_data;
const u8* compressed_data;
bool read(ReadonlyBytes buffer)
{
auto fields_size = sizeof(LocalFileHeader) - (sizeof(u8*) * 3);
if (buffer.size() < fields_size)
return false;
if (memcmp(buffer.data(), local_file_header_signature, sizeof(local_file_header_signature)) != 0)
return false;
memcpy(reinterpret_cast<void*>(&minimum_version), buffer.data() + sizeof(local_file_header_signature), fields_size);
name = buffer.data() + sizeof(local_file_header_signature) + fields_size;
extra_data = name + name_length;
compressed_data = extra_data + extra_data_length;
return true;
}
void write(OutputStream& stream) const
{
stream.write_or_error({ local_file_header_signature, sizeof(local_file_header_signature) });
stream << minimum_version;
stream << general_purpose_flags;
stream << compression_method;
stream << modification_time;
stream << modification_date;
stream << crc32;
stream << compressed_size;
stream << uncompressed_size;
stream << name_length;
stream << extra_data_length;
if (name_length > 0)
stream.write_or_error({ name, name_length });
if (extra_data_length > 0)
stream.write_or_error({ extra_data, extra_data_length });
if (compressed_size > 0)
stream.write_or_error({ compressed_data, compressed_size });
}
};
// Values of the 16 bit `compression_method` field of the central directory record
// and the local file header.
// Zip::try_create() only accepts Store and Deflate; every other method makes it
// reject the archive.
enum ZipCompressionMethod : u16 {
Store = 0,
Shrink = 1,
Reduce1 = 2,
Reduce2 = 3,
Reduce3 = 4,
Reduce4 = 5,
Implode = 6,
Reserved = 7,
Deflate = 8
};
// Description of a single archive member as handed to the Zip::for_each_member()
// callback. All values except `compressed_data` are taken from the member's central
// directory record.
struct ZipMember {
// Member name as stored in the archive.
String name;
// The member's raw (still compressed) bytes, located via its local file header.
// NOTE(review): presumably a non-owning view into the buffer the Zip was created
// from - confirm the lifetime requirements with callers.
ReadonlyBytes compressed_data; // TODO: maybe the decompression/compression should be handled by LibArchive instead of the user?
ZipCompressionMethod compression_method;
u32 uncompressed_size;
u32 crc32;
// True if the directory attribute bit is set or the name ends with '/'.
bool is_directory;
};
// Read-only accessor for the members of a zip archive held in a byte buffer.
class Zip {
public:
// Validates `buffer` and returns a Zip referring to it, or an empty Optional if the
// archive is malformed or uses an unsupported feature.
// NOTE(review): the Zip keeps `buffer` as a ReadonlyBytes, presumably without
// copying - the backing storage likely has to outlive the Zip; confirm.
static Optional<Zip> try_create(const ReadonlyBytes& buffer);
// Calls the callback for each member in central directory order. Returns false if
// the callback returned IterationDecision::Break, true otherwise.
bool for_each_member(Function<IterationDecision(const ZipMember&)>);
private:
// Searches backwards from the end of the buffer for the end of central directory
// signature; on success stores its offset in `offset` and returns true.
static bool find_end_of_central_directory_offset(const ReadonlyBytes&, size_t& offset);
// Number of central directory records (total_records_count of the end record).
u16 member_count { 0 };
// Offset of the first central directory record within m_input_data.
size_t members_start_offset { 0 };
// The archive bytes passed to try_create().
ReadonlyBytes m_input_data;
};
}