zlib: Add support for building with Chromium's zlib implementation This change builds libgit2 using Chromium's zlib implementation by invoking cmake with `-DUSE_BUNDLED_ZLIB=ON -DUSE_CHROMIUM_ZLIB=ON`, which is ~10% faster than the bundled zlib for the core::zstream suite. This version of zlib has some optimizations: a) Decompression (Intel+ARM): inflate_fast, adler32, crc32, etc. b) Compression (Intel): fill_window, longest_match, hash function, etc. Due to the introduction of SIMD optimizations, and to get the maximum performance out of this fork of zlib, this requires an x86_64 processor with SSE4.2 and CLMUL (anything Westmere or later, ~2010). The Chromium zlib implementation also supports ARM with NEON, but it has not been enabled in this patch. Performance =========== TL;DR: Running just `./libgit2_clar -score::zstream` 100 times in a loop took 0:56.30 before and 0:50.67 after (~10% reduction!). The bundled and system zlib implementations on an Ubuntu Focal system perform relatively similar (the bundled one is marginally better due to the compiler being able to inline some functions), so only the bundled and Chromium zlibs were compared. For a more balanced comparison (to ensure that nothing regressed overall), `libgit2_clar` under `perf` was also run, and the zlib-related functions were compared. Bundled ------- ```shell cmake \ -DUSE_BUNDLED_ZLIB=ON \ -DUSE_CHROMIUM_ZLIB=OFF \ -DCMAKE_BUILD_TYPE="RelWithDebInfo" \ -DCMAKE_C_FLAGS="-fPIC -fno-omit-frame-pointer" \ -GNinja \ .. ninja perf record --call-graph=dwarf ./libgit2_clar perf report --children ``` ``` Samples: 87K of event 'cycles', Event count (approx.): 75923450603 Children Self Command Shared Objec Symbol + 4.14% 0.01% libgit2_clar libgit2_clar [.] git_zstream_get_output_chunk + 2.91% 0.00% libgit2_clar libgit2_clar [.] git_zstream_get_output + 0.69% 0.00% libgit2_clar libgit2_clar [.] git_zstream_get_output (inlined) 0.17% 0.00% libgit2_clar libgit2_clar [.] 
git_zstream_init 0.02% 0.00% libgit2_clar libgit2_clar [.] git_zstream_reset 0.00% 0.00% libgit2_clar libgit2_clar [.] git_zstream_eos 0.00% 0.00% libgit2_clar libgit2_clar [.] git_zstream_done 0.00% 0.00% libgit2_clar libgit2_clar [.] git_zstream_free (inlined) Samples: 87K of event 'cycles', Event count (approx.): 75923450603 Children Self Command Shared Objec Symbol + 3.12% 0.01% libgit2_clar libgit2_clar [.] deflate + 2.65% 1.48% libgit2_clar libgit2_clar [.] deflate_slow + 1.60% 0.55% libgit2_clar libgit2_clar [.] inflate + 0.53% 0.00% libgit2_clar libgit2_clar [.] write_deflate 0.49% 0.36% libgit2_clar libgit2_clar [.] inflate_fast 0.46% 0.02% libgit2_clar libgit2_clar [.] deflate_fast 0.19% 0.19% libgit2_clar libgit2_clar [.] inflate_table 0.16% 0.01% libgit2_clar libgit2_clar [.] inflateInit_ 0.15% 0.00% libgit2_clar libgit2_clar [.] inflateInit2_ (inlined) 0.10% 0.00% libgit2_clar libgit2_clar [.] deflateInit_ 0.10% 0.00% libgit2_clar libgit2_clar [.] deflateInit2_ 0.03% 0.00% libgit2_clar libgit2_clar [.] deflateReset (inlined) 0.02% 0.00% libgit2_clar libgit2_clar [.] deflateReset 0.02% 0.00% libgit2_clar libgit2_clar [.] inflateEnd 0.02% 0.00% libgit2_clar libgit2_clar [.] deflateEnd 0.01% 0.00% libgit2_clar libgit2_clar [.] deflateResetKeep 0.01% 0.01% libgit2_clar libgit2_clar [.] inflateReset2 0.01% 0.00% libgit2_clar libgit2_clar [.] deflateReset (inlined) 0.00% 0.00% libgit2_clar libgit2_clar [.] inflateStateCheck (inlined) 0.00% 0.00% libgit2_clar libgit2_clar [.] inflateReset (inlined) 0.00% 0.00% libgit2_clar libgit2_clar [.] inflateStateCheck (inlined) 0.00% 0.00% libgit2_clar libgit2_clar [.] deflateStateCheck (inlined) 0.00% 0.00% libgit2_clar libgit2_clar [.] inflateResetKeep (inlined) ``` Chromium -------- ```shell cmake \ -DUSE_BUNDLED_ZLIB=ON \ -DUSE_CHROMIUM_ZLIB=ON \ -DCMAKE_BUILD_TYPE="RelWithDebInfo" \ -DCMAKE_C_FLAGS="-fPIC -fno-omit-frame-pointer" \ -GNinja \ .. 
ninja perf record --call-graph=dwarf ./libgit2_clar perf report --children ``` ``` Samples: 97K of event 'cycles', Event count (approx.): 80862210917 Children Self Command Shared Objec Symbol + 3.31% 0.00% libgit2_clar libgit2_clar [.] git_zstream_get_output_chunk + 2.27% 0.01% libgit2_clar libgit2_clar [.] git_zstream_get_output + 0.55% 0.00% libgit2_clar libgit2_clar [.] git_zstream_get_output (inlined) 0.18% 0.00% libgit2_clar libgit2_clar [.] git_zstream_init 0.02% 0.00% libgit2_clar libgit2_clar [.] git_zstream_reset 0.00% 0.00% libgit2_clar libgit2_clar [.] git_zstream_free (inlined) 0.00% 0.00% libgit2_clar libgit2_clar [.] git_zstream_done 0.00% 0.00% libgit2_clar libgit2_clar [.] git_zstream_free Samples: 97K of event 'cycles', Event count (approx.): 80862210917 Children Self Command Shared Objec Symbol + 2.55% 0.01% libgit2_clar libgit2_clar [.] deflate + 2.25% 1.41% libgit2_clar libgit2_clar [.] deflate_slow + 1.10% 0.52% libgit2_clar libgit2_clar [.] inflate 0.36% 0.00% libgit2_clar libgit2_clar [.] write_deflate 0.30% 0.03% libgit2_clar libgit2_clar [.] deflate_fast 0.28% 0.15% libgit2_clar libgit2_clar [.] inflate_fast_chunk_ 0.19% 0.19% libgit2_clar libgit2_clar [.] inflate_table 0.17% 0.01% libgit2_clar libgit2_clar [.] inflateInit_ 0.16% 0.00% libgit2_clar libgit2_clar [.] inflateInit2_ (inlined) 0.15% 0.00% libgit2_clar libgit2_clar [.] deflateInit_ 0.15% 0.00% libgit2_clar libgit2_clar [.] deflateInit2_ 0.11% 0.01% libgit2_clar libgit2_clar [.] adler32_z 0.09% 0.09% libgit2_clar libgit2_clar [.] adler32_simd_ 0.05% 0.00% libgit2_clar libgit2_clar [.] deflateReset (inlined) 0.05% 0.00% libgit2_clar libgit2_clar [.] deflate_read_buf 0.03% 0.00% libgit2_clar libgit2_clar [.] inflateEnd 0.02% 0.00% libgit2_clar libgit2_clar [.] deflateReset 0.01% 0.00% libgit2_clar libgit2_clar [.] deflateEnd 0.01% 0.01% libgit2_clar libgit2_clar [.] inflateReset2 0.01% 0.00% libgit2_clar libgit2_clar [.] 
inflateReset (inlined) 0.00% 0.00% libgit2_clar libgit2_clar [.] adler32 0.00% 0.00% libgit2_clar libgit2_clar [.] inflateResetKeep (inlined) 0.00% 0.00% libgit2_clar libgit2_clar [.] deflateResetKeep 0.00% 0.00% libgit2_clar libgit2_clar [.] inflateStateCheck (inlined) 0.00% 0.00% libgit2_clar libgit2_clar [.] inflateStateCheck (inlined) 0.00% 0.00% libgit2_clar libgit2_clar [.] inflateStateCheck (inlined) 0.00% 0.00% libgit2_clar libgit2_clar [.] deflateStateCheck (inlined) ```
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 05ad1ba..a8f6b75 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,6 +51,7 @@ OPTION(USE_LEAK_CHECKER "Run tests with leak checker" OFF)
OPTION(DEBUG_POOL "Enable debug pool allocator" OFF)
OPTION(ENABLE_WERROR "Enable compilation with -Werror" OFF)
OPTION(USE_BUNDLED_ZLIB "Use the bundled version of zlib" OFF)
+OPTION(USE_CHROMIUM_ZLIB "If using the bundled version of zlib, use the Chromium flavor (x86_64 processor with SSE4.2 and CLMUL required)" OFF)
SET(USE_HTTP_PARSER "" CACHE STRING "Specifies the HTTP Parser implementation; either system or builtin.")
OPTION(DEPRECATE_HARD "Do not include deprecated functions in the library" OFF)
SET(REGEX_BACKEND "" CACHE STRING "Regular expression implementation. One of regcomp_l, pcre2, pcre, regcomp, or builtin.")
diff --git a/deps/chromium-zlib/CMakeLists.txt b/deps/chromium-zlib/CMakeLists.txt
new file mode 100644
index 0000000..bbb35d4
--- /dev/null
+++ b/deps/chromium-zlib/CMakeLists.txt
@@ -0,0 +1,101 @@
+# CMake build script for the bundled Chromium zlib implementation. So far, it
+# is only supported for x86_64 processors with CLMUL, SSSE3, and SSE4.2.
+#
+# TODO: The Chromium build file (BUILD.gn in the fetched zlib sources) supports
+# more platforms (like ARM with NEON); more can be enabled as needed.
+
+CMAKE_MINIMUM_REQUIRED(VERSION 3.11)
+
+include(FetchContent)
+
+# Ensure that the git binary is present to download the sources.
+find_package(Git)
+IF(NOT Git_FOUND)
+ message(FATAL_ERROR "git is required to download the Chromium zlib sources")
+ENDIF()
+
+# Clone the pinned zlib revision at configure time; sets chromium_zlib_src_SOURCE_DIR.
+FetchContent_Populate(chromium_zlib_src
+ GIT_REPOSITORY https://chromium.googlesource.com/chromium/src/third_party/zlib.git
+ GIT_TAG 2c183c9f93a328bfb3121284da13cf89a0f7e64a
+ QUIET
+)
+
+# Chromium's own build turns these warnings off globally, so silence them here too.
+foreach(chromium_zlib_warning
+ implicit-fallthrough unused-function unused-parameter
+ sign-compare declaration-after-statement missing-declarations
+)
+ disable_warnings(${chromium_zlib_warning})
+endforeach()
+
+# Chromium also builds its zlib with -O3 and considers that safe. Note that these
+# SET()s replace, rather than extend, the flags for the two optimized build types.
+SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
+SET(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG")
+
+# Definitions and source files common to all platforms (x86_64 extras follow below).
+add_definitions(
+ -DSTDC
+ -DNO_GZIP
+ -DZLIB_IMPLEMENTATION
+)
+list(APPEND SRC_ZLIB
+ "${chromium_zlib_src_SOURCE_DIR}/adler32.c"
+ "${chromium_zlib_src_SOURCE_DIR}/chromeconf.h"
+ "${chromium_zlib_src_SOURCE_DIR}/compress.c"
+ "${chromium_zlib_src_SOURCE_DIR}/contrib/optimizations/insert_string.h"
+ "${chromium_zlib_src_SOURCE_DIR}/cpu_features.c"
+ "${chromium_zlib_src_SOURCE_DIR}/cpu_features.h"
+ "${chromium_zlib_src_SOURCE_DIR}/crc32.c"
+ "${chromium_zlib_src_SOURCE_DIR}/crc32.h"
+ "${chromium_zlib_src_SOURCE_DIR}/deflate.c"
+ "${chromium_zlib_src_SOURCE_DIR}/deflate.h"
+ "${chromium_zlib_src_SOURCE_DIR}/gzclose.c"
+ "${chromium_zlib_src_SOURCE_DIR}/gzguts.h"
+ "${chromium_zlib_src_SOURCE_DIR}/gzlib.c"
+ "${chromium_zlib_src_SOURCE_DIR}/gzread.c"
+ "${chromium_zlib_src_SOURCE_DIR}/gzwrite.c"
+ "${chromium_zlib_src_SOURCE_DIR}/infback.c"
+ "${chromium_zlib_src_SOURCE_DIR}/inffast.c"
+ "${chromium_zlib_src_SOURCE_DIR}/inffast.h"
+ "${chromium_zlib_src_SOURCE_DIR}/inffixed.h"
+ "${chromium_zlib_src_SOURCE_DIR}/inflate.h"
+ "${chromium_zlib_src_SOURCE_DIR}/inftrees.c"
+ "${chromium_zlib_src_SOURCE_DIR}/inftrees.h"
+ "${chromium_zlib_src_SOURCE_DIR}/trees.c"
+ "${chromium_zlib_src_SOURCE_DIR}/trees.h"
+ "${chromium_zlib_src_SOURCE_DIR}/uncompr.c"
+ "${chromium_zlib_src_SOURCE_DIR}/zconf.h"
+ "${chromium_zlib_src_SOURCE_DIR}/zlib.h"
+ "${chromium_zlib_src_SOURCE_DIR}/zutil.c"
+ "${chromium_zlib_src_SOURCE_DIR}/zutil.h"
+)
+
+# x86_64-specific optimizations; applied unconditionally, as no other CPU is supported yet.
+string(APPEND CMAKE_C_FLAGS " -mssse3 -msse4.2 -mpclmul")
+add_definitions(
+ -DCHROMIUM_ZLIB_NO_CHROMECONF
+ -DX86_NOT_WINDOWS
+ -DADLER32_SIMD_SSSE3
+ -DCRC32_SIMD_SSE42_PCLMUL
+ -DDEFLATE_FILL_WINDOW_SSE2
+ -DINFLATE_CHUNK_READ_64LE
+ -DINFLATE_CHUNK_SIMD_SSE2
+)
+list(APPEND SRC_ZLIB
+ "${chromium_zlib_src_SOURCE_DIR}/adler32_simd.c"
+ "${chromium_zlib_src_SOURCE_DIR}/adler32_simd.h"
+ "${chromium_zlib_src_SOURCE_DIR}/contrib/optimizations/chunkcopy.h"
+ "${chromium_zlib_src_SOURCE_DIR}/contrib/optimizations/inffast_chunk.c"
+ "${chromium_zlib_src_SOURCE_DIR}/contrib/optimizations/inffast_chunk.h"
+ "${chromium_zlib_src_SOURCE_DIR}/contrib/optimizations/inflate.c"
+ "${chromium_zlib_src_SOURCE_DIR}/crc32_simd.c"
+ "${chromium_zlib_src_SOURCE_DIR}/crc32_simd.h"
+ "${chromium_zlib_src_SOURCE_DIR}/crc_folding.c"
+ "${chromium_zlib_src_SOURCE_DIR}/fill_window_sse.c"
+)
+
+list(SORT SRC_ZLIB)
+add_library(chromium_zlib OBJECT ${SRC_ZLIB})
+target_include_directories(chromium_zlib PRIVATE "${chromium_zlib_src_SOURCE_DIR}") # target-scoped: only this library compiles against the fetched headers
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d01cc64..65a289b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -202,10 +202,17 @@ IF(NOT USE_BUNDLED_ZLIB)
ENDIF()
ENDIF()
IF(USE_BUNDLED_ZLIB OR NOT ZLIB_FOUND)
- ADD_SUBDIRECTORY("${libgit2_SOURCE_DIR}/deps/zlib" "${libgit2_BINARY_DIR}/deps/zlib")
- LIST(APPEND LIBGIT2_INCLUDES "${libgit2_SOURCE_DIR}/deps/zlib")
- LIST(APPEND LIBGIT2_OBJECTS $<TARGET_OBJECTS:zlib>)
- ADD_FEATURE_INFO(zlib ON "using bundled zlib")
+ IF(USE_CHROMIUM_ZLIB)
+ ADD_SUBDIRECTORY("${libgit2_SOURCE_DIR}/deps/chromium-zlib" "${libgit2_BINARY_DIR}/deps/chromium-zlib")
+ LIST(APPEND LIBGIT2_INCLUDES "${libgit2_SOURCE_DIR}/deps/chromium-zlib")
+ LIST(APPEND LIBGIT2_OBJECTS $<TARGET_OBJECTS:chromium_zlib>)
+ ADD_FEATURE_INFO(zlib ON "using (Chromium) bundled zlib")
+ ELSE()
+ ADD_SUBDIRECTORY("${libgit2_SOURCE_DIR}/deps/zlib" "${libgit2_BINARY_DIR}/deps/zlib")
+ LIST(APPEND LIBGIT2_INCLUDES "${libgit2_SOURCE_DIR}/deps/zlib")
+ LIST(APPEND LIBGIT2_OBJECTS $<TARGET_OBJECTS:zlib>)
+ ADD_FEATURE_INFO(zlib ON "using bundled zlib")
+ ENDIF()
ENDIF()
# Optional external dependency: libssh2