[ftfuzzer] Add support for multiple files (patch #8779).

Currently, libFuzzer only supports mutation of a single file.  We
circumvent this problem by using an uncompressed tar archive as
multiple-file input for the fuzzer.

This patch enables tests of `FT_Attach_Stream' and AFM/PFM parsing;
a constructed tarball should contain a font file as the first
element, and files to be attached as further elements.

* src/tools/ftfuzzer/ftfuzzer.cc: Include libarchive headers.
(archive_read_entry_data, parse_data): New functions.
(LLVMFuzzerTestOneInput): Updated.

* src/tools/ftfuzzer/ftmutator.cc: New file, providing a custom
mutator for libFuzzer that can mutate tarballs in a sensible way.

diff --git a/ChangeLog b/ChangeLog
index 44fc7de..ee5e5e9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2015-11-02 Bungeman <bungeman@gmail.com>
+
+ [ftfuzzer] Add support for multiple files (patch #8779).
+
+ Currently, libFuzzer only supports mutation of a single file. We
+ circumvent this problem by using an uncompressed tar archive as
+ multiple-file input for the fuzzer.
+
+ This patch enables tests of `FT_Attach_Stream' and AFM/PFM parsing;
+ a constructed tarball should contain a font file as the first
+ element, and files to be attached as further elements.
+
+ * src/tools/ftfuzzer/ftfuzzer.cc: Include libarchive headers.
+ (archive_read_entry_data, parse_data): New functions.
+ (LLVMFuzzerTestOneInput): Updated.
+
+ * src/tools/ftfuzzer/ftmutator.cc: New file, providing a custom
+ mutator for libFuzzer that can mutate tarballs in a sensible way.
+
2015-10-31 Werner Lemberg <wl@gnu.org>
[sfnt] Fix cmap 14 validation (#46346).
diff --git a/src/tools/ftfuzzer/ftfuzzer.cc b/src/tools/ftfuzzer/ftfuzzer.cc
index e5ab293..a232c68 100644
--- a/src/tools/ftfuzzer/ftfuzzer.cc
+++ b/src/tools/ftfuzzer/ftfuzzer.cc
@@ -1,8 +1,11 @@
-// we use `unique_ptr' and `decltype', defined since C++11
+// we use `unique_ptr', `decltype', and other gimmicks defined since C++11
#if __cplusplus < 201103L
# error "a C++11 compiler is needed"
#endif
+#include <archive.h>
+#include <archive_entry.h>
+
#include <assert.h>
#include <stdint.h>
@@ -10,7 +13,7 @@
#include <vector>
-using namespace std;
+ using namespace std;
#include <ft2build.h>
@@ -34,6 +37,7 @@ using namespace std;
static FT_Library library;
static int InitResult;
+
struct FT_Global {
FT_Global() {
InitResult = FT_Init_FreeType( &library );
@@ -46,6 +50,81 @@ using namespace std;
FT_Global global_ft;
+ // Read the data of the current entry of archive `ar' and append it to
+ // `*vw'.
+ //
+ // Returns `ARCHIVE_OK' as soon as `archive_read_data_block' reports
+ // `ARCHIVE_EOF', or the first other status code it reports.  Data
+ // appended before a failure stays in `*vw'.
+ static int
+ archive_read_entry_data( struct archive *ar,
+ vector<FT_Byte> *vw )
+ {
+ int r;
+ const FT_Byte* buff;
+ size_t size;
+ int64_t offset;
+
+ for (;;)
+ {
+ r = archive_read_data_block( ar,
+ reinterpret_cast<const void**>( &buff ),
+ &size,
+ &offset );
+ // reaching the end of the entry data is the regular way out
+ if ( r == ARCHIVE_EOF )
+ return ARCHIVE_OK;
+ if ( r != ARCHIVE_OK )
+ return r;
+
+ // NOTE(review): `offset' is not used; the blocks are concatenated in
+ // the order they are delivered -- presumably this only matters for
+ // sparse entries; confirm
+ vw->insert( vw->end(), buff, buff + size );
+ }
+ }
+
+
+ // Split the fuzzer input into separate files.
+ //
+ // The `size' bytes at `data' are read as an uncompressed tar archive.
+ // The contents of all entries that could be read completely are returned
+ // in archive order; reading stops at the first header or entry that
+ // fails.  If no entry could be extracted at all, the complete input is
+ // returned as the single element, so the result is never empty.
+ static vector<vector<FT_Byte>>
+ parse_data( const uint8_t* data,
+ size_t size )
+ {
+ struct archive_entry* entry;
+ int r;
+ vector<vector<FT_Byte>> files;
+
+ // `a' owns the archive handle and releases it with `archive_read_free'
+ unique_ptr<struct archive,
+ decltype ( archive_read_free )*> a( archive_read_new(),
+ archive_read_free );
+
+ // activate reading of uncompressed tar archives
+ archive_read_support_format_tar( a.get() );
+
+ // the need for `const_cast' was removed with libarchive commit be4d4dd
+ //
+ // NOTE(review): the negation treats a zero return value as success,
+ // presumably relying on `ARCHIVE_OK' being 0 -- confirm
+ if ( !( r = archive_read_open_memory(
+ a.get(),
+ const_cast<void*>(static_cast<const void*>( data ) ),
+ size ) ) )
+ {
+ // `a_open' wraps the same handle as `a'; its only purpose is to call
+ // `archive_read_close' when this scope is left
+ unique_ptr<struct archive,
+ decltype ( archive_read_close )*> a_open( a.get(),
+ archive_read_close );
+
+ // read files contained in archive
+ for (;;)
+ {
+ // both the end of the archive and a header error stop the loop;
+ // files collected so far are kept
+ r = archive_read_next_header( a_open.get(), &entry );
+ if ( r == ARCHIVE_EOF )
+ break;
+ if ( r != ARCHIVE_OK )
+ break;
+
+ // an entry that can't be read completely is dropped
+ vector<FT_Byte> entry_data;
+ r = archive_read_entry_data( a.get(), &entry_data );
+ if ( r != ARCHIVE_OK )
+ break;
+
+ files.push_back( move( entry_data ) );
+ }
+ }
+
+ // not a (usable) tar archive: treat the whole input as a single file
+ if ( files.size() == 0 )
+ files.emplace_back( data, data + size );
+
+ return files;
+ }
+
+
static void
setIntermediateAxis( FT_Face face )
{
@@ -74,6 +153,7 @@ using namespace std;
}
+ // the interface function to the libFuzzer library
extern "C" int
LLVMFuzzerTestOneInput( const uint8_t* data,
size_t size_ )
@@ -83,7 +163,7 @@ using namespace std;
if ( size_ < 1 )
return 0;
- long size = (long)size_;
+ const vector<vector<FT_Byte>>& files = parse_data( data, size_ );
FT_Face face;
FT_Int32 load_flags = FT_LOAD_DEFAULT;
@@ -99,7 +179,11 @@ using namespace std;
// more than a single font.
// get number of faces
- if ( FT_New_Memory_Face( library, data, size, -1, &face ) )
+ if ( FT_New_Memory_Face( library,
+ files[0].data(),
+ (FT_Long)files[0].size(),
+ -1,
+ &face ) )
return 0;
long num_faces = face->num_faces;
FT_Done_Face( face );
@@ -111,8 +195,8 @@ using namespace std;
{
// get number of instances
if ( FT_New_Memory_Face( library,
- data,
- size,
+ files[0].data(),
+ (FT_Long)files[0].size(),
-( face_index + 1 ),
&face ) )
continue;
@@ -125,12 +209,29 @@ using namespace std;
instance_index++ )
{
if ( FT_New_Memory_Face( library,
- data,
- size,
+ files[0].data(),
+ (FT_Long)files[0].size(),
( instance_index << 16 ) + face_index,
&face ) )
continue;
+ // if we have more than a single input file coming from an archive,
+ // attach them (starting with the second file) using the order given
+ // in the archive
+ for ( size_t files_index = 1;
+ files_index < files.size();
+ files_index++ )
+ {
+ FT_Open_Args open_args = {};
+ open_args.flags = FT_OPEN_MEMORY;
+ open_args.memory_base = files[files_index].data();
+ open_args.memory_size = (FT_Long)files[files_index].size();
+
+ // the last archive element will eventually be used as the
+ // attachment
+ FT_Attach_Stream( face, &open_args );
+ }
+
// loop over all bitmap stroke sizes
// and an arbitrary size for outlines
for ( long fixed_sizes_index = 0;
@@ -192,4 +293,4 @@ using namespace std;
}
-/* END */
+// END
diff --git a/src/tools/ftfuzzer/ftmutator.cc b/src/tools/ftfuzzer/ftmutator.cc
new file mode 100644
index 0000000..1223ba2
--- /dev/null
+++ b/src/tools/ftfuzzer/ftmutator.cc
@@ -0,0 +1,301 @@
+// A custom fuzzer mutator for FreeType.
+//
+// Since `tar' is not a valid format for input to FreeType, treat any input
+// that looks like `tar' as multiple files and mutate them separately.
+//
+// In the future, a variation of this may be used to guide mutation on a
+// logically higher level.
+
+// we use `unique_ptr', `decltype', and other gimmicks defined since C++11
+#if __cplusplus < 201103L
+# error "a C++11 compiler is needed"
+#endif
+
+#include <cstdint>
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstddef>
+#include <cstring>
+#include <iostream>
+
+#include <memory>
+#include <vector>
+
+#include <archive.h>
+#include <archive_entry.h>
+
+#include "FuzzerInterface.h"
+
+
+ using namespace std;
+
+
+ // This function should be defined by `ftfuzzer.cc'.
+ extern "C" int
+ LLVMFuzzerTestOneInput( const uint8_t* Data,
+ size_t Size );
+
+
+ // Abort the process if a libarchive call has failed.
+ //
+ // `a' is the archive handle the call operated on, `r' is the status code
+ // it returned.  The function returns normally only if `r' is
+ // `ARCHIVE_OK'; otherwise the error message recorded for `a' (or a
+ // generic text if there is none) is written to standard output and the
+ // process exits with status 1.
+ static void
+ check_result( struct archive*  a,
+               int              r )
+ {
+   if ( r == ARCHIVE_OK )
+     return;
+
+   // `archive_error_string' returns NULL if no message has been recorded;
+   // passing that on to `strlen' would be undefined behaviour
+   const char*  m = archive_error_string( a );
+   if ( !m )
+     m = "unknown libarchive error";
+
+   // use stdio instead of the POSIX `write' function, for which this file
+   // doesn't include a header
+   fwrite( m, 1, strlen( m ), stdout );
+   fflush( stdout );
+   exit( 1 );
+ }
+
+
+ // Read the data of the current entry of archive `ar' and append it to
+ // `*vw'.
+ //
+ // Returns `ARCHIVE_OK' as soon as `archive_read_data_block' reports
+ // `ARCHIVE_EOF', or the first other status code it reports.  Data
+ // appended before a failure stays in `*vw'.
+ static int
+ archive_read_entry_data( struct archive *ar,
+ vector<uint8_t> *vw )
+ {
+ int r;
+ const uint8_t* buff;
+ size_t size;
+ int64_t offset;
+
+ for (;;)
+ {
+ r = archive_read_data_block( ar,
+ reinterpret_cast<const void**>( &buff ),
+ &size,
+ &offset );
+ // reaching the end of the entry data is the regular way out
+ if ( r == ARCHIVE_EOF )
+ return ARCHIVE_OK;
+ if ( r != ARCHIVE_OK )
+ return r;
+
+ // NOTE(review): `offset' is not used; the blocks are concatenated in
+ // the order they are delivered -- presumably this only matters for
+ // sparse entries; confirm
+ vw->insert( vw->end(), buff, buff + size );
+ }
+ }
+
+
+ // Split mutator input into separate files.
+ //
+ // The `size' bytes at `data' are read as an uncompressed tar archive.
+ // The contents of all non-empty entries are returned in archive order.
+ // If reading an entry fails after some data has been received, the
+ // partial data is still returned as the last element.  The result is
+ // empty if nothing could be extracted; the caller uses this to detect
+ // non-tar input.
+ static vector<vector<uint8_t>>
+ parse_data( const uint8_t* data,
+ size_t size )
+ {
+ struct archive_entry* entry;
+ int r;
+ vector<vector<uint8_t>> files;
+
+ // `a' owns the archive handle and releases it with `archive_read_free'
+ unique_ptr<struct archive,
+ decltype ( archive_read_free )*> a( archive_read_new(),
+ archive_read_free );
+
+ // activate reading of uncompressed tar archives
+ archive_read_support_format_tar( a.get() );
+
+ // the need for `const_cast' was removed with libarchive commit be4d4dd
+ //
+ // NOTE(review): the negation treats a zero return value as success,
+ // presumably relying on `ARCHIVE_OK' being 0 -- confirm
+ if ( !( r = archive_read_open_memory(
+ a.get(),
+ const_cast<void*>(static_cast<const void*>( data ) ),
+ size ) ) )
+ {
+ // `a_open' wraps the same handle as `a'; its only purpose is to call
+ // `archive_read_close' when this scope is left
+ unique_ptr<struct archive,
+ decltype ( archive_read_close )*> a_open( a.get(),
+ archive_read_close );
+
+ // read files contained in archive
+ for (;;)
+ {
+ // both the end of the archive and a header error stop the loop;
+ // files collected so far are kept
+ r = archive_read_next_header( a_open.get(), &entry );
+ if ( r == ARCHIVE_EOF )
+ break;
+ if ( r != ARCHIVE_OK )
+ break;
+
+ vector<uint8_t> entry_data;
+ r = archive_read_entry_data( a.get(), &entry_data );
+ // empty entries are skipped
+ //
+ // NOTE(review): this test comes before the status check below, so a
+ // failed read that delivered no data continues with the next header
+ // instead of leaving the loop -- confirm this is intended
+ if ( entry_data.size() == 0 )
+ continue;
+
+ // keep the data even if the entry was only read partially
+ files.push_back( move( entry_data ) );
+ if ( r != ARCHIVE_OK )
+ break;
+ }
+ }
+
+ return files;
+ }
+
+
+ // A `fuzzer::UserSuppliedFuzzer' that runs `LLVMFuzzerTestOneInput' as
+ // its target and mutates tar archives file by file instead of as a
+ // single byte string.
+ class FTFuzzer
+ : public fuzzer::UserSuppliedFuzzer
+ {
+
+ public:
+ FTFuzzer( fuzzer::FuzzerRandomBase* Rand )
+ : fuzzer::UserSuppliedFuzzer( Rand ) {}
+
+
+ // Forward the test input unchanged to `LLVMFuzzerTestOneInput'.
+ int
+ TargetFunction( const uint8_t* Data,
+ size_t Size )
+ {
+ return LLVMFuzzerTestOneInput( Data, Size );
+ }
+
+
+ // Custom mutator.
+ //
+ // Mutate the `Size' bytes at `Data' in place, using at most `MaxSize'
+ // bytes of the buffer.  Input that `parse_data' splits into two or more
+ // files is mutated file by file and written back as a ustar archive;
+ // everything else is passed to the default mutator.  The return value
+ // is either the result of the default mutator or the byte count that
+ // libarchive reports for the archive written to `Data'.
+ virtual size_t
+ Mutate( uint8_t* Data,
+ size_t Size,
+ size_t MaxSize )
+ {
+ vector<vector<uint8_t>> files = parse_data( Data, Size );
+
+ // If no tar entries were found, mutate the input as a plain file.
+ if ( files.size() == 0 )
+ return fuzzer::UserSuppliedFuzzer::Mutate( Data, Size, MaxSize );
+
+ // This relies on `white box' knowledge of tar: The format uses 512-byte
+ // blocks, with one header block per file and two zero-filled blocks
+ // at the end; file data is padded to fill its last block.
+ size_t used_blocks = files.size() + 2;
+ for ( const auto& file : files )
+ used_blocks += ( file.size() + 511 ) / 512;
+
+ size_t max_blocks = MaxSize / 512;
+
+ // If the input is big, it will need to be downsized. If the original
+ // tar file was too big, it may have been clipped to fit. In this
+ // case it may not be possible to properly write out the data, as
+ // there may not be enough space for the trailing two blocks. Start
+ // dropping file data or files from the end.
+ //
+ // The index stops at 1, so the first file is never shortened or
+ // dropped here.
+ for ( size_t i = files.size();
+ i-- > 1 && used_blocks > max_blocks; )
+ {
+ size_t blocks_to_free = used_blocks - max_blocks;
+ size_t blocks_currently_used_by_file_data =
+ ( files[i].size() + 511 ) / 512;
+
+ // this file can absorb the whole deficit: cut it down to a whole
+ // number of blocks (possibly to zero bytes)
+ if ( blocks_currently_used_by_file_data >= blocks_to_free )
+ {
+ files[i].resize( ( blocks_currently_used_by_file_data -
+ blocks_to_free ) * 512 );
+ used_blocks -= blocks_to_free;
+ continue;
+ }
+
+ // otherwise drop the file completely, which also frees its header
+ // block
+ files.pop_back();
+ used_blocks -= blocks_currently_used_by_file_data + 1;
+ }
+
+ // If we get down to one file, don't use tar.
+ // The bare file contents are copied to the start of `Data' and handed
+ // to the default mutator.
+ if ( files.size() == 1 )
+ {
+ memcpy( Data, files[0].data(), files[0].size() );
+ return fuzzer::UserSuppliedFuzzer::Mutate( Data,
+ files[0].size(),
+ MaxSize );
+ }
+
+ size_t free_blocks = max_blocks - used_blocks;
+
+ // Allow each file to use up as much of the currently available space
+ // as it can. If it uses up or gives back blocks, remove them from or
+ // add them to the pool.
+ for ( auto&& file : files )
+ {
+ size_t blocks_currently_used_by_file = ( file.size() + 511 ) / 512;
+ size_t blocks_available = blocks_currently_used_by_file +
+ free_blocks;
+ size_t max_size = blocks_available * 512;
+ size_t data_size = file.size();
+
+ // grow the vector first so that the default mutator has `max_size'
+ // bytes to work in, then cut it to the size the mutator returns
+ //
+ // NOTE(review): `max_size' is 0 for an empty file if no free blocks
+ // are left -- confirm that the default mutator accepts this
+ file.resize( max_size );
+ file.resize( fuzzer::UserSuppliedFuzzer::Mutate( file.data(),
+ data_size,
+ max_size ) );
+
+ size_t blocks_now_used_by_file = ( file.size() + 511 ) / 512;
+ free_blocks = free_blocks +
+ blocks_currently_used_by_file -
+ blocks_now_used_by_file;
+ }
+
+ // `a' owns the write handle and releases it with `archive_write_free'
+ unique_ptr<struct archive,
+ decltype ( archive_write_free )*> a( archive_write_new(),
+ archive_write_free );
+
+ check_result( a.get(), archive_write_add_filter_none( a.get() ) );
+ check_result( a.get(), archive_write_set_format_ustar( a.get() ) );
+
+ // `used' may not be correct until after the archive is closed.
+ size_t used = 0xbadbeef;
+ check_result( a.get(), archive_write_open_memory( a.get(),
+ Data,
+ MaxSize,
+ &used ) );
+
+ // the inner scope makes `a_open' call `archive_write_close' before
+ // `used' is returned
+ {
+ unique_ptr<struct archive,
+ decltype ( archive_write_close )*> a_open( a.get(),
+ archive_write_close );
+
+ // the entries are named `file0', `file1', ... in vector order
+ int file_index = 0;
+ for ( const auto& file : files )
+ {
+ unique_ptr<struct archive_entry,
+ decltype ( archive_entry_free )*>
+ e( archive_entry_new2( a_open.get() ),
+ archive_entry_free );
+
+ char name_buffer[100];
+ snprintf( name_buffer, 100, "file%d", file_index++ );
+
+ archive_entry_set_pathname( e.get(), name_buffer );
+ archive_entry_set_size( e.get(), file.size() );
+ archive_entry_set_filetype( e.get(), AE_IFREG );
+ archive_entry_set_perm( e.get(), 0644 );
+
+ check_result( a_open.get(),
+ archive_write_header( a_open.get(), e.get() ) );
+ // NOTE(review): unlike the neighbouring calls, the result of
+ // `archive_write_data' is not checked -- confirm this is intended
+ archive_write_data( a_open.get(), file.data(), file.size() );
+ check_result( a_open.get(),
+ archive_write_finish_entry( a_open.get() ) );
+ }
+ }
+
+ return used;
+ }
+
+
+ // Cross `Data1' and `Data2', write up to `MaxOutSize' bytes into `Out',
+ // return the number of bytes written, which should be positive.
+ //
+ // There is no tar-specific handling here; the call is forwarded
+ // unchanged to the base class.
+ virtual size_t
+ CrossOver( const uint8_t* Data1,
+ size_t Size1,
+ const uint8_t* Data2,
+ size_t Size2,
+ uint8_t* Out,
+ size_t MaxOutSize )
+ {
+ return fuzzer::UserSuppliedFuzzer::CrossOver( Data1,
+ Size1,
+ Data2,
+ Size2,
+ Out,
+ MaxOutSize );
+ }
+
+ }; // end of FTFuzzer class
+
+
+ // Program entry point: create an `FTFuzzer' object and hand control,
+ // together with the command line arguments, to `fuzzer::FuzzerDriver'.
+ int
+ main( int argc,
+ char* *argv )
+ {
+ // NOTE(review): the generator is always constructed with the argument
+ // 0 -- presumably a fixed seed; confirm
+ fuzzer::FuzzerRandomLibc Rand( 0 );
+ FTFuzzer F( &Rand );
+
+ // NOTE(review): if `FuzzerDriver' returns a status, it is discarded
+ // here and `main' implicitly returns 0 -- confirm this is intended
+ fuzzer::FuzzerDriver( argc, argv, F );
+ }
+
+
+// END