Edit

IABSD.fr/src/lib/check_sym

Branch :

  • Show log

    Commit

  • Author : guenther
    Date : 2019-10-05 01:01:23
    Hash : 8acaac6c
    Message : The mips64 symbol table layout means we have to consider symbol visibility and skip 'protected' symbols when identifying which functions will be subjects of lazy resolution

  • lib/check_sym
  • #!/bin/ksh
    #  $OpenBSD: check_sym,v 1.10 2019/10/05 01:01:23 guenther Exp $
    #
    # Copyright (c) 2016,2019 Philip Guenther <guenther@openbsd.org>
    #
    # Permission to use, copy, modify, and distribute this software for any
    # purpose with or without fee is hereby granted, provided that the above
    # copyright notice and this permission notice appear in all copies.
    #
    # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    #
    #
    #  check_sym -- compare the symbols and external function references in two
    #	versions of a shared library
    #
    #  SYNOPSIS
    #	check_sym [-chv] [old [new]]
    #
    #  DESCRIPTION
    #	Library developers need to be aware when they have changed the
    #	ABI of a library.  To assist them, check_sym examines two versions
    #	of a shared library and reports changes to the following:
    #	 * the set of exported symbols and their strengths
    #	 * the set of undefined symbols referenced
    #	 * the set of lazily-resolved functions (PLT)
    #
    #	In each case, additions and removals are reported; for exported
    #	symbols it also reports when a symbol is weakened or strengthened.
    #
    #	The shared libraries to compare can be specified on the
    #	command-line.  Otherwise, check_sym expects to be run from the
    #	source directory of a library with a shlib_version file specifying
    #	the version being built and the new library in the obj subdirectory.
    #	If the old library to compare against wasn't specified either then
    #	check_sym will take the highest version of that library in the
    #	*current* directory, or the highest version of that library in
    #	/usr/lib if it wasn't present in the current directory.
    #
    #	check_sym uses fixed names in /tmp for its intermediate files,
    #	as they contain useful details for those trying to understand
    #	what changed.  If any of them cannot be created by the user,
    #	the command will fail.  The files can be cleaned up using
    #	the -c option.
    #
    #
    #	The *basic* rules of thumb for library versions are: if you
    #	 * stop exporting a symbol, or
    #	 * change the size of a data symbol, or
    #	 * start exporting a symbol that an inter-dependent library needs
    #	then you need to bump the MAJOR version of the library.
    #
    #	Otherwise, if you:
    #	 * start exporting a symbol
    #	then you need to bump the MINOR version of the library.
    #
    #  SEE ALSO
    #	readelf(1), elf(5)
    #
    #  AUTHORS
    #	Philip Guenther <guenther@openbsd.org>
    #
    #  CAVEATS
    #	The elf format is infinitely extendable, but check_sym only
    #	handles a few weirdnesses.  Running it on or against new archs
    #	may result in meaningless results.
    #
    #  BUGS
    #	While the author still finds the intermediate files useful,
    #	most people won't.  By default they should be placed in a
    #	temp directory and removed.
    #
    
    get_lib_name() {
    	# Print what is assigned to LIB in the makefile(s) named by the
    	# arguments (sed reads stdin when there are none).  For every line
    	# of the form "LIB = value ..." only the first blank-delimited
    	# word of the value is printed; all other lines are suppressed.
    	sed -n -e 's/^[ 	]*LIB[ 	]*=[ 	]*\([^ 	]*\).*/\1/p' "$@"
    }
    
    pick_highest()
    {
    	# Select the highest-versioned library among the path arguments.
    	#
    	# Each candidate is expected to end in ".MAJOR.MINOR".  Arguments
    	# that are not regular files are ignored, and so are names whose
    	# last two dot-separated components are not both plain decimal
    	# numbers (e.g. libfoo.so.3.bak).
    	#
    	# Sets the globals:
    	#	old	the chosen path, or empty if nothing qualified
    	#	omaj	major number of the chosen path (-1 if none)
    	#	omin	minor number of the chosen path (0 if none)
    	# Returns 0 when a library was chosen, non-zero otherwise.
    	old=
    	omaj=-1
    	omin=0
    	for i
    	do
    		[[ -f $i ]] || continue
    		maj=${i%.*}; maj=${maj##*.}
    		min=${i##*.}
    		# -gt and -eq evaluate their operands arithmetically, so a
    		# non-numeric component would be taken as a variable name
    		# (usually 0) or be a syntax error; skip such names instead
    		# of letting them compete with real versions.
    		case $maj in
    		''|*[!0-9]*)	continue;;
    		esac
    		case $min in
    		''|*[!0-9]*)	continue;;
    		esac
    		if [[ $maj -gt $omaj || ( $maj -eq $omaj && $min -gt $omin ) ]]
    		then
    			old=$i
    			omaj=$maj
    			omin=$min
    		fi
    	done
    	[[ $old != "" ]]
    }
    
    usage()
    {
    	# Print the usage summary and exit; never returns.
    	#
    	# With arguments: they are joined into one error message, which is
    	# written to stderr followed by the usage summary; exit status 1.
    	# Without arguments: the summary goes to stdout; exit status 0.
    	usage="usage: check_sym [-chv] [old [new]]"
    	if [[ $# -gt 0 ]]
    	then
    		# "$*" rather than "$@" so the message is a single word,
    		# and printf so its text is never subject to echo's
    		# option or backslash processing
    		printf '%s\n' "check_sym: $*" "$usage" >&2
    		exit 1
    	fi
    	printf '%s\n' "$usage"
    	exit 0
    }
    
    # Defaults.  file_list holds a brace pattern naming every intermediate
    # file; it is stored as written and used unquoted further down.
    # NOTE(review): that relies on the shell brace-expanding the result of
    # an unquoted $file_list -- presumably true of the /bin/ksh this script
    # is written for; confirm before running it under another shell.
    verbose=false
    file_list=/tmp/{D{,S,W,O},J,S,U,d,j,r,s}{1,2}
    
    # Options: -c remove the intermediate files and exit, -h show usage,
    # -v also print the relocation counts
    while getopts :chv opt
    do
    	case $opt in
    	v)	verbose=true
    		;;
    	c)	rm -f $file_list
    		exit 0
    		;;
    	h)	usage
    		;;
    	\?)	usage "unknown option -- $OPTARG"
    		;;
    	esac
    done
    shift $((OPTIND - 1))
    
    # at most "old" and "new" may remain
    if [[ $# -gt 2 ]]
    then
    	usage "too many arguments"
    fi
    
    # Old library: taken from the first argument when that looks like a
    # shared library name, otherwise located from the current directory.
    if [[ $1 != ?(*/)lib*.so* ]]
    then
    	# Library name: "lib" + the LIB value from ./Makefile, falling
    	# back to libc when there is no Makefile or no usable value
    	lib=
    	if [[ -f Makefile ]]
    	then
    		lib=$(get_lib_name Makefile) || lib=
    	fi
    	if [[ $lib = "" ]]
    	then
    		lib=libc
    	else
    		lib=lib$lib
    	fi
    
    	# Highest numbered copy in the current directory, else the
    	# highest numbered one in /usr/lib; pick_highest sets $old
    	pick_highest $lib.so.* || pick_highest /usr/lib/$lib.so.* || {
    		echo "unable to find $lib.so.*" >&2
    		exit 1
    	}
    elif [[ -f $1 ]]
    then
    	old=$1
    	# library name = basename with the ".so.<version>" tail removed
    	lib=${old##*/}
    	lib=${lib%%.so.*}
    	shift
    else
    	echo "$1 doesn't exist" >&2
    	exit 1
    fi
    
    # New library: the next argument when it looks like a shared library
    # name, otherwise the just built one under obj/, whose version numbers
    # are expected to be set by sourcing ./shlib_version.
    if [[ $1 != ?(*/)lib*.so* ]]
    then
    	. ./shlib_version
    	new=obj/${lib}.so.${major}.${minor}
    else
    	new=$1
    	shift
    fi
    [[ -f $new ]] || {
    	echo "$new doesn't exist" >&2
    	exit 1
    }
    
    # Some symbol lines in the output of readelf -s carry an extra field of
    # the form "[<other>: 88]" (only seen on alpha so far, though nothing
    # about it is arch-specific).  Delete it, stdin to stdout, so that the
    # remaining fields are easier to parse.
    filt_symtab()
    {
    	sed -e 's/\[<other>: [0-9a-f]*\]//'
    }
    
    # precreate all the files we'll use, but with noclobber set to avoid
    # symlink attacks
    # (with set -C a ">" redirection fails instead of writing to a path
    # that already exists)
    set -C
    # names of the files created so far, for the cleanup trap below
    files=
    # on signals 1, 2 and 15, and on ERR, remove whatever has been created.
    # NOTE(review): nothing visible in this script resets the trap, so it
    # presumably stays armed for the rest of the run -- confirm that is
    # intended
    trap 'rm -f $files' 1 2 15 ERR
    for i in $file_list
    do
    	# drop any stale copy, then recreate the file with a command-less
    	# redirection (fd 3 is not otherwise used in this script)
    	rm -f $i
    	3>$i
    	files="$files $i"
    done
    set +C
    
    # Dump the relocations (-r) and the symbol tables (-s) of both libraries,
    # in wide format (-W), into the intermediate files: suffix 1 is the old
    # library, suffix 2 the new one.  "$old" and "$new" are quoted so that a
    # path containing whitespace or glob characters still reaches readelf as
    # a single argument.
    readelf -rW "$old" > /tmp/r1
    readelf -rW "$new" > /tmp/r2
    
    readelf -sW "$old" | filt_symtab > /tmp/s1
    readelf -sW "$new" | filt_symtab > /tmp/s2
    
    
    # Select the arch-specific handling used by jump_slots from the Machine:
    # line of the new library's ELF header
    case $(readelf -h "$new" | grep '^ *Machine:') in
    *MIPS*)	cpu=mips64;;
    *HPPA*)	cpu=hppa;;
    *)	cpu=dontcare;;
    esac
    
    if [[ $cpu = mips64 ]]
    then
    	# third field of the MIPS_GOTSYM line of the dynamic section;
    	# jump_slots uses it as the lowest symbol index to consider
    	gotsym1=$(readelf -d "$old" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
    	gotsym2=$(readelf -d "$new" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
    fi
    
    jump_slots()
    {
    	# Write the sorted names of the lazily-resolved functions of the
    	# library with suffix $1 to /tmp/j$1.  Where they are found
    	# depends on $cpu:
    	#   hppa	5th field of the IPLT lines of /tmp/r$1
    	#   mips64	names of the FUNC symbols listed in /tmp/s$1 before
    	#		the .symtab table whose index is at least gotsym$1
    	#		and whose visibility is not PROTECTED
    	#   others	5th field of the JMP_SLOT/JUMP_SLOT lines of /tmp/r$1
    	{
    		if [[ $cpu = hppa ]]
    		then
    			awk '/IPLT/ && $5 != ""{print $5}' /tmp/r$1
    		elif [[ $cpu = mips64 ]]
    		then
    			# the $((gotsym$1)) converts hex to decimal
    			awk -v g=$((gotsym$1)) '/^Symbol table ..symtab/{exit}
    			$6 == "PROTECTED" { next }
    			$1+0 >= g && $4 == "FUNC" {print $8}' /tmp/s$1
    		else
    			awk '/JU*MP_SL/ && $5 != ""{print $5}' /tmp/r$1
    		fi
    	} | sort -o /tmp/j$1
    }
    
    # dynamic_sym N -- sort the dynamic symbols of library N into lists
    #
    # Reads /tmp/sN and stops at the header of the .symtab table, so only
    # what precedes it is examined.  Lines that do not start (after optional
    # blanks) with a digit 1-9 are skipped.  For each remaining line, with
    # the field numbers presumably following the readelf -s columns
    # (Num Value Size Type Bind Vis Ndx Name -- confirm against readelf):
    #	field 7 is UND		name goes to /tmp/UN and nothing else
    #	field 5 is GLOBAL	name goes to /tmp/DSN
    #	field 5 is WEAK		name goes to /tmp/DWN
    #	field 5 is not LOCAL	name goes to /tmp/DN, and when field 4
    #				is OBJECT "name field3" goes to /tmp/DON
    #	every non-UND line	"field4 field5 field6 name" goes to /tmp/dN
    # Each list is written through sort.  N is handed to awk as the variable
    # s so the awk program can build the sort commands itself.
    dynamic_sym() {
    	awk -v s=$1 '/^Symbol table ..symtab/{exit}
    		! /^ *[1-9]/   {next}
    		$7 == "UND"    {print $8 | ("sort -o /tmp/U" s); next }
    		$5 == "GLOBAL" {print $8 | ("sort -o /tmp/DS" s) }
    		$5 == "WEAK"   {print $8 | ("sort -o /tmp/DW" s) }
    		$5 != "LOCAL"  {print $8 | ("sort -o /tmp/D" s) }
    		$5 != "LOCAL" && $4 == "OBJECT" {
    				print $8, $3 | ("sort -o /tmp/DO" s) }
    		{print $4, $5, $6, $8}' /tmp/s$1 | sort -o /tmp/d$1
    }
    
    static_sym() {
    	# Write the symbols of the .symtab table found in /tmp/s$1 whose
    	# binding is not LOCAL to /tmp/S$1, sorted, one per line as
    	# "field4 field5 field6 name".  Lines before the .symtab header and
    	# lines not starting with a digit 1-9 are ignored.
    	#
    	# The binding (field 5) is compared exactly, the way dynamic_sym
    	# does it; matching "LOCAL" anywhere on the line would also drop
    	# global symbols whose name merely contains that text.
    	awk '/^Symbol table ..symtab/{s=1}
    	     $5 == "LOCAL"{next}
    	     s&&/^ *[1-9]/{print $4, $5, $6, $8}' /tmp/s$1 | sort -o /tmp/S$1
    }
    
    data_sym_changes()
    {
    	# Join the files given as arguments (lines of "name size", sorted
    	# by name) on the name, and for every name present in both whose
    	# second and third joined fields differ print
    	# "name first --> second".
    	join "$@" |
    	    awk '$2 != $3 { print $1 " " $2 " --> " $3 }'
    }
    
    output_if_not_empty()
    {
    	# usage: output_if_not_empty heading command [arg ...]
    	#
    	# Run the command once to see whether it prints at least one
    	# non-empty line.  If it does, print the heading, run the command
    	# again with every output line indented by a tab, and finish with
    	# an empty line.  If it does not, print nothing.
    	leader=$1
    	shift
    	"$@" | grep -q . || return 0
    
    	echo "$leader"
    	"$@" | sed 's:^:	:'
    	echo
    }
    
    
    # Build the sorted symbol lists for the old (1) and the new (2) library
    for i in 1 2
    do
    	jump_slots $i
    	dynamic_sym $i
    	static_sym $i
    	# J = the jump slots that are not also undefined references
    	comm -23 /tmp/j$i /tmp/U$i >/tmp/J$i
    done
    
    echo "$old --> $new"
    
    # Exported symbols: anything different in the per-symbol details or in
    # the data object sizes?
    if ! cmp -s /tmp/d[12] || ! cmp -s /tmp/DO[12]
    then
    	printf "Dynamic export changes:\n"
    	output_if_not_empty "added:" comm -13 /tmp/D[12]
    	output_if_not_empty "removed:" comm -23 /tmp/D[12]
    	# strong before and weak now, and the other way around
    	output_if_not_empty "weakened:" comm -12 /tmp/DS1 /tmp/DW2
    	output_if_not_empty "strengthened:" comm -12 /tmp/DW1 /tmp/DS2
    	output_if_not_empty "data object sizes changes:" \
    					data_sym_changes /tmp/DO[12]
    else
    	printf "No dynamic export changes\n"
    fi
    
    # Undefined symbols referenced by the library
    if ! cmp -s /tmp/U[12]
    then
    	printf "External reference changes:\n"
    	output_if_not_empty "added:" comm -13 /tmp/U[12]
    	output_if_not_empty "removed:" comm -23 /tmp/U[12]
    fi
    
    # -v: show the lines of the relocation dumps that start with "R"
    if [[ $verbose = true ]]
    then
    	printf "\nReloc counts:\nbefore:\n"
    	grep ^R /tmp/r1
    	printf "\nafter:\n"
    	grep ^R /tmp/r2
    fi
    
    # Lazily-resolved functions
    output_if_not_empty "PLT added:" comm -13 /tmp/J1 /tmp/J2
    output_if_not_empty "PLT removed:" comm -23 /tmp/J1 /tmp/J2