Edit

IABSD.fr/src/sys/uvm/uvm_amap.c

Branch :

  • Show log

    Commit

  • Author : mpi
    Date : 2025-12-10 08:38:18
    Hash : 834e1ba0
    Message : Push `pageqlock' dances inside uvm_page{de,}activate() & uvm_pagewire(). Tested during multiple bulks on amd64, i386, arm64 and sparc64 by jca@, phessler@ and sthen@.

  • sys/uvm/uvm_amap.c
  • /*	$OpenBSD: uvm_amap.c,v 1.98 2025/12/10 08:38:18 mpi Exp $	*/
    /*	$NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $	*/
    
    /*
     * Copyright (c) 1997 Charles D. Cranor and Washington University.
     * All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions
     * are met:
     * 1. Redistributions of source code must retain the above copyright
     *    notice, this list of conditions and the following disclaimer.
     * 2. Redistributions in binary form must reproduce the above copyright
     *    notice, this list of conditions and the following disclaimer in the
     *    documentation and/or other materials provided with the distribution.
     *
     * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     */
    
    /*
     * uvm_amap.c: amap operations
     *
     * this file contains functions that perform operations on amaps.  see
     * uvm_amap.h for a brief explanation of the role of amaps in uvm.
     */
    
    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/malloc.h>
    #include <sys/kernel.h>
    #include <sys/pool.h>
    #include <sys/atomic.h>
    
    #include <uvm/uvm.h>
    #include <uvm/uvm_swap.h>
    
    /*
     * pools for allocation of vm_amap structures.  note that in order to
     * avoid an endless loop, the amap pool's allocator cannot allocate
     * memory from an amap (it currently goes through the kernel uobj, so
     * we are ok).
     */
    
    struct pool uvm_amap_pool;
    struct pool uvm_small_amap_pool[UVM_AMAP_CHUNK];
    struct pool uvm_amap_chunk_pool;
    
    LIST_HEAD(, vm_amap) amap_list;
    struct rwlock amap_list_lock = RWLOCK_INITIALIZER("amaplstlk");
    #define amap_lock_list()	rw_enter_write(&amap_list_lock)
    #define amap_unlock_list()	rw_exit_write(&amap_list_lock)
    
    static char amap_small_pool_names[UVM_AMAP_CHUNK][9];
    
    /*
     * local functions
     */
    
    static struct vm_amap *amap_alloc1(int, int, int);
    static inline void amap_list_insert(struct vm_amap *);
    static inline void amap_list_remove(struct vm_amap *);   
    
    struct vm_amap_chunk *amap_chunk_get(struct vm_amap *, int, int, int);
    void amap_chunk_free(struct vm_amap *, struct vm_amap_chunk *);
    
    #define PPREF_NONE ((int *) -1)	/* not using ppref */
    
    void	amap_pp_adjref(struct vm_amap *, int, vsize_t, int);
    void	amap_pp_establish(struct vm_amap *);
    void	amap_wiperange_chunk(struct vm_amap *, struct vm_amap_chunk *, int,
    	    int);
    void	amap_wiperange(struct vm_amap *, int, int);
    
    static inline void
    amap_list_insert(struct vm_amap *amap)
    {
    	amap_lock_list();
    	LIST_INSERT_HEAD(&amap_list, amap, am_list);
    	amap_unlock_list();
    }
    
    static inline void
    amap_list_remove(struct vm_amap *amap)
    {
    	amap_lock_list();
    	LIST_REMOVE(amap, am_list);
    	amap_unlock_list();
    }
    
    /*
     * amap_chunk_get: lookup a chunk for slot. if create is non-zero,
     * the chunk is created if it does not yet exist.
     *
     * => returns the chunk on success or NULL on error
     */
    struct vm_amap_chunk *
    amap_chunk_get(struct vm_amap *amap, int slot, int create, int waitf)
    {
    	int bucket = UVM_AMAP_BUCKET(amap, slot);
    	int baseslot = AMAP_BASE_SLOT(slot);
    	int n;
    	struct vm_amap_chunk *chunk, *newchunk, *pchunk = NULL;
    
    	if (UVM_AMAP_SMALL(amap))
    		return &amap->am_small;
    
    	for (chunk = amap->am_buckets[bucket]; chunk != NULL;
    	    chunk = TAILQ_NEXT(chunk, ac_list)) {
    		if (UVM_AMAP_BUCKET(amap, chunk->ac_baseslot) != bucket)
    			break;
    		if (chunk->ac_baseslot == baseslot)
    			return chunk;
    		pchunk = chunk;
    	}
    	if (!create)
    		return NULL;
    
    	if (amap->am_nslot - baseslot >= UVM_AMAP_CHUNK)
    		n = UVM_AMAP_CHUNK;
    	else
    		n = amap->am_nslot - baseslot;
    
    	newchunk = pool_get(&uvm_amap_chunk_pool, waitf | PR_ZERO);
    	if (newchunk == NULL)
    		return NULL;
    
    	if (pchunk == NULL) {
    		TAILQ_INSERT_TAIL(&amap->am_chunks, newchunk, ac_list);
    		KASSERT(amap->am_buckets[bucket] == NULL);
    		amap->am_buckets[bucket] = newchunk;
    	} else
    		TAILQ_INSERT_AFTER(&amap->am_chunks, pchunk, newchunk,
    		    ac_list);
    
    	amap->am_ncused++;
    	newchunk->ac_baseslot = baseslot;
    	newchunk->ac_nslot = n;
    	return newchunk;
    }
    
    void
    amap_chunk_free(struct vm_amap *amap, struct vm_amap_chunk *chunk)
    {
    	int bucket = UVM_AMAP_BUCKET(amap, chunk->ac_baseslot);
    	struct vm_amap_chunk *nchunk;
    
    	if (UVM_AMAP_SMALL(amap))
    		return;
    
    	nchunk = TAILQ_NEXT(chunk, ac_list);
    	TAILQ_REMOVE(&amap->am_chunks, chunk, ac_list);
    	if (amap->am_buckets[bucket] == chunk) {
    		if (nchunk != NULL &&
    		    UVM_AMAP_BUCKET(amap, nchunk->ac_baseslot) == bucket)
    			amap->am_buckets[bucket] = nchunk;
    		else
    			amap->am_buckets[bucket] = NULL;
    
    	}
    	pool_put(&uvm_amap_chunk_pool, chunk);
    	amap->am_ncused--;
    }
    
    /*
     * what is ppref?   ppref is an _optional_ amap feature which is used
     * to keep track of reference counts on a per-page basis.
     *
     * an array of ints is allocated for the pprefs.  this
     * array is allocated only when a partial reference is added to the
     * map (either by unmapping part of the amap, or gaining a reference
     * to only a part of an amap).  if the allocation of the array fails
     * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
     * that we tried to do ppref's but couldn't alloc the array so just
     * give up (after all, this is an optional feature!).
     *
     * the array is divided into page sized "chunks."   for chunks of length 1,
     * the chunk reference count plus one is stored in that chunk's slot.
     * for chunks of length > 1 the first slot contains (the reference count
     * plus one) * -1.    [the negative value indicates that the length is
     * greater than one.]   the second slot of the chunk contains the length
     * of the chunk.   here is an example:
     *
     * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
     *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
     *              <----------><-><----><-------><----><-><------->
     * (x = don't care)
     *
     * this allows us to allow one int to contain the ref count for the whole
     * chunk.    note that the "plus one" part is needed because a reference
     * count of zero is neither positive or negative (need a way to tell
     * if we've got one zero or a bunch of them).
     *
     * here are some in-line functions to help us.
     */
    
    /*
     * pp_getreflen: get the reference and length for a specific offset
     *
     * => ppref's amap must be locked
     */
    static inline void
    pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
    {
    
    	if (ppref[offset] > 0) {		/* chunk size must be 1 */
    		*refp = ppref[offset] - 1;	/* don't forget to adjust */
    		*lenp = 1;
    	} else {
    		*refp = (ppref[offset] * -1) - 1;
    		*lenp = ppref[offset+1];
    	}
    }
    
    /*
     * pp_setreflen: set the reference and length for a specific offset
     *
     * => ppref's amap must be locked
     */
    static inline void
    pp_setreflen(int *ppref, int offset, int ref, int len)
    {
    	if (len == 1) {
    		ppref[offset] = ref + 1;
    	} else {
    		ppref[offset] = (ref + 1) * -1;
    		ppref[offset+1] = len;
    	}
    }
    
    /*
     * amap_init: called at boot time to init global amap data structures
     */
    
    void
    amap_init(void)
    {
    	int i;
    	size_t size;
    
    	/* Initialize the vm_amap pool. */
    	pool_init(&uvm_amap_pool, sizeof(struct vm_amap),
    	    0, IPL_MPFLOOR, PR_WAITOK, "amappl", NULL);
    	pool_sethiwat(&uvm_amap_pool, 4096);
    
    	/* initialize small amap pools */
    	for (i = 0; i < nitems(uvm_small_amap_pool); i++) {
    		snprintf(amap_small_pool_names[i],
    		    sizeof(amap_small_pool_names[0]), "amappl%d", i + 1);
    		size = offsetof(struct vm_amap, am_small.ac_anon) +
    		    (i + 1) * sizeof(struct vm_anon *);
    		pool_init(&uvm_small_amap_pool[i], size, 0, IPL_MPFLOOR,
    		    PR_WAITOK, amap_small_pool_names[i], NULL);
    	}
    
    	pool_init(&uvm_amap_chunk_pool, sizeof(struct vm_amap_chunk) +
    	    UVM_AMAP_CHUNK * sizeof(struct vm_anon *),
    	    0, IPL_MPFLOOR, PR_WAITOK, "amapchunkpl", NULL);
    	pool_sethiwat(&uvm_amap_chunk_pool, 4096);
    }
    
    /*
     * amap_alloc1: allocate an amap, but do not initialise the overlay.
     *
     * => Note: lock is not set.
     */
    static inline struct vm_amap *
    amap_alloc1(int slots, int waitf, int lazyalloc)
    {
    	struct vm_amap *amap;
    	struct vm_amap_chunk *chunk, *tmp;
    	int chunks, log_chunks, chunkperbucket = 1, hashshift = 0;
    	int buckets, i, n;
    	int pwaitf = (waitf & M_WAITOK) ? PR_WAITOK : PR_NOWAIT;
    
    	KASSERT(slots > 0);
    
    	/*
    	 * Cast to unsigned so that rounding up cannot cause integer overflow
    	 * if slots is large.
    	 */
    	chunks = roundup((unsigned int)slots, UVM_AMAP_CHUNK) / UVM_AMAP_CHUNK;
    
    	if (lazyalloc) {
    		/*
    		 * Basically, the amap is a hash map where the number of
    		 * buckets is fixed. We select the number of buckets using the
    		 * following strategy:
    		 *
    		 * 1. The maximal number of entries to search in a bucket upon
    		 * a collision should be less than or equal to
    		 * log2(slots / UVM_AMAP_CHUNK). This is the worst-case number
    		 * of lookups we would have if we could chunk the amap. The
    		 * log2(n) comes from the fact that amaps are chunked by
    		 * splitting up their vm_map_entries and organizing those
    		 * in a binary search tree.
    		 *
    		 * 2. The maximal number of entries in a bucket must be a
    		 * power of two.
    		 *
    		 * The maximal number of entries per bucket is used to hash
    		 * a slot to a bucket.
    		 *
    		 * In the future, this strategy could be refined to make it
    		 * even harder/impossible that the total amount of KVA needed
    		 * for the hash buckets of all amaps to exceed the maximal
    		 * amount of KVA memory reserved for amaps.
    		 */
    		for (log_chunks = 1; (chunks >> log_chunks) > 0; log_chunks++)
    			continue;
    
    		chunkperbucket = 1 << hashshift;
    		while (chunkperbucket + 1 < log_chunks) {
    			hashshift++;
    			chunkperbucket = 1 << hashshift;
    		}
    	}
    
    	if (slots > UVM_AMAP_CHUNK)
    		amap = pool_get(&uvm_amap_pool, pwaitf);
    	else
    		amap = pool_get(&uvm_small_amap_pool[slots - 1],
    		    pwaitf | PR_ZERO);
    	if (amap == NULL)
    		return NULL;
    
    	amap->am_lock = NULL;
    	amap->am_ref = 1;
    	amap->am_flags = 0;
    	amap->am_ppref = NULL;
    	amap->am_nslot = slots;
    	amap->am_nused = 0;
    
    	if (UVM_AMAP_SMALL(amap)) {
    		amap->am_small.ac_nslot = slots;
    		return amap;
    	}
    
    	amap->am_ncused = 0;
    	TAILQ_INIT(&amap->am_chunks);
    	amap->am_hashshift = hashshift;
    	amap->am_buckets = NULL;
    
    	buckets = howmany(chunks, chunkperbucket);
    	amap->am_buckets = mallocarray(buckets, sizeof(*amap->am_buckets),
    	    M_UVMAMAP, waitf | (lazyalloc ? M_ZERO : 0));
    	if (amap->am_buckets == NULL)
    		goto fail1;
    	amap->am_nbuckets = buckets;
    
    	if (!lazyalloc) {
    		for (i = 0; i < buckets; i++) {
    			if (i == buckets - 1) {
    				n = slots % UVM_AMAP_CHUNK;
    				if (n == 0)
    					n = UVM_AMAP_CHUNK;
    			} else
    				n = UVM_AMAP_CHUNK;
    
    			chunk = pool_get(&uvm_amap_chunk_pool,
    			    PR_ZERO | pwaitf);
    			if (chunk == NULL)
    				goto fail1;
    
    			amap->am_buckets[i] = chunk;
    			amap->am_ncused++;
    			chunk->ac_baseslot = i * UVM_AMAP_CHUNK;
    			chunk->ac_nslot = n;
    			TAILQ_INSERT_TAIL(&amap->am_chunks, chunk, ac_list);
    		}
    	}
    
    	return amap;
    
    fail1:
    	free(amap->am_buckets, M_UVMAMAP, buckets * sizeof(*amap->am_buckets));
    	TAILQ_FOREACH_SAFE(chunk, &amap->am_chunks, ac_list, tmp)
    		pool_put(&uvm_amap_chunk_pool, chunk);
    	pool_put(&uvm_amap_pool, amap);
    	return NULL;
    }
    
    static void
    amap_lock_alloc(struct vm_amap *amap)
    {
    	rw_obj_alloc(&amap->am_lock, "amaplk");
    }
    
    /*
     * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
     *
     * => caller should ensure sz is a multiple of PAGE_SIZE
     * => reference count to new amap is set to one
     * => new amap is returned unlocked
     */
    struct vm_amap *
    amap_alloc(vaddr_t sz, int waitf, int lazyalloc)
    {
    	struct vm_amap *amap;
    	size_t slots;
    
    	AMAP_B2SLOT(slots, sz);		/* load slots */
    	if (slots > INT_MAX)
    		return NULL;
    
    	amap = amap_alloc1(slots, waitf, lazyalloc);
    	if (amap != NULL) {
    		amap_lock_alloc(amap);
    		amap_list_insert(amap);
    	}
    
    	return amap;
    }
    
    
    /*
     * amap_free: free an amap
     *
     * => the amap must be unlocked
     * => the amap should have a zero reference count and be empty
     */
    void
    amap_free(struct vm_amap *amap)
    {
    	struct vm_amap_chunk *chunk, *tmp;
    
    	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
    	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
    
    	if (amap->am_lock != NULL) {
    		KASSERT(!rw_write_held(amap->am_lock));
    		rw_obj_free(amap->am_lock);
    	}
    
    	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
    		free(amap->am_ppref, M_UVMAMAP, amap->am_nslot * sizeof(int));
    
    	if (UVM_AMAP_SMALL(amap))
    		pool_put(&uvm_small_amap_pool[amap->am_nslot - 1], amap);
    	else {
    		TAILQ_FOREACH_SAFE(chunk, &amap->am_chunks, ac_list, tmp)
    		    pool_put(&uvm_amap_chunk_pool, chunk);
    		free(amap->am_buckets, M_UVMAMAP,
    		    amap->am_nbuckets * sizeof(*amap->am_buckets));
    		pool_put(&uvm_amap_pool, amap);
    	}
    }
    
    /*
     * amap_wipeout: wipeout all anon's in an amap; then free the amap!
     *
     * => Called from amap_unref(), when reference count drops to zero.
     * => amap must be locked.
     */
    void
    amap_wipeout(struct vm_amap *amap)
    {
    	int slot;
    	struct vm_anon *anon;
    	struct vm_amap_chunk *chunk;
    
    	KASSERT(rw_write_held(amap->am_lock));
    	KASSERT(amap->am_ref == 0);
    
    	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
    		/*
    		 * Note: amap_swap_off() will call us again.
    		 */
    		amap_unlock(amap);
    		return;
    	}
    
    	amap_list_remove(amap);
    
    	AMAP_CHUNK_FOREACH(chunk, amap) {
    		int i, refs, map = chunk->ac_usedmap;
    
    		for (i = ffs(map); i != 0; i = ffs(map)) {
    			slot = i - 1;
    			map ^= 1 << slot;
    			anon = chunk->ac_anon[slot];
    
    			if (anon == NULL || anon->an_ref == 0)
    				panic("amap_wipeout: corrupt amap");
    			KASSERT(anon->an_lock == amap->am_lock);
    
    			/*
    			 * Drop the reference.
    			 */
    			refs = --anon->an_ref;
    			if (refs == 0) {
    				uvm_anfree(anon);
    			}
    		}
    	}
    
    	/*
    	 * Finally, destroy the amap.
    	 */
    	amap->am_ref = 0;	/* ... was one */
    	amap->am_nused = 0;
    	amap_unlock(amap);
    	amap_free(amap);
    }
    
    /*
     * amap_copy: ensure that a map entry's "needs_copy" flag is false
     *	by copying the amap if necessary.
     *
     * => an entry with a null amap pointer will get a new (blank) one.
     * => the map that the map entry belongs to must be locked by caller.
     * => the amap currently attached to "entry" (if any) must be unlocked.
     * => if canchunk is true, then we may clip the entry into a chunk
     * => "startva" and "endva" are used only if canchunk is true.  they are
     *     used to limit chunking (e.g. if you have a large space that you
     *     know you are going to need to allocate amaps for, there is no point
     *     in allowing that to be chunked)
     */
    
    void
    amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
        boolean_t canchunk, vaddr_t startva, vaddr_t endva)
    {
    	struct vm_amap *amap, *srcamap;
    	int slots, lcv, lazyalloc = 0;
    	vaddr_t chunksize;
    	int i, j, k, n, srcslot;
    	struct vm_amap_chunk *chunk = NULL, *srcchunk = NULL;
    	struct vm_anon *anon;
    
    	KASSERT(map != kernel_map);		/* we use sleeping locks */
    
    	/*
    	 * Is there an amap to copy?  If not, create one.
    	 */
    	if (entry->aref.ar_amap == NULL) {
    		/*
    		 * Check to see if we have a large amap that we can
    		 * chunk.  We align startva/endva to chunk-sized
    		 * boundaries and then clip to them.
    		 *
    		 * If we cannot chunk the amap, allocate it in a way
    		 * that makes it grow or shrink dynamically with
    		 * the number of slots.
    		 */
    		if (atop(entry->end - entry->start) >= UVM_AMAP_LARGE) {
    			if (canchunk) {
    				/* convert slots to bytes */
    				chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
    				startva = (startva / chunksize) * chunksize;
    				endva = roundup(endva, chunksize);
    				UVM_MAP_CLIP_START(map, entry, startva);
    				/* watch out for endva wrap-around! */
    				if (endva >= startva)
    					UVM_MAP_CLIP_END(map, entry, endva);
    			} else
    				lazyalloc = 1;
    		}
    
    		entry->aref.ar_pageoff = 0;
    		entry->aref.ar_amap = amap_alloc(entry->end - entry->start,
    		    waitf, lazyalloc);
    		if (entry->aref.ar_amap != NULL)
    			entry->etype &= ~UVM_ET_NEEDSCOPY;
    		return;
    	}
    
    	/*
    	 * First check and see if we are the only map entry referencing
    	 * he amap we currently have.  If so, then just take it over instead
    	 * of copying it.  Note that we are reading am_ref without lock held
    	 * as the value can only be one if we have the only reference
    	 * to the amap (via our locked map).  If the value is greater than
    	 * one, then allocate amap and re-check the value.
    	 */
    	if (entry->aref.ar_amap->am_ref == 1) {
    		entry->etype &= ~UVM_ET_NEEDSCOPY;
    		return;
    	}
    
    	/*
    	 * Allocate a new amap (note: not initialised, etc).
    	 */
    	AMAP_B2SLOT(slots, entry->end - entry->start);
    	if (!UVM_AMAP_SMALL(entry->aref.ar_amap) &&
    	    entry->aref.ar_amap->am_hashshift != 0)
    		lazyalloc = 1;
    	amap = amap_alloc1(slots, waitf, lazyalloc);
    	if (amap == NULL)
    		return;
    	srcamap = entry->aref.ar_amap;
    
    	/*
    	 * Make the new amap share the source amap's lock, and then lock
    	 * both.
    	 */
    	amap->am_lock = srcamap->am_lock;
    	rw_obj_hold(amap->am_lock);
    
    	amap_lock(srcamap, RW_WRITE);
    
    	/*
    	 * Re-check the reference count with the lock held.  If it has
    	 * dropped to one - we can take over the existing map.
    	 */
    	if (srcamap->am_ref == 1) {
    		/* Just take over the existing amap. */
    		entry->etype &= ~UVM_ET_NEEDSCOPY;
    		amap_unlock(srcamap);
    		/* Destroy the new (unused) amap. */
    		amap->am_ref--;
    		amap_free(amap);
    		return;
    	}
    
    	/*
    	 * Copy the slots.
    	 */
    	for (lcv = 0; lcv < slots; lcv += n) {
    		srcslot = entry->aref.ar_pageoff + lcv;
    		i = UVM_AMAP_SLOTIDX(lcv);
    		j = UVM_AMAP_SLOTIDX(srcslot);
    		n = UVM_AMAP_CHUNK;
    		if (i > j)
    			n -= i;
    		else
    			n -= j;
    		if (lcv + n > slots)
    			n = slots - lcv;
    
    		srcchunk = amap_chunk_get(srcamap, srcslot, 0, PR_NOWAIT);
    		if (srcchunk == NULL)
    			continue;
    
    		chunk = amap_chunk_get(amap, lcv, 1, PR_NOWAIT);
    		if (chunk == NULL) {
    			amap_unlock(srcamap);
    			/* Destroy the new amap. */
    			amap->am_ref--;
    			amap_free(amap);
    			return;
    		}
    
    		for (k = 0; k < n; i++, j++, k++) {
    			chunk->ac_anon[i] = anon = srcchunk->ac_anon[j];
    			if (anon == NULL)
    				continue;
    
    			KASSERT(anon->an_lock == srcamap->am_lock);
    			KASSERT(anon->an_ref > 0);
    			chunk->ac_usedmap |= (1 << i);
    			anon->an_ref++;
    			amap->am_nused++;
    		}
    	}
    
    	/*
    	 * Drop our reference to the old amap (srcamap) and unlock.
    	 * Since the reference count on srcamap is greater than one,
    	 * (we checked above), it cannot drop to zero while it is locked.
    	 */
    	srcamap->am_ref--;
    	KASSERT(srcamap->am_ref > 0);
    
    	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
    		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
    	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
    		amap_pp_adjref(srcamap, entry->aref.ar_pageoff, 
    		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
    	}
    
    	/*
    	 * If we referenced any anons, then share the source amap's lock.
    	 * Otherwise, we have nothing in common, so allocate a new one.
    	 */
    	KASSERT(amap->am_lock == srcamap->am_lock);
    	if (amap->am_nused == 0) {
    		rw_obj_free(amap->am_lock);
    		amap->am_lock = NULL;
    	}
    	amap_unlock(srcamap);
    
    	if (amap->am_lock == NULL)
    		amap_lock_alloc(amap);
    
    	/*
    	 * Install new amap.
    	 */
    	entry->aref.ar_pageoff = 0;
    	entry->aref.ar_amap = amap;
    	entry->etype &= ~UVM_ET_NEEDSCOPY;
    
    	amap_list_insert(amap);
    }
    
    /*
     * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
     *
     *	called during fork(2) when the parent process has a wired map
     *	entry.   in that case we want to avoid write-protecting pages
     *	in the parent's map (e.g. like what you'd do for a COW page)
     *	so we resolve the COW here.
     *
     * => assume parent's entry was wired, thus all pages are resident.
     * => the parent and child vm_map must both be locked.
     * => caller passes child's map/entry in to us
     * => XXXCDC: out of memory should cause fork to fail, but there is
     *	currently no easy way to do this (needs fix)
     */
    
    void
    amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
    {
    	struct vm_amap *amap = entry->aref.ar_amap;
    	int slot;
    	struct vm_anon *anon, *nanon;
    	struct vm_page *pg, *npg;
    	struct vm_amap_chunk *chunk;
    
    	/*
    	 * note that if we unlock the amap then we must ReStart the "lcv" for
    	 * loop because some other process could reorder the anon's in the
    	 * am_anon[] array on us while the lock is dropped.
    	 */
    ReStart:
    	amap_lock(amap, RW_WRITE);
    	AMAP_CHUNK_FOREACH(chunk, amap) {
    		int i, map = chunk->ac_usedmap;
    
    		for (i = ffs(map); i != 0; i = ffs(map)) {
    			slot = i - 1;
    			map ^= 1 << slot;
    			anon = chunk->ac_anon[slot];
    			pg = anon->an_page;
    			KASSERT(anon->an_lock == amap->am_lock);
    
    			/*
    			 * The old page must be resident since the parent is
    			 * wired.
    			 */
    			KASSERT(pg != NULL);
    
    			/*
    			 * if the anon ref count is one, we are safe (the child
    			 * has exclusive access to the page).
    			 */
    			if (anon->an_ref <= 1)
    				continue;
    
    			/*
    			 * If the page is busy, then we have to unlock, wait for
    			 * it and then restart.
    			 */
    			if (pg->pg_flags & PG_BUSY) {
    				uvm_pagewait(pg, amap->am_lock, "cownow");
    				goto ReStart;
    			}
    
    			/*
    			 * Perform a copy-on-write.
    			 * First - get a new anon and a page.
    			 */
    			nanon = uvm_analloc();
    			if (nanon != NULL) {
    				/* the new anon will share the amap's lock */
    				nanon->an_lock = amap->am_lock;
    				npg = uvm_pagealloc(NULL, 0, nanon, 0);
    			} else
    				npg = NULL;	/* XXX: quiet gcc warning */
    
    			if (nanon == NULL || npg == NULL) {
    				/* out of memory */
    				amap_unlock(amap);
    				if (nanon != NULL) {
    					nanon->an_lock = NULL;
    					nanon->an_ref--;
    					KASSERT(nanon->an_ref == 0);
    					uvm_anfree(nanon);
    				}
    				uvm_wait("cownowpage");
    				goto ReStart;
    			}
    
    			/*
    			 * Copy the data and replace anon with the new one.
    			 * Also, setup its lock (share the with amap's lock).
    			 */
    			uvm_pagecopy(pg, npg);
    			anon->an_ref--;
    			KASSERT(anon->an_ref > 0);
    			chunk->ac_anon[slot] = nanon;
    
    			/*
    			 * Drop PG_BUSY on new page.  Since its owner was write
    			 * locked all this time - it cannot be PG_RELEASED or
    			 * PG_WANTED.
    			 */
    			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
    			UVM_PAGE_OWN(npg, NULL);
    			uvm_pageactivate(npg);
    		}
    	}
    	amap_unlock(amap);
    }
    
    /*
     * amap_splitref: split a single reference into two separate references
     *
     * => called from uvm_map's clip routines
     * => origref's map should be locked
     * => origref->ar_amap should be unlocked (we will lock)
     */
    void
    amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
    {
    	struct vm_amap *amap = origref->ar_amap;
    	int leftslots;
    
    	KASSERT(splitref->ar_amap == amap);
    	AMAP_B2SLOT(leftslots, offset);
    	if (leftslots == 0)
    		panic("amap_splitref: split at zero offset");
    
    	amap_lock(amap, RW_WRITE);
    
    	if (amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
    		panic("amap_splitref: map size check failed");
    
    	/* Establish ppref before we add a duplicate reference to the amap. */
    	if (amap->am_ppref == NULL)
    		amap_pp_establish(amap);
    
    	/* Note: not a share reference. */
    	amap->am_ref++;
    	splitref->ar_amap = amap;
    	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
    	amap_unlock(amap);
    }
    
    /*
     * amap_pp_establish: add a ppref array to an amap, if possible.
     *
     * => amap should be locked by caller* => amap should be locked by caller
     */
    void
    amap_pp_establish(struct vm_amap *amap)
    {
    
    	KASSERT(rw_write_held(amap->am_lock));
    	amap->am_ppref = mallocarray(amap->am_nslot, sizeof(int),
    	    M_UVMAMAP, M_NOWAIT|M_ZERO);
    
    	if (amap->am_ppref == NULL) {
    		/* Failure - just do not use ppref. */
    		amap->am_ppref = PPREF_NONE;
    		return;
    	}
    
    	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
    }
    
    /*
     * amap_pp_adjref: adjust reference count to a part of an amap using the
     * per-page reference count array.
     *
     * => caller must check that ppref != PPREF_NONE before calling.
     * => map and amap must be locked.
     */
    void
    amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
    {
     	int stopslot, *ppref, lcv, prevlcv;
     	int ref, len, prevref, prevlen;
    
    	KASSERT(rw_write_held(amap->am_lock));
    
    	stopslot = curslot + slotlen;
    	ppref = amap->am_ppref;
     	prevlcv = 0;
    
    	/*
    	 * Advance to the correct place in the array, fragment if needed.
    	 */
    	for (lcv = 0 ; lcv < curslot ; lcv += len) {
    		pp_getreflen(ppref, lcv, &ref, &len);
    		if (lcv + len > curslot) {     /* goes past start? */
    			pp_setreflen(ppref, lcv, ref, curslot - lcv);
    			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
    			len = curslot - lcv;   /* new length of entry @ lcv */
    		}
    		prevlcv = lcv;
    	}
    	if (lcv != 0)
    		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
    	else {
    		/*
    		 * Ensure that the "prevref == ref" test below always
    		 * fails, since we are starting from the beginning of
    		 * the ppref array; that is, there is no previous chunk.
    		 */
    		prevref = -1;
    		prevlen = 0;
    	}
    
    	/*
    	 * Now adjust reference counts in range.  Merge the first
    	 * changed entry with the last unchanged entry if possible.
    	 */
    	if (lcv != curslot)
    		panic("amap_pp_adjref: overshot target");
    
    	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
    		pp_getreflen(ppref, lcv, &ref, &len);
    		if (lcv + len > stopslot) {     /* goes past end? */
    			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
    			pp_setreflen(ppref, stopslot, ref,
    			    len - (stopslot - lcv));
    			len = stopslot - lcv;
    		}
    		ref += adjval;
    		if (ref < 0)
    			panic("amap_pp_adjref: negative reference count");
    		if (lcv == prevlcv + prevlen && ref == prevref) {
    			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
    		} else {
    			pp_setreflen(ppref, lcv, ref, len);
    		}
    		if (ref == 0)
    			amap_wiperange(amap, lcv, len);
    	}
    
    }
    
    void
    amap_wiperange_chunk(struct vm_amap *amap, struct vm_amap_chunk *chunk,
        int slotoff, int slots)
    {
    	int curslot, i, map;
    	int startbase, endbase;
    	struct vm_anon *anon;
    
    	startbase = AMAP_BASE_SLOT(slotoff);
    	endbase = AMAP_BASE_SLOT(slotoff + slots - 1);
    
    	map = chunk->ac_usedmap;
    	if (startbase == chunk->ac_baseslot)
    		map &= ~((1 << (slotoff - startbase)) - 1);
    	if (endbase == chunk->ac_baseslot)
    		map &= (1 << (slotoff + slots - endbase)) - 1;
    
    	for (i = ffs(map); i != 0; i = ffs(map)) {
    		int refs;
    
    		curslot = i - 1;
    		map ^= 1 << curslot;
    		chunk->ac_usedmap ^= 1 << curslot;
    		anon = chunk->ac_anon[curslot];
    		KASSERT(anon->an_lock == amap->am_lock);
    
    		/* remove it from the amap */
    		chunk->ac_anon[curslot] = NULL;
    
    		amap->am_nused--;
    
    		/* drop anon reference count */
    		refs = --anon->an_ref;
    		if (refs == 0) {
    			uvm_anfree(anon);
    		}
    
    		/*
    		 * done with this anon, next ...!
    		 */
    
    	}	/* end of 'for' loop */
    }
    
    /*
     * amap_wiperange: wipe out a range of an amap.
     * Note: different from amap_wipeout because the amap is kept intact.
     *
     * => Both map and amap must be locked by caller.
     */
    void
    amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
    {
    	int bucket, startbucket, endbucket;
    	struct vm_amap_chunk *chunk, *nchunk;
    
    	KASSERT(rw_write_held(amap->am_lock));
    
    	startbucket = UVM_AMAP_BUCKET(amap, slotoff);
    	endbucket = UVM_AMAP_BUCKET(amap, slotoff + slots - 1);
    
    	/*
    	 * We can either traverse the amap by am_chunks or by am_buckets.
    	 * Determine which way is less expensive.
    	 */
    	if (UVM_AMAP_SMALL(amap))
    		amap_wiperange_chunk(amap, &amap->am_small, slotoff, slots);
    	else if (endbucket + 1 - startbucket >= amap->am_ncused) {
    		TAILQ_FOREACH_SAFE(chunk, &amap->am_chunks, ac_list, nchunk) {
    			if (chunk->ac_baseslot + chunk->ac_nslot <= slotoff)
    				continue;
    			if (chunk->ac_baseslot >= slotoff + slots)
    				continue;
    
    			amap_wiperange_chunk(amap, chunk, slotoff, slots);
    			if (chunk->ac_usedmap == 0)
    				amap_chunk_free(amap, chunk);
    		}
    	} else {
    		for (bucket = startbucket; bucket <= endbucket; bucket++) {
    			for (chunk = amap->am_buckets[bucket]; chunk != NULL;
    			    chunk = nchunk) {
    				nchunk = TAILQ_NEXT(chunk, ac_list);
    
    				if (UVM_AMAP_BUCKET(amap, chunk->ac_baseslot) !=
    				    bucket)
    					break;
    				if (chunk->ac_baseslot + chunk->ac_nslot <=
    				    slotoff)
    					continue;
    				if (chunk->ac_baseslot >= slotoff + slots)
    					continue;
    
    				amap_wiperange_chunk(amap, chunk, slotoff,
    				    slots);
    				if (chunk->ac_usedmap == 0)
    					amap_chunk_free(amap, chunk);
    			}
    		}
    	}
    }
    
    /*
     * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
     *
     * => note that we don't always traverse all anons.
     *    eg. amaps being wiped out, released anons.
     * => return TRUE if failed.
     */
    
    boolean_t
    amap_swap_off(int startslot, int endslot)
    {
    	struct vm_amap *am;
    	struct vm_amap *am_next;
    	struct vm_amap marker;
    	boolean_t rv = FALSE;
    
    	amap_lock_list();
    	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
    		int i, map;
    		struct vm_amap_chunk *chunk;
    
    		amap_lock(am, RW_WRITE);
    		if (am->am_nused == 0) {
    			amap_unlock(am);
    			am_next = LIST_NEXT(am, am_list);
    			continue;
    		}
    
    		LIST_INSERT_AFTER(am, &marker, am_list);
    		amap_unlock_list();
    
    again:
    		AMAP_CHUNK_FOREACH(chunk, am) {
    			map = chunk->ac_usedmap;
    
    			for (i = ffs(map); i != 0; i = ffs(map)) {
    				int swslot;
    				int slot = i - 1;
    				struct vm_anon *anon;
    
    				map ^= 1 << slot;
    				anon = chunk->ac_anon[slot];
    
    				swslot = anon->an_swslot;
    				if (swslot < startslot || endslot <= swslot) {
    					continue;
    				}
    
    				am->am_flags |= AMAP_SWAPOFF;
    
    				rv = uvm_anon_pagein(am, anon);
    				amap_lock(am, RW_WRITE);
    
    				am->am_flags &= ~AMAP_SWAPOFF;
    				if (amap_refs(am) == 0) {
    					amap_wipeout(am);
    					am = NULL;
    					goto nextamap;
    				}
    				if (rv)
    					goto nextamap;
    				goto again;
    			}
    		}
    nextamap:
    		if (am != NULL)
    			amap_unlock(am);
    		amap_lock_list();
    		am_next = LIST_NEXT(&marker, am_list);
    		LIST_REMOVE(&marker, am_list);
    	}
    	amap_unlock_list();
    
    	return rv;
    }
    
    /*
     * amap_lookup: look up a page in an amap.
     *
     * => amap should be locked by caller.
     */
    struct vm_anon *
    amap_lookup(struct vm_aref *aref, vaddr_t offset)
    {
    	int slot;
    	struct vm_amap *amap = aref->ar_amap;
    	struct vm_amap_chunk *chunk;
    
    	AMAP_B2SLOT(slot, offset);
    	slot += aref->ar_pageoff;
    	KASSERT(slot < amap->am_nslot);
    
    	chunk = amap_chunk_get(amap, slot, 0, PR_NOWAIT);
    	if (chunk == NULL)
    		return NULL;
    
    	return chunk->ac_anon[UVM_AMAP_SLOTIDX(slot)];
    }
    
    /*
     * amap_lookups: look up a range of pages in an amap.
     *
     * => amap should be locked by caller.
     * => XXXCDC: this interface is biased toward array-based amaps.  fix.
     */
    void
    amap_lookups(struct vm_aref *aref, vaddr_t offset,
        struct vm_anon **anons, int npages)
    {
    	int i, lcv, n, slot;
    	struct vm_amap *amap = aref->ar_amap;
    	struct vm_amap_chunk *chunk = NULL;
    
    	AMAP_B2SLOT(slot, offset);
    	slot += aref->ar_pageoff;
    
    	KASSERT((slot + (npages - 1)) < amap->am_nslot);
    
    	for (i = 0, lcv = slot; lcv < slot + npages; i += n, lcv += n) {
    		n = UVM_AMAP_CHUNK - UVM_AMAP_SLOTIDX(lcv);
    		if (lcv + n > slot + npages)
    			n = slot + npages - lcv;
    
    		chunk = amap_chunk_get(amap, lcv, 0, PR_NOWAIT);
    		if (chunk == NULL)
    			memset(&anons[i], 0, n * sizeof(*anons));
    		else
    			memcpy(&anons[i],
    			    &chunk->ac_anon[UVM_AMAP_SLOTIDX(lcv)],
    			    n * sizeof(*anons));
    	}
    }
    
    /*
     * amap_populate: ensure that the amap can store an anon for the page at
     * offset. This function can sleep until memory to store the anon is
     * available.
     */
    void
    amap_populate(struct vm_aref *aref, vaddr_t offset)
    {
    	int slot;
    	struct vm_amap *amap = aref->ar_amap;
    	struct vm_amap_chunk *chunk;
    
    	AMAP_B2SLOT(slot, offset);
    	slot += aref->ar_pageoff;
    	KASSERT(slot < amap->am_nslot);
    
    	chunk = amap_chunk_get(amap, slot, 1, PR_WAITOK);
    	KASSERT(chunk != NULL);
    }
    
    /*
     * amap_add: add (or replace) a page to an amap.
     *
     * => amap should be locked by caller.
     * => anon must have the lock associated with this amap.
     */
    int
    amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
        boolean_t replace)
    {
    	int slot;
    	struct vm_amap *amap = aref->ar_amap;
    	struct vm_amap_chunk *chunk;
    
    	AMAP_B2SLOT(slot, offset);
    	slot += aref->ar_pageoff;
    	KASSERT(slot < amap->am_nslot);
    
    	chunk = amap_chunk_get(amap, slot, 1, PR_NOWAIT);
    	if (chunk == NULL)
    		return 1;
    
    	slot = UVM_AMAP_SLOTIDX(slot);
    	if (replace) {
    		struct vm_anon *oanon  = chunk->ac_anon[slot];
    
    		KASSERT(oanon != NULL);
    		if (oanon->an_page && (amap->am_flags & AMAP_SHARED) != 0) {
    			pmap_page_protect(oanon->an_page, PROT_NONE);
    			/*
    			 * XXX: suppose page is supposed to be wired somewhere?
    			 */
    		}
    	} else {   /* !replace */
    		if (chunk->ac_anon[slot] != NULL)
    			panic("amap_add: slot in use");
    
    		chunk->ac_usedmap |= 1 << slot;
    		amap->am_nused++;
    	}
    	chunk->ac_anon[slot] = anon;
    
    	return 0;
    }
    
    /*
     * amap_unadd: remove a page from an amap.
     *
     * => amap should be locked by caller.
     */
    void
    amap_unadd(struct vm_aref *aref, vaddr_t offset)
    {
    	struct vm_amap *amap = aref->ar_amap;
    	struct vm_amap_chunk *chunk;
    	int slot;
    
    	KASSERT(rw_write_held(amap->am_lock));
    
    	AMAP_B2SLOT(slot, offset);
    	slot += aref->ar_pageoff;
    	KASSERT(slot < amap->am_nslot);
    	chunk = amap_chunk_get(amap, slot, 0, PR_NOWAIT);
    	KASSERT(chunk != NULL);
    
    	slot = UVM_AMAP_SLOTIDX(slot);
    	KASSERT(chunk->ac_anon[slot] != NULL);
    
    	chunk->ac_anon[slot] = NULL;
    	chunk->ac_usedmap &= ~(1 << slot);
    	amap->am_nused--;
    
    	if (chunk->ac_usedmap == 0)
    		amap_chunk_free(amap, chunk);
    }
    
    /*
     * amap_adjref_anons: adjust the reference count(s) on amap and its anons.
     */
    static void
    amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len,
        int refv, boolean_t all)
    {
    	KASSERT(rw_write_held(amap->am_lock));
    
    	/*
    	 * We must establish the ppref array before changing am_ref
    	 * so that the ppref values match the current amap refcount.
    	 */
    	if (amap->am_ppref == NULL && !all && len != amap->am_nslot) {
    		amap_pp_establish(amap);
    	}
    	amap->am_ref += refv;
    
    	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
    		if (all) {
    			amap_pp_adjref(amap, 0, amap->am_nslot, refv);
    		} else {
    			amap_pp_adjref(amap, offset, len, refv);
    		}
    	}
    	amap_unlock(amap);
    }
    
    /*
     * amap_ref: gain a reference to an amap.
     *
     * => amap must not be locked (we will lock).
     * => "offset" and "len" are in units of pages.
     * => Called at fork time to gain the child's reference.
     */
    void
    amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
    {
    	amap_lock(amap, RW_WRITE);
    	if (flags & AMAP_SHARED)
    		amap->am_flags |= AMAP_SHARED;
    	amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0);
    }
    
    /*
     * amap_unref: remove a reference to an amap.
     *
     * => All pmap-level references to this amap must be already removed.
     * => Called from uvm_unmap_detach(); entry is already removed from the map.
     * => We will lock amap, so it must be unlocked.
     */
    void
    amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all)
    {
    	amap_lock(amap, RW_WRITE);
    
    	KASSERT(amap->am_ref > 0);
    
    	if (amap->am_ref == 1) {
    		/*
    		 * If the last reference - wipeout and destroy the amap.
    		 */
    		amap->am_ref--;
    		amap_wipeout(amap);
    		return;
    	}
    
    	/*
    	 * Otherwise, drop the reference count(s) on anons.
    	 */
    	if (amap->am_ref == 2 && (amap->am_flags & AMAP_SHARED) != 0) {
    		amap->am_flags &= ~AMAP_SHARED;
    	}
    	amap_adjref_anons(amap, offset, len, -1, all);
    }