Hash :
713e8be6
        
        Author :
  
        
        Date :
2012-02-04T14:22:47
        
      
move CPU chipset specific optimization into device-cpu
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
/*
 * SHA-256 driver for ASM routine for x86_64 on Linux
 * Copyright (c) Mark Crichton <crichton@gimp.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */
#include "device-cpu.h"
#ifdef WANT_X8664_SSE4
#include <string.h>
#include <assert.h>
#include <xmmintrin.h>
#include <stdint.h>
#include <stdio.h>
extern void CalcSha256_x64_sse4(__m128i *res, __m128i *data, uint32_t init[8]);
static uint32_t g_sha256_k[] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /*  0 */
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /*  8 */
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
static uint32_t g_sha256_hinit[8] =
{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
__m128i g_4sha256_k[64];
bool scanhash_sse4_64(int thr_id, const unsigned char *pmidstate,
	unsigned char *pdata,
	unsigned char *phash1, unsigned char *phash,
	const unsigned char *ptarget,
	uint32_t max_nonce, uint32_t *last_nonce,
	uint32_t nonce)
{
    uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
    uint32_t m_midstate[8], m_w[16], m_w1[16];
    __m128i m_4w[64], m_4hash[64], m_4hash1[64];
    __m128i offset;
    int i;
	pdata += 64;
    work_restart[thr_id].restart = 0;
    /* For debugging */
    union {
        __m128i m;
        uint32_t i[4];
    } mi;
    /* Message expansion */
    memcpy(m_midstate, pmidstate, sizeof(m_midstate));
    memcpy(m_w, pdata, sizeof(m_w)); /* The 2nd half of the data */
    memcpy(m_w1, phash1, sizeof(m_w1));
    memset(m_4hash, 0, sizeof(m_4hash));
    /* Transmongrify */
    for (i = 0; i < 16; i++)
        m_4w[i] = _mm_set1_epi32(m_w[i]);
    for (i = 0; i < 16; i++)
        m_4hash1[i] = _mm_set1_epi32(m_w1[i]);
    for (i = 0; i < 64; i++)
	g_4sha256_k[i] = _mm_set1_epi32(g_sha256_k[i]);
    offset = _mm_set_epi32(0x3, 0x2, 0x1, 0x0);
    for (;;)
    {
	int j;
	m_4w[3] = _mm_add_epi32(offset, _mm_set1_epi32(nonce));
	/* Some optimization can be done here W.R.T. precalculating some hash */
        CalcSha256_x64_sse4(m_4hash1, m_4w, m_midstate);
	CalcSha256_x64_sse4(m_4hash, m_4hash1, g_sha256_hinit);
	for (j = 0; j < 4; j++) {
	    mi.m = m_4hash[7];
	    if (unlikely(mi.i[j] == 0))
		break;
        }
	/* If j = true, we found a hit...so check it */
	/* Use the C version for a check... */
	if (unlikely(j != 4)) {
		for (i = 0; i < 8; i++) {
		    mi.m = m_4hash[i];
		    *(uint32_t *)&(phash)[i*4] = mi.i[j];
		}
		if (fulltest(phash, ptarget)) {
			nonce += j;
			*last_nonce = nonce;
			*nNonce_p = nonce;
			return true;
		}
	}
        if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart))
        {
			*last_nonce = nonce;
			return false;
	}
	nonce += 4;
   }
}
#endif /* WANT_X8664_SSE4 */