Hash :
6e4cfceb
Author :
Date :
2016-06-13T15:06:31
Refactor ANGLE's image manipulation code into a static library. Allows for chromium to make use of some of the functionality. BUG=612205 Change-Id: Ib4435ca44775a3a554b0fb3bd384bd4d31d7952d Reviewed-on: https://chromium-review.googlesource.com/351753 Reviewed-by: Geoff Lang <geofflang@chromium.org> Commit-Queue: Geoff Lang <geofflang@chromium.org>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
//
// Copyright (c) 2002-2015 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// loadimageSSE2.cpp: Defines image loading functions. It's
// in a separated file for GCC, which can enable SSE usage only per-file,
// not for code blocks that use SSE2 explicitly.
#include "image_util/loadimage.h"
#include "common/mathutil.h"
#include "common/platform.h"
#ifdef ANGLE_USE_SSE
#include <emmintrin.h>
#endif
namespace angle
{
void LoadA8ToBGRA8_SSE2(size_t width,
size_t height,
size_t depth,
const uint8_t *input,
size_t inputRowPitch,
size_t inputDepthPitch,
uint8_t *output,
size_t outputRowPitch,
size_t outputDepthPitch)
{
#if defined(ANGLE_USE_SSE)
__m128i zeroWide = _mm_setzero_si128();
for (size_t z = 0; z < depth; z++)
{
for (size_t y = 0; y < height; y++)
{
const uint8_t *source =
priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
uint32_t *dest =
priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
size_t x = 0;
// Make output writes aligned
for (; ((reinterpret_cast<intptr_t>(&dest[x]) & 0xF) != 0 && x < width); x++)
{
dest[x] = static_cast<uint32_t>(source[x]) << 24;
}
for (; x + 7 < width; x += 8)
{
__m128i sourceData = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&source[x]));
// Interleave each byte to 16bit, make the lower byte to zero
sourceData = _mm_unpacklo_epi8(zeroWide, sourceData);
// Interleave each 16bit to 32bit, make the lower 16bit to zero
__m128i lo = _mm_unpacklo_epi16(zeroWide, sourceData);
__m128i hi = _mm_unpackhi_epi16(zeroWide, sourceData);
_mm_store_si128(reinterpret_cast<__m128i *>(&dest[x]), lo);
_mm_store_si128(reinterpret_cast<__m128i *>(&dest[x + 4]), hi);
}
// Handle the remainder
for (; x < width; x++)
{
dest[x] = static_cast<uint32_t>(source[x]) << 24;
}
}
}
#else
// Ensure that this function is reported as not implemented for ARM builds because
// the instructions below are not present for that architecture.
UNIMPLEMENTED();
return;
#endif
}
void LoadRGBA8ToBGRA8_SSE2(size_t width,
size_t height,
size_t depth,
const uint8_t *input,
size_t inputRowPitch,
size_t inputDepthPitch,
uint8_t *output,
size_t outputRowPitch,
size_t outputDepthPitch)
{
#if defined(ANGLE_USE_SSE)
__m128i brMask = _mm_set1_epi32(0x00ff00ff);
for (size_t z = 0; z < depth; z++)
{
for (size_t y = 0; y < height; y++)
{
const uint32_t *source =
priv::OffsetDataPointer<uint32_t>(input, y, z, inputRowPitch, inputDepthPitch);
uint32_t *dest =
priv::OffsetDataPointer<uint32_t>(output, y, z, outputRowPitch, outputDepthPitch);
size_t x = 0;
// Make output writes aligned
for (; ((reinterpret_cast<intptr_t>(&dest[x]) & 15) != 0) && x < width; x++)
{
uint32_t rgba = source[x];
dest[x] = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
}
for (; x + 3 < width; x += 4)
{
__m128i sourceData = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&source[x]));
// Mask out g and a, which don't change
__m128i gaComponents = _mm_andnot_si128(brMask, sourceData);
// Mask out b and r
__m128i brComponents = _mm_and_si128(sourceData, brMask);
// Swap b and r
__m128i brSwapped =
_mm_shufflehi_epi16(_mm_shufflelo_epi16(brComponents, _MM_SHUFFLE(2, 3, 0, 1)),
_MM_SHUFFLE(2, 3, 0, 1));
__m128i result = _mm_or_si128(gaComponents, brSwapped);
_mm_store_si128(reinterpret_cast<__m128i *>(&dest[x]), result);
}
// Perform leftover writes
for (; x < width; x++)
{
uint32_t rgba = source[x];
dest[x] = (ANGLE_ROTL(rgba, 16) & 0x00ff00ff) | (rgba & 0xff00ff00);
}
}
}
#else
// Ensure that this function is reported as not implemented for ARM builds because
// the instructions below are not present for that architecture.
UNIMPLEMENTED();
return;
#endif
}
}