Hash: ca5dbef0
Author:
Date: 2021-03-11T00:40:39
Micro-optimization for loading 3-channel images.

This reduced the total time spent in LoadToNative3To4 by 10% in our
micro-benchmark.

Test: passed the CQ dry run.
Bug: b/182513949
Change-Id: I5d1b1975f64e5b08bd0e65abc8f94fe62ef54321
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2752347
Commit-Queue: Pujun Lun <lunpujun@google.com>
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
//
// Copyright 2014 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
#include "common/mathutil.h"
#include <string.h>
namespace angle
{
namespace priv
{
// Returns a pointer to the start of row `y` in layer `z` of an image laid out with the given
// row and depth pitches (both in bytes).
template <typename T>
inline T *OffsetDataPointer(uint8_t *data, size_t y, size_t z, size_t rowPitch, size_t depthPitch)
{
return reinterpret_cast<T*>(data + (y * rowPitch) + (z * depthPitch));
}

template <typename T>
inline const T *OffsetDataPointer(const uint8_t *data, size_t y, size_t z, size_t rowPitch,
                                  size_t depthPitch)
{
return reinterpret_cast<const T*>(data + (y * rowPitch) + (z * depthPitch));
}
} // namespace priv

// Copies an image of `componentCount` components of `type` into the native layout, taking the
// fastest path available: a single memcpy when input and output are both tightly packed, a
// per-layer memcpy when only the rows are tightly packed, and a row-by-row copy otherwise.
template <typename type, size_t componentCount>
inline void LoadToNative(size_t width, size_t height, size_t depth,
                         const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch,
                         uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch)
{
const size_t rowSize = width * sizeof(type) * componentCount;
const size_t layerSize = rowSize * height;
const size_t imageSize = layerSize * depth;
if (layerSize == inputDepthPitch && layerSize == outputDepthPitch)
{
ASSERT(rowSize == inputRowPitch && rowSize == outputRowPitch);
memcpy(output, input, imageSize);
}
else if (rowSize == inputRowPitch && rowSize == outputRowPitch)
{
for (size_t z = 0; z < depth; z++)
{
const type *source = priv::OffsetDataPointer<type>(input, 0, z, inputRowPitch, inputDepthPitch);
type *dest = priv::OffsetDataPointer<type>(output, 0, z, outputRowPitch, outputDepthPitch);
memcpy(dest, source, layerSize);
}
}
else
{
for (size_t z = 0; z < depth; z++)
{
for (size_t y = 0; y < height; y++)
{
const type *source = priv::OffsetDataPointer<type>(input, y, z, inputRowPitch, inputDepthPitch);
type *dest = priv::OffsetDataPointer<type>(output, y, z, outputRowPitch, outputDepthPitch);
memcpy(dest, source, width * sizeof(type) * componentCount);
}
}
}
}
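
// Usage sketch (illustrative; the helper below is not part of ANGLE, and the 2x2x1 RGBA8 image,
// its zeroed contents, and the tightly packed pitches are assumptions). Because both pitches
// equal the tightly packed row and layer sizes, the single-memcpy fast path is taken.
inline void ExampleLoadTightlyPackedRGBA8()
{
    constexpr size_t kWidth = 2, kHeight = 2, kDepth = 1;
    const uint8_t src[kWidth * kHeight * 4] = {};  // four RGBA8 pixels, all zero here
    uint8_t dst[kWidth * kHeight * 4]       = {};
    LoadToNative<uint8_t, 4>(kWidth, kHeight, kDepth, src, kWidth * 4, kWidth * kHeight * 4, dst,
                             kWidth * 4, kWidth * kHeight * 4);
}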

// Loads a 3-component image into a 4-component native layout, copying the three source
// components of each pixel and writing `fourthComponentBits` (bit-cast to `type`) as the
// fourth component, e.g. an opaque alpha value.
template <typename type, uint32_t fourthComponentBits>
inline void LoadToNative3To4(size_t width, size_t height, size_t depth,
                             const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch,
                             uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch)
{
const type fourthValue = gl::bitCast<type>(fourthComponentBits);
for (size_t z = 0; z < depth; z++)
{
for (size_t y = 0; y < height; y++)
{
const type *source = priv::OffsetDataPointer<type>(input, y, z, inputRowPitch, inputDepthPitch);
type *dest = priv::OffsetDataPointer<type>(output, y, z, outputRowPitch, outputDepthPitch);
for (size_t x = 0; x < width; x++)
{
memcpy(&dest[x * 4], &source[x * 3], sizeof(type) * 3);
dest[x * 4 + 3] = fourthValue;
}
}
}
}
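
// Usage sketch (illustrative; the helper below is not part of ANGLE, and the 2x2x1 image data
// and tightly packed pitches are assumptions). Expands a packed RGB8 image to RGBA8, writing
// 0xFF as the alpha of every pixel. This is the function targeted by the micro-optimization
// described in the commit message.
inline void ExampleExpandRGB8ToRGBA8()
{
    constexpr size_t kWidth = 2, kHeight = 2, kDepth = 1;
    const uint8_t rgb[kWidth * kHeight * 3] = {255, 0, 0, 0, 255, 0, 0, 0, 255, 255, 255, 0};
    uint8_t rgba[kWidth * kHeight * 4]      = {};
    LoadToNative3To4<uint8_t, 0xFF>(kWidth, kHeight, kDepth, rgb, kWidth * 3,
                                    kWidth * kHeight * 3, rgba, kWidth * 4, kWidth * kHeight * 4);
}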

// Converts each 32-bit float component of the source image to a 16-bit half float in the
// destination image using gl::float32ToFloat16.
template <size_t componentCount>
inline void Load32FTo16F(size_t width, size_t height, size_t depth,
                         const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch,
                         uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch)
{
const size_t elementWidth = componentCount * width;
for (size_t z = 0; z < depth; z++)
{
for (size_t y = 0; y < height; y++)
{
const float *source = priv::OffsetDataPointer<float>(input, y, z, inputRowPitch, inputDepthPitch);
uint16_t *dest = priv::OffsetDataPointer<uint16_t>(output, y, z, outputRowPitch, outputDepthPitch);
for (size_t x = 0; x < elementWidth; x++)
{
dest[x] = gl::float32ToFloat16(source[x]);
}
}
}
}
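
// Usage sketch (illustrative; the helper below is not part of ANGLE, and the single-pixel
// image and pitches are assumptions). Narrows one RGBA32F pixel to RGBA16F; each float is
// converted with gl::float32ToFloat16 and stored as a uint16_t.
inline void ExampleConvertRGBA32FToRGBA16F()
{
    const float src[4] = {0.0f, 0.25f, 0.5f, 1.0f};
    uint16_t dst[4]    = {};
    Load32FTo16F<4>(1, 1, 1, reinterpret_cast<const uint8_t *>(src), sizeof(src), sizeof(src),
                    reinterpret_cast<uint8_t *>(dst), sizeof(dst), sizeof(dst));
}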

// Copies block-compressed image data. The dimensions are rounded up to whole blocks, and each
// row of blocks (columns * blockSize bytes) is copied with a single memcpy.
template <size_t blockWidth, size_t blockHeight, size_t blockDepth, size_t blockSize>
inline void LoadCompressedToNative(size_t width, size_t height, size_t depth,
                                   const uint8_t *input, size_t inputRowPitch, size_t inputDepthPitch,
                                   uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch)
{
const size_t columns = (width + (blockWidth - 1)) / blockWidth;
const size_t rows = (height + (blockHeight - 1)) / blockHeight;
const size_t layers = (depth + (blockDepth - 1)) / blockDepth;
for (size_t z = 0; z < layers; ++z)
{
for (size_t y = 0; y < rows; ++y)
{
const uint8_t *source = priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
uint8_t *dest = priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
memcpy(dest, source, columns * blockSize);
}
}
}
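
// Usage sketch (illustrative; the helper below is not part of ANGLE, and the 8x8 image size
// and the 4x4x1, 8-byte block parameters, which match BC1/DXT1, are assumptions). The image
// is a 2x2 grid of blocks, so each of the two block rows is copied as 16 bytes.
inline void ExampleLoadBC1Blocks()
{
    constexpr size_t kBlockSize = 8;
    constexpr size_t kColumns = 2, kRows = 2;
    const uint8_t src[kColumns * kRows * kBlockSize] = {};
    uint8_t dst[kColumns * kRows * kBlockSize]       = {};
    LoadCompressedToNative<4, 4, 1, kBlockSize>(8, 8, 1, src, kColumns * kBlockSize,
                                                kColumns * kRows * kBlockSize, dst,
                                                kColumns * kBlockSize,
                                                kColumns * kRows * kBlockSize);
}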

// Fills every pixel of a 4-component image with the four given bit patterns, each bit-cast to
// `type`; useful for initializing an image to a constant color such as opaque black.
template <typename type, uint32_t firstBits, uint32_t secondBits, uint32_t thirdBits, uint32_t fourthBits>
inline void Initialize4ComponentData(size_t width, size_t height, size_t depth,
                                     uint8_t *output, size_t outputRowPitch, size_t outputDepthPitch)
{
type writeValues[4] =
{
gl::bitCast<type>(firstBits),
gl::bitCast<type>(secondBits),
gl::bitCast<type>(thirdBits),
gl::bitCast<type>(fourthBits),
};
for (size_t z = 0; z < depth; z++)
{
for (size_t y = 0; y < height; y++)
{
type *destRow = priv::OffsetDataPointer<type>(output, y, z, outputRowPitch, outputDepthPitch);
for (size_t x = 0; x < width; x++)
{
type* destPixel = destRow + x * 4;
// This could potentially be optimized by generating an entire row of initialization
// data and copying row by row instead of pixel by pixel.
memcpy(destPixel, writeValues, sizeof(type) * 4);
}
}
}
}
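
// Usage sketch (illustrative; the helper below is not part of ANGLE, and the 2x2x1 RGBA8
// region and tightly packed pitches are assumptions). Fills every pixel with opaque black by
// writing the component bit patterns {0x00, 0x00, 0x00, 0xFF}.
inline void ExampleInitializeOpaqueBlackRGBA8()
{
    constexpr size_t kWidth = 2, kHeight = 2, kDepth = 1;
    uint8_t dst[kWidth * kHeight * 4] = {};
    Initialize4ComponentData<uint8_t, 0x00, 0x00, 0x00, 0xFF>(kWidth, kHeight, kDepth, dst,
                                                              kWidth * 4, kWidth * kHeight * 4);
}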
} // namespace angle