winAsmBlit.cpp
Engine/source/platformWin32/winAsmBlit.cpp
Public Functions
Detailed Description
Public Functions
bitmapExtrude5551_asm(const void * srcMip, void * mip, U32 height, U32 width)
PlatformBlitInit()
//-----------------------------------------------------------------------------
// Copyright (c) 2012 GarageGames, LLC
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
22//----------------------------------------------------------------------------- 23 24#include "math/mMath.h" 25#include "gfx/bitmap/gBitmap.h" 26#include "gfx/bitmap/bitmapUtils.h" 27 28#if !defined(__MWERKS__) && defined(_MSC_VER) 29#define asm _asm 30#endif 31 32//-------------------------------------------------------------------------- 33void bitmapExtrude5551_asm(const void *srcMip, void *mip, U32 height, U32 width) 34{ 35 const U16 *src = (const U16 *) srcMip; 36 U16 *dst = (U16 *) mip; 37 U32 stride = width << 1; 38 39 for(U32 y = 0; y < height; y++) 40 { 41 for(U32 x = 0; x < width; x++) 42 { 43 U32 a = src[0]; 44 U32 b = src[1]; 45 U32 c = src[stride]; 46 U32 d = src[stride+1]; 47 dst[x] = ((((a >> 11) + (b >> 11) + (c >> 11) + (d >> 11)) >> 2) << 11) | 48 ((( ((a >> 6) & 0x1f) + ((b >> 6) & 0x1f) + ((c >> 6) & 0x1f) + ((d >> 6) & 0x1F) ) >> 2) << 6) | 49 ((( ((a >> 1) & 0x1F) + ((b >> 1) & 0x1F) + ((c >> 1) & 0x1f) + ((d >> 1) & 0x1f)) >> 2) << 1); 50 src += 2; 51 } 52 src += stride; 53 dst += width; 54 } 55} 56 57 58#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM) 59 60//-------------------------------------------------------------------------- 61void bitmapExtrudeRGB_mmx(const void *srcMip, void *mip, U32 srcHeight, U32 srcWidth) 62{ 63 if (srcHeight == 1 || srcWidth == 1) { 64 bitmapExtrudeRGB_c(srcMip, mip, srcHeight, srcWidth); 65 return; 66 } 67 68 U32 width = srcWidth >> 1; 69 U32 height = srcHeight >> 1; 70 71 if (width <= 1) 72 { 73 bitmapExtrudeRGB_c(srcMip, mip, srcHeight, srcWidth); 74 return; 75 } 76 77 U64 ZERO = 0x0000000000000000; 78 const U8 *src = (const U8 *) srcMip; 79 U8 *dst = (U8 *) mip; 80 U32 srcStride = (width << 1) * 3; 81 U32 dstStride = width * 3; 82 83 for(U32 y = 0; y < height; y++) 84 { 85 asm 86 { 87 mov eax, src 88 mov ebx, eax 89 add ebx, srcStride 90 mov ecx, dst 91 mov edx, width 92 93 //-------------------------------------- 94 row_loop: 95 96 punpcklbw mm0, [eax] 97 psrlw mm0, 8 98 99 punpcklbw mm1, [eax+3] 100 psrlw 
mm1, 8 101 paddw mm0, mm1 102 103 punpcklbw mm1, [ebx] 104 psrlw mm1, 8 105 paddw mm0, mm1 106 107 punpcklbw mm1, [ebx+3] 108 psrlw mm1, 8 109 paddw mm0, mm1 110 111 psrlw mm0, 2 112 //pxor mm1, mm1 113 packuswb mm0, ZERO // mm1 114 115 movd [ecx], mm0 116 add eax, 6 117 add ebx, 6 118 add ecx, 3 119 dec edx 120 jnz row_loop 121 } 122 src += srcStride + srcStride; // advance to next line 123 dst += dstStride; 124 } 125 asm 126 { 127 emms 128 } 129} 130 131 132//-------------------------------------------------------------------------- 133void bitmapConvertRGB_to_5551_mmx(U8 *src, U32 pixels) 134{ 135 U64 MULFACT = 0x0008200000082000; // RGB quad word multiplier 136 U64 REDBLUE = 0x00f800f800f800f8; // Red-Blue mask 137 U64 GREEN = 0x0000f8000000f800; // Green mask 138 U64 ALPHA = 0x0000000000010001; // 100% Alpha mask 139 U64 ZERO = 0x0000000000000000; 140 141 U32 evenPixels = pixels >> 1; // the MMX loop can only do an even number 142 U32 oddPixels = pixels & 1; // of pixels since it processes 2 at a time 143 144 U16 *dst = (U16*)src; 145 146 if (evenPixels) 147 { 148 asm 149 { 150 mov eax, src // YES, src = dst at start 151 mov ebx, dst // convert image in place 152 mov edx, evenPixels 153 154 pixel_loop2: 155 movd mm0, [eax] // get first 24-bit pixel 156 movd mm1, [eax+3] // get second 24-bit pixel 157 punpckldq mm0, mm1 // put second in high dword 158 movq mm1, mm0 // save the original data 159 pand mm0, REDBLUE // mask out all but the 5MSBits of red and blue 160 pmaddwd mm0, MULFACT // multiply each word by 161 // 2**13, 2**3, 2**13, 2**3 and add results 162 pand mm1, GREEN // mask out all but the 5MSBits of green 163 por mm0, mm1 // combine the red, green, and blue bits 164 psrld mm0, 6 // shift into position 165 packssdw mm0, ZERO // pack into single dword 166 pslld mm0, 1 // shift into final position 167 por mm0, ALPHA // add the alpha bit 168 movd [ebx], mm0 169 170 add eax, 6 171 add ebx, 4 172 dec edx 173 jnz pixel_loop2 174 175 mov src, eax 176 mov dst, 
ebx 177 emms 178 } 179 } 180 181 if (oddPixels) 182 { 183 U32 r = src[0] >> 3; 184 U32 g = src[1] >> 3; 185 U32 b = src[2] >> 3; 186 187 *dst = (b << 1) | (g << 6) | (r << 11) | 1; 188 } 189} 190 191#endif 192 193 194 195//-------------------------------------------------------------------------- 196void PlatformBlitInit() 197{ 198 bitmapExtrude5551 = bitmapExtrude5551_asm; 199 bitmapExtrudeRGB = bitmapExtrudeRGB_c; 200 201 if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX) 202 { 203#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM) 204 bitmapExtrudeRGB = bitmapExtrudeRGB_mmx; 205 bitmapConvertRGB_to_5551 = bitmapConvertRGB_to_5551_mmx; 206#endif 207 } 208} 209