winAsmBlit.cpp

Engine/source/platformWin32/winAsmBlit.cpp

More...

Public Functions

bitmapExtrude5551_asm(const void * srcMip, void * mip, U32 height, U32 width)

Detailed Description

Public Functions

bitmapExtrude5551_asm(const void * srcMip, void * mip, U32 height, U32 width)

PlatformBlitInit()

  1
  2//-----------------------------------------------------------------------------
  3// Copyright (c) 2012 GarageGames, LLC
  4//
  5// Permission is hereby granted, free of charge, to any person obtaining a copy
  6// of this software and associated documentation files (the "Software"), to
  7// deal in the Software without restriction, including without limitation the
  8// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  9// sell copies of the Software, and to permit persons to whom the Software is
 10// furnished to do so, subject to the following conditions:
 11//
 12// The above copyright notice and this permission notice shall be included in
 13// all copies or substantial portions of the Software.
 14//
 15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 20// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 21// IN THE SOFTWARE.
 22//-----------------------------------------------------------------------------
 23
 24#include "math/mMath.h"
 25#include "gfx/bitmap/gBitmap.h"
 26#include "gfx/bitmap/bitmapUtils.h"
 27
 28#if !defined(__MWERKS__) && defined(_MSC_VER)
 29#define asm _asm
 30#endif
 31
 32//--------------------------------------------------------------------------
 33void bitmapExtrude5551_asm(const void *srcMip, void *mip, U32 height, U32 width)
 34{
 35   const U16 *src = (const U16 *) srcMip;
 36   U16 *dst = (U16 *) mip;
 37   U32 stride = width << 1;
 38
 39   for(U32 y = 0; y < height; y++)
 40   {
 41      for(U32 x = 0; x < width; x++)
 42      {
 43         U32 a = src[0];
 44         U32 b = src[1];
 45         U32 c = src[stride];
 46         U32 d = src[stride+1];
 47         dst[x] = ((((a >> 11) + (b >> 11) + (c >> 11) + (d >> 11)) >> 2) << 11) |
 48                  (((  ((a >> 6) & 0x1f) + ((b >> 6) & 0x1f) + ((c >> 6) & 0x1f) + ((d >> 6) & 0x1F) ) >> 2) << 6) |
 49                  ((( ((a >> 1) & 0x1F) + ((b >> 1) & 0x1F) + ((c >> 1) & 0x1f) + ((d >> 1) & 0x1f)) >> 2) << 1);
 50         src += 2;
 51      }
 52      src += stride;
 53      dst += width;
 54   }
 55}
 56
 57
 58#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
 59
 60//--------------------------------------------------------------------------
 61void bitmapExtrudeRGB_mmx(const void *srcMip, void *mip, U32 srcHeight, U32 srcWidth)
 62{
 63   if (srcHeight == 1 || srcWidth == 1) {
 64      bitmapExtrudeRGB_c(srcMip, mip, srcHeight, srcWidth);
 65      return;
 66   }
 67
 68   U32 width  = srcWidth  >> 1;
 69   U32 height = srcHeight >> 1;
 70
 71   if (width <= 1)
 72   {
 73      bitmapExtrudeRGB_c(srcMip, mip, srcHeight, srcWidth);
 74      return;
 75   }
 76
 77   U64 ZERO = 0x0000000000000000;
 78   const U8 *src = (const U8 *) srcMip;
 79   U8 *dst = (U8 *) mip;
 80   U32 srcStride = (width << 1) * 3;
 81   U32 dstStride = width * 3;
 82
 83   for(U32 y = 0; y < height; y++)
 84   {
 85      asm
 86      {
 87         mov      eax, src
 88         mov      ebx, eax
 89         add      ebx, srcStride
 90         mov      ecx, dst
 91         mov      edx, width
 92
 93         //--------------------------------------
 94      row_loop:
 95
 96         punpcklbw   mm0, [eax]
 97         psrlw       mm0, 8
 98
 99         punpcklbw   mm1, [eax+3]
100         psrlw       mm1, 8
101         paddw       mm0, mm1
102
103         punpcklbw   mm1, [ebx]
104         psrlw       mm1, 8
105         paddw       mm0, mm1
106
107         punpcklbw   mm1, [ebx+3]
108         psrlw       mm1, 8
109         paddw       mm0, mm1
110
111         psrlw       mm0, 2
112         //pxor        mm1, mm1
113         packuswb    mm0, ZERO      // mm1
114
115         movd        [ecx], mm0
116         add         eax, 6
117         add         ebx, 6
118         add         ecx, 3
119         dec         edx
120         jnz         row_loop
121      }
122      src += srcStride + srcStride;   // advance to next line
123      dst += dstStride;
124   }
125   asm
126   {
127      emms
128   }
129}
130
131
132//--------------------------------------------------------------------------
133void bitmapConvertRGB_to_5551_mmx(U8 *src, U32 pixels)
134{
135   U64 MULFACT      = 0x0008200000082000;    // RGB quad word multiplier
136   U64 REDBLUE      = 0x00f800f800f800f8;    // Red-Blue mask
137   U64 GREEN        = 0x0000f8000000f800;    // Green mask
138   U64 ALPHA        = 0x0000000000010001;    // 100% Alpha mask
139   U64 ZERO         = 0x0000000000000000;
140
141   U32 evenPixels = pixels >> 1;       // the MMX loop can only do an even number
142   U32 oddPixels  = pixels & 1;        // of pixels since it processes 2 at a time
143
144   U16 *dst = (U16*)src;
145
146   if (evenPixels)
147   {
148      asm
149      {
150         mov         eax, src          // YES, src = dst at start
151         mov         ebx, dst          // convert image in place
152         mov         edx, evenPixels
153
154      pixel_loop2:
155         movd        mm0, [eax]        // get first 24-bit pixel
156         movd        mm1, [eax+3]      // get second 24-bit pixel
157         punpckldq   mm0, mm1          // put second in high dword
158         movq        mm1, mm0          // save the original data
159         pand        mm0, REDBLUE      // mask out all but the 5MSBits of red and blue
160         pmaddwd     mm0, MULFACT      // multiply each word by
161                                       //   2**13, 2**3, 2**13, 2**3 and add results
162         pand        mm1, GREEN        // mask out all but the 5MSBits of green
163         por         mm0, mm1          // combine the red, green, and blue bits
164         psrld       mm0, 6            // shift into position
165         packssdw    mm0, ZERO         // pack into single dword
166         pslld       mm0, 1            // shift into final position
167         por         mm0, ALPHA        // add the alpha bit
168         movd        [ebx], mm0
169
170         add         eax, 6
171         add         ebx, 4
172         dec         edx
173         jnz         pixel_loop2
174
175         mov         src, eax
176         mov         dst, ebx
177         emms
178      }
179   }
180
181   if (oddPixels)
182   {
183      U32 r = src[0] >> 3;
184      U32 g = src[1] >> 3;
185      U32 b = src[2] >> 3;
186
187      *dst = (b << 1) | (g << 6) | (r << 11) | 1;
188   }
189}
190
191#endif
192
193
194
195//--------------------------------------------------------------------------
196void PlatformBlitInit()
197{
198   bitmapExtrude5551 = bitmapExtrude5551_asm;
199   bitmapExtrudeRGB  = bitmapExtrudeRGB_c;
200
201   if (Platform::SystemInfo.processor.properties & CPU_PROP_MMX)
202   {
203#if defined(TORQUE_SUPPORTS_VC_INLINE_X86_ASM)
204      bitmapExtrudeRGB  = bitmapExtrudeRGB_mmx;
205      bitmapConvertRGB_to_5551 = bitmapConvertRGB_to_5551_mmx;
206#endif
207   }
208}
209