Torque3D Documentation / _generateds / tsMeshIntrinsics.sse.cpp

tsMeshIntrinsics.sse.cpp

Engine/source/ts/arch/tsMeshIntrinsics.sse.cpp

More...

Detailed Description

 1
 2//-----------------------------------------------------------------------------
 3// Copyright (c) 2012 GarageGames, LLC
 4//
 5// Permission is hereby granted, free of charge, to any person obtaining a copy
 6// of this software and associated documentation files (the "Software"), to
 7// deal in the Software without restriction, including without limitation the
 8// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 9// sell copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in
13// all copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21// IN THE SOFTWARE.
22//-----------------------------------------------------------------------------
23#include "ts/tsMesh.h"
24
25#if (defined( TORQUE_CPU_X86 ) || defined( TORQUE_CPU_X64 ))
26#include "ts/tsMeshIntrinsics.h"
27#include <xmmintrin.h>
28
29void zero_vert_normal_bulk_SSE(const dsize_t count, U8 * __restrict const outPtr, const dsize_t outStride)
30{
31   // A U8 * version of the in/out pointer
32   char *outData = reinterpret_cast<char *>(outPtr);
33   
34   __m128 vPos;
35   __m128 vNrm;
36   __m128 vMask;
37
38   const __m128 _point3f_zero_mask = { 0.0f, 0.0f, 0.0f, 1.0f };
39   vMask = _mm_load_ps((const F32*)&_point3f_zero_mask);
40
41   // pre-populate cache
42   for(S32 i = 0; i < 8; i++)
43      _mm_prefetch(reinterpret_cast<const char *>(outData +  outStride * i), _MM_HINT_T0);
44
45   for(S32 i = 0; i < count; i++)
46   {
47      TSMesh::__TSMeshVertexBase *curElem = reinterpret_cast<TSMesh::__TSMeshVertexBase *>(outData);
48
49      // prefetch 8 items ahead (should really detect cache size or something)
50      _mm_prefetch(reinterpret_cast<const char *>(outData +  outStride * 8), _MM_HINT_T0);
51
52      // load
53      vPos = _mm_load_ps(curElem->_vert);
54      vNrm = _mm_load_ps(curElem->_normal);
55
56      // mask
57      vPos = _mm_mul_ps(vPos, _point3f_zero_mask);
58      vNrm = _mm_mul_ps(vNrm, _point3f_zero_mask);
59
60      // store
61      _mm_store_ps(curElem->_vert, vPos);
62      _mm_store_ps(curElem->_normal, vNrm);
63
64      // update output pointer
65      outData += outStride;
66   }
67}
68
69//------------------------------------------------------------------------------
70
71#endif // TORQUE_CPU_X86
72