ZenLib
MemoryUtils.h
Go to the documentation of this file.
1/* Copyright (c) MediaArea.net SARL. All Rights Reserved.
2 *
3 * Use of this source code is governed by a zlib-style license that can
4 * be found in the License.txt file in the root of the source tree.
5 */
6
7//---------------------------------------------------------------------------
8#ifndef ZenLib_MemoryUtilsH
9#define ZenLib_MemoryUtilsH
10//---------------------------------------------------------------------------
11
12//---------------------------------------------------------------------------
#include "ZenLib/Conf.h"
#include "ZenLib/Conf.h"
//---------------------------------------------------------------------------

#include <cstdlib>
#include <cstring>
#ifdef ZENLIB_MEMUTILS_SSE2
    #include <emmintrin.h>
#endif //ZENLIB_MEMUTILS_SSE2
21
22namespace ZenLib
23{
24
25#ifndef ZENLIB_MEMUTILS_SSE2
26 //-----------------------------------------------------------------------
27 // Memory alloc/free
// Function-like macros: no whitespace is allowed between the macro name and
// its parameter list, otherwise the preprocessor takes "(size)" as the start
// of the replacement text and every call fails to compile.
// Without SSE2 no explicit 16-byte alignment is requested; allocation and
// release are simply forwarded to malloc and free.
#define malloc_Aligned128(size) \
        malloc (size)
#define free_Aligned128(ptr) \
        free (ptr)
32
//-----------------------------------------------------------------------
// Without SSE2 every copy helper below is a plain alias of memcpy, so each
// one takes memcpy's (destination, source, num) arguments.
// Names follow the pattern memcpy_<source alignment>_<destination alignment>.
// NOTE(review): the SSE2 versions of the *_Once128 helpers take only
// (destination, source); callers built in both configurations should be
// checked for the argument count they pass.

//-----------------------------------------------------------------------
// Arbitrary size - To Unaligned
#define memcpy_Unaligned_Unaligned memcpy
#define memcpy_Aligned128_Unaligned memcpy

//-----------------------------------------------------------------------
// Arbitrary size - To Aligned 128 bits (16 bytes)
#define memcpy_Unaligned_Aligned128 memcpy
#define memcpy_Aligned128_Aligned128 memcpy

//-----------------------------------------------------------------------
// 128 bits - To Unaligned
#define memcpy_Unaligned_Unaligned_Once128 memcpy

//-----------------------------------------------------------------------
// 128 bits - To Aligned 128 bits (16 bytes)
#define memcpy_Aligned128_Aligned128_Once128 memcpy

//-----------------------------------------------------------------------
// 1024 bits - To Unaligned
#define memcpy_Unaligned_Unaligned_Once1024 memcpy

//-----------------------------------------------------------------------
// 1024 bits - To Aligned 128 bits (16 bytes)
#define memcpy_Aligned128_Aligned128_Once1024 memcpy

//-----------------------------------------------------------------------
// 128-bit multiple - To Unaligned
// Present so that this configuration exposes the same names as the SSE2 one.
#define memcpy_Unaligned_Unaligned_Size128 memcpy
#define memcpy_Aligned128_Unaligned_Size128 memcpy

//-----------------------------------------------------------------------
// 128-bit multiple - To Aligned 128 bits (16 bytes)
#define memcpy_Unaligned_Aligned128_Size128 memcpy
#define memcpy_Aligned128_Aligned128_Size128 memcpy
63
64#else // ZENLIB_MEMUTILS_SSE2
65
66 //-----------------------------------------------------------------------
67 // Memory alloc/free
68
// Allocates `size` bytes aligned on a 16-byte (128-bit) boundary.
// _mm_malloc comes with the SSE intrinsics headers, so unlike the
// Microsoft-specific _aligned_malloc it is available wherever the SSE2
// code path can be compiled.
// The returned block must be released with free_Aligned128.
inline void* malloc_Aligned128 (size_t size)
{
    return _mm_malloc (size, 16);
}

// Releases a block previously obtained from malloc_Aligned128.
inline void free_Aligned128 ( void *ptr )
{
    _mm_free (ptr);
}
78
79 //-----------------------------------------------------------------------
80 // Arbitrary size - To Unaligned
81
// Copies `num` bytes from `source` to `destination`; neither pointer needs
// any particular alignment (unaligned SSE2 load and store are used).
// Whole 16-byte blocks go through SSE2 registers, the remaining 0..15 bytes
// are copied one at a time.
inline void memcpy_Unaligned_Unaligned (void* destination, const void* source, size_t num)
{
    const size_t blockCount = num >> 4;  // number of whole 128-bit blocks
    const size_t tailBytes  = num & 0xF; // bytes left after the last block

    __m128i*       out = (__m128i*)destination;
    const __m128i* in  = (const __m128i*)source;
    for (size_t block = 0; block < blockCount; ++block)
        _mm_storeu_si128 (out + block, _mm_loadu_si128 (in + block));

    char*       outTail = (char*)(out + blockCount);
    const char* inTail  = (const char*)(in + blockCount);
    for (size_t i = 0; i < tailBytes; ++i)
        outTail[i] = inTail[i];
}
97
// Copies `num` bytes from a 16-byte aligned `source` to a `destination` of
// any alignment. The aligned load intrinsic used here requires `source` to
// be 16-byte aligned.
// Whole 16-byte blocks go through SSE2 registers, the remaining 0..15 bytes
// are copied one at a time.
inline void memcpy_Aligned128_Unaligned (void* destination, const void* source, size_t num)
{
    const __m128i*       in    = (const __m128i*)source;
    const __m128i* const inEnd = in + (num >> 4); // one past the last whole block
    __m128i*             out   = (__m128i*)destination;

    for (; in != inEnd; ++in, ++out)
        _mm_storeu_si128 (out, _mm_load_si128 (in));

    const char* inTail  = (const char*)in;
    char*       outTail = (char*)out;
    for (size_t left = num & 0xF; left != 0; --left)
        *outTail++ = *inTail++;
}
113
114 //-----------------------------------------------------------------------
115 // Arbitrary size - To Aligned 128 bits (16 bytes)
116
// Copies `num` bytes from a `source` of any alignment to a 16-byte aligned
// `destination`. Whole blocks are written with _mm_stream_si128, which
// requires `destination` to be 16-byte aligned.
// The remaining 0..15 bytes are copied one at a time.
inline void memcpy_Unaligned_Aligned128 (void* destination, const void* source, size_t num)
{
    const size_t blockCount = num >> 4;  // number of whole 128-bit blocks
    const size_t tailBytes  = num & 0xF; // bytes left after the last block

    const __m128i* in  = (const __m128i*)source;
    __m128i*       out = (__m128i*)destination;
    for (size_t block = 0; block < blockCount; ++block)
        _mm_stream_si128 (out + block, _mm_loadu_si128 (in + block));

    const char* inTail  = (const char*)(in + blockCount);
    char*       outTail = (char*)(out + blockCount);
    for (size_t i = 0; i < tailBytes; ++i)
        outTail[i] = inTail[i];
}
132
133 //-----------------------------------------------------------------------
134 // 128 bits - To Unaligned
135
// Copies exactly one 128-bit (16-byte) block from `source` to `destination`;
// neither pointer needs any particular alignment.
inline void memcpy_Unaligned_Unaligned_Once128 (void* destination, const void* source)
{
    const __m128i block = _mm_loadu_si128 ((const __m128i*)source);
    _mm_storeu_si128 ((__m128i*)destination, block);
}
140
141 //-----------------------------------------------------------------------
142 // Arbitrary size and 128-bit multiple - Aligned source, To Aligned 128 bits (16 bytes)
143
// Copies `num` bytes between two 16-byte aligned buffers. The aligned load
// and the _mm_stream_si128 store used for whole blocks require both
// `source` and `destination` to be 16-byte aligned.
// The remaining 0..15 bytes are copied one at a time.
inline void memcpy_Aligned128_Aligned128 (void* destination, const void* source, size_t num)
{
    const __m128i*       in    = (const __m128i*)source;
    const __m128i* const inEnd = in + (num >> 4); // one past the last whole block
    __m128i*             out   = (__m128i*)destination;

    for (; in != inEnd; ++in, ++out)
        _mm_stream_si128 (out, _mm_load_si128 (in));

    const char* inTail  = (const char*)in;
    char*       outTail = (char*)out;
    for (size_t left = num & 0xF; left != 0; --left)
        *outTail++ = *inTail++;
}
159
// Copies the whole 16-byte blocks contained in `num` bytes between two
// 16-byte aligned buffers. Any remainder (num modulo 16) is NOT copied.
// The aligned load and the _mm_stream_si128 store require both pointers to
// be 16-byte aligned.
inline void memcpy_Aligned128_Aligned128_Size128 (void* destination, const void* source, size_t num)
{
    const size_t   blockCount = num >> 4; // number of whole 128-bit blocks
    const __m128i* in         = (const __m128i*)source;
    __m128i*       out        = (__m128i*)destination;

    for (size_t block = 0; block < blockCount; ++block)
        _mm_stream_si128 (out + block, _mm_load_si128 (in + block));
}
169
170 //-----------------------------------------------------------------------
171 // 1024 bits - To Unaligned
172
// Copies exactly 1024 bits (128 bytes, i.e. 8 blocks of 16 bytes) from
// `source` to `destination`; neither pointer needs any particular alignment.
// The third argument is accepted but ignored.
inline void memcpy_Unaligned_Unaligned_Once1024 (void* destination, const void* source, size_t)
{
    const __m128i* in  = (const __m128i*)source;
    __m128i*       out = (__m128i*)destination;

    for (int block = 0; block < 8; ++block)
        _mm_storeu_si128 (out + block, _mm_loadu_si128 (in + block));
}
182
183 //-----------------------------------------------------------------------
184 // 128 bits - To Aligned 128 bits (16 bytes)
185
// Copies exactly one 128-bit (16-byte) block between two 16-byte aligned
// addresses. The aligned load and the _mm_stream_si128 store require both
// pointers to be 16-byte aligned.
inline void memcpy_Aligned128_Aligned128_Once128 (void* destination, const void* source)
{
    const __m128i block = _mm_load_si128 ((const __m128i*)source);
    _mm_stream_si128 ((__m128i*)destination, block);
}
190
191 //-----------------------------------------------------------------------
192 // 128-bit multiple - To Unaligned (16 bytes)
193
// Copies the whole 16-byte blocks contained in `num` bytes from `source` to
// `destination`; neither pointer needs any particular alignment.
// Any remainder (num modulo 16) is NOT copied.
inline void memcpy_Unaligned_Unaligned_Size128 (void* destination, const void* source, size_t num)
{
    const size_t   blockCount = num >> 4; // number of whole 128-bit blocks
    const __m128i* in         = (const __m128i*)source;
    __m128i*       out        = (__m128i*)destination;

    for (size_t block = 0; block < blockCount; ++block)
        _mm_storeu_si128 (out + block, _mm_loadu_si128 (in + block));
}
203
// Copies the whole 16-byte blocks contained in `num` bytes from a 16-byte
// aligned `source` to a `destination` of any alignment. The aligned load
// intrinsic requires `source` to be 16-byte aligned.
// Any remainder (num modulo 16) is NOT copied.
inline void memcpy_Aligned128_Unaligned_Size128 (void* destination, const void* source, size_t num)
{
    const __m128i*       in    = (const __m128i*)source;
    const __m128i* const inEnd = in + (num >> 4); // one past the last whole block
    __m128i*             out   = (__m128i*)destination;

    for (; in != inEnd; ++in, ++out)
        _mm_storeu_si128 (out, _mm_load_si128 (in));
}
213
214 //-----------------------------------------------------------------------
215 // 128-bit multiple - To Aligned 128 bits (16 bytes)
216
// Copies the whole 16-byte blocks contained in `num` bytes from a `source`
// of any alignment to a 16-byte aligned `destination`. Blocks are written
// with _mm_stream_si128, which requires `destination` to be 16-byte aligned.
// Any remainder (num modulo 16) is NOT copied.
inline void memcpy_Unaligned_Aligned128_Size128 (void* destination, const void* source, size_t num)
{
    const size_t   blockCount = num >> 4; // number of whole 128-bit blocks
    const __m128i* in         = (const __m128i*)source;
    __m128i*       out        = (__m128i*)destination;

    for (size_t block = 0; block < blockCount; ++block)
        _mm_stream_si128 (out + block, _mm_loadu_si128 (in + block));
}
226
227
228 /* Slower
229 inline void memcpy_Aligned128_Aligned128_Once1024 (void* destination, const void* source)
230 {
231 __m128i* destination16=(__m128i*)destination;
232 const __m128i* source16=(__m128i*)source;
233
234 size_t num=8;
235 while (num--)
236 _mm_stream_si128 (destination16++, _mm_load_si128(source16++));
237 }
238 */
239
240 /*
241 inline void memcpy_Aligned256_Aligned256 (void* destination, const void* source, size_t num) //with AVX, actually slower
242 {
243 size_t extra=num&0x1F;
244 __m256i* destination16=(__m256i*)destination;
245 const __m256i* source16=(const __m256i*)source;
246
247 num>>=5;
248 while (num--)
249 _mm256_storeu_si256 (destination16++, _mm256_loadu_si256(source16++));
250
251 char* destination1=(char*)destination16;
252 char* source1=(char*)source16;
253 while (extra--)
254 *destination1++=*source1++;
255 }
256 */
257
258#endif // ZENLIB_MEMUTILS_SSE2
259
260} //NameSpace
261
262#endif
#define memcpy_Unaligned_Unaligned
Definition: MemoryUtils.h:35
#define memcpy_Aligned128_Aligned128_Once128
Definition: MemoryUtils.h:49
#define free_Aligned128
Definition: MemoryUtils.h:30
#define memcpy_Aligned128_Unaligned
Definition: MemoryUtils.h:36
#define memcpy_Aligned128_Aligned128_Size128
Definition: MemoryUtils.h:62
#define memcpy_Unaligned_Aligned128_Size128
Definition: MemoryUtils.h:61
#define memcpy_Aligned128_Aligned128
Definition: MemoryUtils.h:41
#define memcpy_Unaligned_Unaligned_Once128
Definition: MemoryUtils.h:45
#define memcpy_Unaligned_Aligned128
Definition: MemoryUtils.h:40
#define malloc_Aligned128
Definition: MemoryUtils.h:28
#define memcpy_Unaligned_Unaligned_Once1024
Definition: MemoryUtils.h:53
Definition: BitStream.h:24