/* akmalloc - malloc.c */

/*
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <http://unlicense.org/>
*/

#ifndef AKMALLOC_MALLOC_C
#define AKMALLOC_MALLOC_C

#define AKMALLOC_MAJOR_VER 1
#define AKMALLOC_MINOR_VER 0
#define AKMALLOC_PATCH_VER 1

#if !defined(AKMALLOC_INCLUDE_ONLY) && !defined(AKMALLOC_LINK_STATIC) && !defined(AKMALLOC_BUILD)
# error "You must define one of AKMALLOC_INCLUDE_ONLY (header only), AKMALLOC_LINK_STATIC (static libs), or AKMALLOC_BUILD (shared libs)."
#endif

/*
 * Revision history:
 *
 * (July 16, 2016) 1.0.1: Added customized coalescing allocators to malloc state.
 * - Performance and memory preservation are good
 * - More debuggable, with diagnostic tooling
 *
 * (July 11, 2016) 0.1.2: Functioning multi-threaded malloc.
 *
 * (July 10, 2016) 0.1.1: Functioning single-threaded malloc.
 *
 * (March 11, 2016) 0.0.1: Initial commit
 */

/*
 * Compiler definition macros
 */
#if defined(_MSC_VER)
# define AKMALLOC_MSVC 1
#endif

#if defined(__GNUC__) && !defined(__clang__)
# define AKMALLOC_GCC 1
#endif

#if defined(__clang__)
# define AKMALLOC_CLANG 1
#endif

#if !AKMALLOC_MSVC && !AKMALLOC_GCC && !AKMALLOC_CLANG
# error "Unsupported compiler!"
#endif

/*
 * Platform definition macros
 */
#if defined(_WIN32)
# define AKMALLOC_LINUX 0
# define AKMALLOC_WINDOWS 1
# define AKMALLOC_IOS 0
# define AKMALLOC_MACOS 0
#endif

#if defined(__linux__)
# define AKMALLOC_LINUX 1
# define AKMALLOC_WINDOWS 0
# define AKMALLOC_IOS 0
# define AKMALLOC_MACOS 0
#endif

#if defined(__APPLE__)
#include <TargetConditionals.h>
# if TARGET_IPHONE_SIMULATOR || TARGET_OS_IPHONE
#  define AKMALLOC_LINUX 0
#  define AKMALLOC_WINDOWS 0
#  define AKMALLOC_IOS 1
#  define AKMALLOC_MACOS 0
# elif TARGET_OS_MAC
#  define AKMALLOC_LINUX 0
#  define AKMALLOC_WINDOWS 0
#  define AKMALLOC_IOS 0
#  define AKMALLOC_MACOS 1
# else
#  error "Unknown Apple platform"
# endif
#endif

#if !AKMALLOC_LINUX && !AKMALLOC_WINDOWS && !AKMALLOC_IOS && !AKMALLOC_MACOS
# error "Invalid or unsupported platform!"
#endif

/*
 * Bit-ness definition macros
 * see: https://sourceforge.net/p/predef/wiki/Architectures/
 */
#if defined(_WIN32)
# if defined(_WIN64)
#  define AKMALLOC_BITNESS 64
# else
#  define AKMALLOC_BITNESS 32
# endif
#endif

#if AKMALLOC_GCC || AKMALLOC_CLANG
# if defined(__LP64__) || defined(__x86_64__) || defined(__ppc64__) || defined(__aarch64__) || defined(__ARM_ARCH_ISA_A64)
#  define AKMALLOC_BITNESS 64
# else
#  define AKMALLOC_BITNESS 32
# endif
#endif

#if !defined(AKMALLOC_BITNESS) || (AKMALLOC_BITNESS != 32 && AKMALLOC_BITNESS != 64)
# error "Invalid bitness or bitness undefined."
#endif

/*
 * ISA definition macros
 * see: https://sourceforge.net/p/predef/wiki/Architectures/
 */
#if defined(__arm__) || defined(_M_ARM)
# define AKMALLOC_ARM 1
#else
# define AKMALLOC_ARM 0
#endif


/********************** setup begin ************************/
#if !defined(AK_EXTERN_C_BEGIN)
# if defined(__cplusplus)
#  define AK_EXTERN_C_BEGIN extern "C" {
#  define AK_EXTERN_C_END }/*extern C*/
# else
#  define AK_EXTERN_C_BEGIN
#  define AK_EXTERN_C_END
# endif
#endif

#if !defined(AKMALLOC_INCLUDE_ONLY)
# include "akmalloc/malloc.h"
#endif

#if !AKMALLOC_WINDOWS
# ifndef _GNU_SOURCE
#  define _GNU_SOURCE
# endif
#endif

typedef int ak_i32;
typedef unsigned int ak_u32;

#if AKMALLOC_MSVC
typedef __int64 ak_i64;
typedef unsigned __int64 ak_u64;
#else
typedef long long int ak_i64;
typedef unsigned long long int ak_u64;
#endif

#if AKMALLOC_BITNESS == 32
typedef ak_u32 ak_sz;
typedef ak_i32 ak_ssz;
#else
typedef ak_u64 ak_sz;
typedef ak_i64 ak_ssz;
#endif

#if AKMALLOC_MSVC
# define ak_inline __forceinline
#else
# define ak_inline __inline__ __attribute__((always_inline))
#endif/*AKMALLOC_MSVC*/

#if !defined(NDEBUG)
# undef ak_inline
# define ak_inline
#endif

#if AKMALLOC_MSVC
# define ak_likely(x) x
# define ak_unlikely(x) x
#else
# define ak_likely(x) __builtin_expect(!!(x), 1)
# define ak_unlikely(x) __builtin_expect(!!(x), 0)
#endif/*AKMALLOC_MSVC*/

#define AK_SZ_ONE ((ak_sz)1)

#define AK_SZ_MAX (~((ak_sz)0))
#define AK_U32_MAX (~((ak_u32)0))

#define AK_NULLPTR 0

#define AKMALLOC_DEFAULT_PAGE_SIZE 4096
#define AKMALLOC_DEFAULT_LARGE_BLOCK_SIZE (AK_SZ_ONE << 21)

/*
 * Cache line macros
 */
#if AKMALLOC_ARM
# define AKMALLOC_CACHE_LINE_LENGTH 32
#else
# define AKMALLOC_CACHE_LINE_LENGTH 64
#endif

#define ak_as_ptr(x) (&(x))

#define ak_ptr_cast(ty, expr) ((ty*)((void*)(expr)))
/********************** setup end ************************/

/********************** assert begin ************************/
#if !defined(AKMALLOC_ASSERT_IMPL)
# include <stdlib.h>
# include <stdio.h>
# define AKMALLOC_ASSERT_IMPL(x) \
    if (!(x)) { \
        fprintf(stderr, "%s (%d) : %s\n", __FILE__, __LINE__, "ASSERT: failed condition `" #x "'"); \
        ak_call_abort(); \
    }

static void ak_call_abort()
{
    abort();
}
#endif

#if !defined(AKMALLOC_ASSERT)
# if !defined(NDEBUG)
#  define AKMALLOC_ASSERT AKMALLOC_ASSERT_IMPL
# else
#  define AKMALLOC_ASSERT(...) do { } while(0)
# endif
#endif

#if !defined(AKMALLOC_ASSERT_ALWAYS)
# define AKMALLOC_ASSERT_ALWAYS AKMALLOC_ASSERT_IMPL
#endif

#if !defined(AKMALLOC_DEBUG_PRINT)
// # define AKMALLOC_DEBUG_PRINT
#endif

#if defined(AKMALLOC_DEBUG_PRINT)
# define DBG_PRINTF(...) fprintf(stdout, __VA_ARGS__); fflush(stdout)
#else
# define DBG_PRINTF(...) (void)0
#endif/*defined(AKMALLOC_DEBUG_PRINT)*/
/********************** assert end ************************/

/********************** spinlock begin ************************/
typedef struct ak_spinlock_tag ak_spinlock;

struct ak_spinlock_tag
{
    ak_u32 islocked;
};

static void ak_os_sleep(ak_u32 micros);

static void ak_spinlock_yield();

#if !AKMALLOC_MSVC && (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100
# define ak_atomic_cas(px, nx, ox) __sync_bool_compare_and_swap((px), (ox), (nx))
# define ak_atomic_xchg(px, nx) __sync_lock_test_and_set((px), (nx))
#else/* Windows */
# ifndef _M_AMD64
  /* These are already defined on AMD64 builds */
  AK_EXTERN_C_BEGIN
  long __cdecl _InterlockedCompareExchange(long volatile* Target, long NewValue, long OldValue);
  long __cdecl _InterlockedExchange(long volatile* Target, long NewValue);
  AK_EXTERN_C_END
# pragma intrinsic (_InterlockedCompareExchange)
# endif /* _M_AMD64 */
# define ak_atomic_cas(px, nx, ox) (_InterlockedCompareExchange((volatile long*)(px), (nx), (ox)) == (ox))
# define ak_atomic_xchg(px, nx) _InterlockedExchange((volatile long*)(px), (nx))
#endif/* Windows */
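
/*
 * Illustrative sketch (not part of the allocator): the intended semantics
 * of the two atomic macros above. ak_atomic_cas(px, nx, ox) stores nx into
 * *px only if *px equals ox and evaluates non-zero on success;
 * ak_atomic_xchg(px, nx) stores nx and evaluates to the previous value.
 * The function name below is hypothetical.
 */
#if 0
static void example_atomics(void)
{
    ak_u32 v = 0;
    AKMALLOC_ASSERT(ak_atomic_cas(&v, 1, 0));    /* v: 0 -> 1, succeeds */
    AKMALLOC_ASSERT(ak_atomic_xchg(&v, 0) == 1); /* v: 1 -> 0, old value 1 */
}
#endif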

ak_inline static int ak_spinlock_is_locked(ak_spinlock* p)
{
    return *(volatile ak_u32*)(&(p->islocked));
}

ak_inline static void ak_spinlock_init(ak_spinlock* p)
{
    p->islocked = 0;
}

ak_inline static void ak_spinlock_acquire(ak_spinlock* p)
{
    ak_u32 spins = 0;

#if AKMALLOC_WINDOWS
# define SPINS_PER_YIELD 63
#elif AKMALLOC_MACOS || AKMALLOC_IOS
# define SPINS_PER_YIELD 15
#else
# define SPINS_PER_YIELD 31
#endif

    if (ak_atomic_xchg(&(p->islocked), 1)) {
        while (ak_atomic_xchg(&(p->islocked), 1)) {
            if ((++spins & SPINS_PER_YIELD) == 0) {
#if AKMALLOC_MACOS || AKMALLOC_IOS
                ak_os_sleep(40);
#else
                ak_spinlock_yield();
#endif
            }
        }
    }

#undef SPINS_PER_YIELD

    AKMALLOC_ASSERT(ak_spinlock_is_locked(p));
}

ak_inline static void ak_spinlock_release(ak_spinlock* p)
{
    AKMALLOC_ASSERT(ak_spinlock_is_locked(p));
    ak_atomic_xchg(&(p->islocked), 0);
}

#if AKMALLOC_WINDOWS

#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <Windows.h>

static void ak_os_sleep(ak_u32 micros)
{
    SleepEx(micros / 1000, FALSE);
}

static void ak_spinlock_yield()
{
    SwitchToThread();
}

#else

#include <sched.h>
#include <unistd.h>

static void ak_os_sleep(ak_u32 micros)
{
    usleep(micros);
}

static void ak_spinlock_yield()
{
    sched_yield();
}

#endif
/********************** spinlock end ************************/
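
/*
 * Illustrative sketch (not part of the allocator): how the spinlock above
 * is meant to be used. The names g_lock, g_counter and example_increment
 * are hypothetical; ak_spinlock_init(&g_lock) must run once before use.
 */
#if 0
static ak_spinlock g_lock;
static ak_u64 g_counter;

static void example_increment(void)
{
    ak_spinlock_acquire(&g_lock); /* spins, yielding periodically */
    ++g_counter;                  /* critical section */
    ak_spinlock_release(&g_lock);
}
#endif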

/********************** bitset begin ************************/
#if AKMALLOC_MSVC
#include <intrin.h>
#endif

typedef ak_u32 ak_bitset32;

ak_inline static int ak_bitset_all(const ak_bitset32* bs)
{
    return (*bs == 0xFFFFFFFF) ? 1 : 0;
}

ak_inline static int ak_bitset_any(const ak_bitset32* bs)
{
    return *(const int*)bs;
}

ak_inline static int ak_bitset_none(const ak_bitset32* bs)
{
    return (*bs == 0x00000000) ? 1 : 0;
}

ak_inline static void ak_bitset_set_all(ak_bitset32* bs)
{
    *bs = 0xFFFFFFFF;
}

ak_inline static void ak_bitset_clear_all(ak_bitset32* bs)
{
    *bs = 0x00000000;
}

#define ak_bitset_set(bs, i) \
    (*(bs) = (*(bs) | (0x00000001 << (i))))

#define ak_bitset_clear(bs, i) \
    (*(bs) = (*(bs) & (~(0x00000001 << (i)))))

ak_inline static void ak_bitset_set_n(ak_bitset32* bs, int i, int n)
{
    ak_bitset32 mask = ~(0xFFFFFFFF << n);
    *bs = *bs | (mask << i);
}

ak_inline static void ak_bitset_clear_n(ak_bitset32* bs, int i, int n)
{
    ak_bitset32 mask = ~(0xFFFFFFFF << n);
    *bs = *bs & (~(mask << i));
}

#define ak_bitset_get(bs, i) \
    ((*(bs) & (0x00000001 << (i))))

ak_inline static ak_bitset32 ak_bitset_get_n(const ak_bitset32* bs, int i, int n)
{
    ak_bitset32 mask = ~(0xFFFFFFFF << n);
    return (*bs & (mask << i)) >> i;
}

#if AKMALLOC_MSVC
# define ak_bitset_fill_num_leading_zeros(bs, out) \
    do { \
        DWORD ldz = 0; \
        out = (_BitScanReverse(&ldz, *(bs))) ? (31 - ldz) : 32; \
    } while (0)
#else
# define ak_bitset_fill_num_leading_zeros(bs, out) \
    out = (*bs) ? __builtin_clz(*bs) : 32
#endif

ak_inline static int ak_bitset_num_leading_zeros(const ak_bitset32* bs)
{
    int nlz;
    ak_bitset_fill_num_leading_zeros(bs, nlz);
    return nlz;
}

#if AKMALLOC_MSVC
# define ak_bitset_fill_num_trailing_zeros(bs, out) \
    do { \
        DWORD trz = 0; \
        out = (_BitScanForward(&trz, *(bs))) ? trz : 32; \
    } while (0)
#else
# define ak_bitset_fill_num_trailing_zeros(bs, out) \
    out = (*(bs)) ? __builtin_ctz(*(bs)) : 32
#endif

ak_inline static int ak_bitset_num_trailing_zeros(const ak_bitset32* bs)
{
    int ntz;
    ak_bitset_fill_num_trailing_zeros(bs, ntz);
    return ntz;
}

ak_inline static int ak_bitset_num_leading_ones(const ak_bitset32* bs)
{
    ak_bitset32 copy = ~(*bs);
    return ak_bitset_num_leading_zeros(&copy);
}

ak_inline static int ak_bitset_num_trailing_ones(const ak_bitset32* bs)
{
    ak_bitset32 copy = ~(*bs);
    return ak_bitset_num_trailing_zeros(&copy);
}

ak_inline static void ak_bitset_flip(ak_bitset32* bs)
{
    *bs = ~(*bs);
}

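/*
 * Illustrative sketch (not part of the allocator): basic ak_bitset32 usage
 * with verifiable results. The function name is hypothetical.
 */
#if 0
static void example_bitset32(void)
{
    ak_bitset32 bs;
    ak_bitset_clear_all(&bs);
    ak_bitset_set_n(&bs, 0, 5); /* set bits 0..4 -> 0x1F */
    AKMALLOC_ASSERT(ak_bitset_num_trailing_ones(&bs) == 5);
    ak_bitset_clear(&bs, 2);    /* -> 0x1B */
    AKMALLOC_ASSERT(ak_bitset_num_trailing_ones(&bs) == 2);
    AKMALLOC_ASSERT(ak_bitset_num_leading_zeros(&bs) == 27);
}
#endif
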
/* ak_bitset512 */

struct ak_bitset512_tag
{
    ak_bitset32 a0;
    ak_bitset32 a1;
    ak_bitset32 a2;
    ak_bitset32 a3;
    ak_bitset32 a4;
    ak_bitset32 a5;
    ak_bitset32 a6;
    ak_bitset32 a7;
    ak_bitset32 a8;
    ak_bitset32 a9;
    ak_bitset32 a10;
    ak_bitset32 a11;
    ak_bitset32 a12;
    ak_bitset32 a13;
    ak_bitset32 a14;
    ak_bitset32 a15;
};

typedef struct ak_bitset512_tag ak_bitset512;

ak_inline static int ak_bitset512_all(const ak_bitset512* bs)
{
    return ak_bitset_all(&(bs->a0)) &&
           ak_bitset_all(&(bs->a1)) &&
           ak_bitset_all(&(bs->a2)) &&
           ak_bitset_all(&(bs->a3)) &&
           ak_bitset_all(&(bs->a4)) &&
           ak_bitset_all(&(bs->a5)) &&
           ak_bitset_all(&(bs->a6)) &&
           ak_bitset_all(&(bs->a7)) &&
           ak_bitset_all(&(bs->a8)) &&
           ak_bitset_all(&(bs->a9)) &&
           ak_bitset_all(&(bs->a10)) &&
           ak_bitset_all(&(bs->a11)) &&
           ak_bitset_all(&(bs->a12)) &&
           ak_bitset_all(&(bs->a13)) &&
           ak_bitset_all(&(bs->a14)) &&
           ak_bitset_all(&(bs->a15));
}

ak_inline static int ak_bitset512_any(const ak_bitset512* bs)
{
    return ak_bitset_any(&(bs->a0)) ||
           ak_bitset_any(&(bs->a1)) ||
           ak_bitset_any(&(bs->a2)) ||
           ak_bitset_any(&(bs->a3)) ||
           ak_bitset_any(&(bs->a4)) ||
           ak_bitset_any(&(bs->a5)) ||
           ak_bitset_any(&(bs->a6)) ||
           ak_bitset_any(&(bs->a7)) ||
           ak_bitset_any(&(bs->a8)) ||
           ak_bitset_any(&(bs->a9)) ||
           ak_bitset_any(&(bs->a10)) ||
           ak_bitset_any(&(bs->a11)) ||
           ak_bitset_any(&(bs->a12)) ||
           ak_bitset_any(&(bs->a13)) ||
           ak_bitset_any(&(bs->a14)) ||
           ak_bitset_any(&(bs->a15));
}

ak_inline static int ak_bitset512_none(const ak_bitset512* bs)
{
    return ak_bitset_none(&(bs->a0)) &&
           ak_bitset_none(&(bs->a1)) &&
           ak_bitset_none(&(bs->a2)) &&
           ak_bitset_none(&(bs->a3)) &&
           ak_bitset_none(&(bs->a4)) &&
           ak_bitset_none(&(bs->a5)) &&
           ak_bitset_none(&(bs->a6)) &&
           ak_bitset_none(&(bs->a7)) &&
           ak_bitset_none(&(bs->a8)) &&
           ak_bitset_none(&(bs->a9)) &&
           ak_bitset_none(&(bs->a10)) &&
           ak_bitset_none(&(bs->a11)) &&
           ak_bitset_none(&(bs->a12)) &&
           ak_bitset_none(&(bs->a13)) &&
           ak_bitset_none(&(bs->a14)) &&
           ak_bitset_none(&(bs->a15));
}

ak_inline static void ak_bitset512_set_all(ak_bitset512* bs)
{
    ak_bitset_set_all(&(bs->a0));
    ak_bitset_set_all(&(bs->a1));
    ak_bitset_set_all(&(bs->a2));
    ak_bitset_set_all(&(bs->a3));
    ak_bitset_set_all(&(bs->a4));
    ak_bitset_set_all(&(bs->a5));
    ak_bitset_set_all(&(bs->a6));
    ak_bitset_set_all(&(bs->a7));
    ak_bitset_set_all(&(bs->a8));
    ak_bitset_set_all(&(bs->a9));
    ak_bitset_set_all(&(bs->a10));
    ak_bitset_set_all(&(bs->a11));
    ak_bitset_set_all(&(bs->a12));
    ak_bitset_set_all(&(bs->a13));
    ak_bitset_set_all(&(bs->a14));
    ak_bitset_set_all(&(bs->a15));
}

ak_inline static void ak_bitset512_clear_all(ak_bitset512* bs)
{
    ak_bitset_clear_all(&(bs->a0));
    ak_bitset_clear_all(&(bs->a1));
    ak_bitset_clear_all(&(bs->a2));
    ak_bitset_clear_all(&(bs->a3));
    ak_bitset_clear_all(&(bs->a4));
    ak_bitset_clear_all(&(bs->a5));
    ak_bitset_clear_all(&(bs->a6));
    ak_bitset_clear_all(&(bs->a7));
    ak_bitset_clear_all(&(bs->a8));
    ak_bitset_clear_all(&(bs->a9));
    ak_bitset_clear_all(&(bs->a10));
    ak_bitset_clear_all(&(bs->a11));
    ak_bitset_clear_all(&(bs->a12));
    ak_bitset_clear_all(&(bs->a13));
    ak_bitset_clear_all(&(bs->a14));
    ak_bitset_clear_all(&(bs->a15));
}

ak_inline static void ak_bitset512_set(ak_bitset512* bs, int idx)
{
    switch (idx >> 5) {
        case 0:
            ak_bitset_set(&(bs->a15), idx & 31);
            return;
        case 1:
            ak_bitset_set(&(bs->a14), idx & 31);
            return;
        case 2:
            ak_bitset_set(&(bs->a13), idx & 31);
            return;
        case 3:
            ak_bitset_set(&(bs->a12), idx & 31);
            return;
        case 4:
            ak_bitset_set(&(bs->a11), idx & 31);
            return;
        case 5:
            ak_bitset_set(&(bs->a10), idx & 31);
            return;
        case 6:
            ak_bitset_set(&(bs->a9), idx & 31);
            return;
        case 7:
            ak_bitset_set(&(bs->a8), idx & 31);
            return;
        case 8:
            ak_bitset_set(&(bs->a7), idx & 31);
            return;
        case 9:
            ak_bitset_set(&(bs->a6), idx & 31);
            return;
        case 10:
            ak_bitset_set(&(bs->a5), idx & 31);
            return;
        case 11:
            ak_bitset_set(&(bs->a4), idx & 31);
            return;
        case 12:
            ak_bitset_set(&(bs->a3), idx & 31);
            return;
        case 13:
            ak_bitset_set(&(bs->a2), idx & 31);
            return;
        case 14:
            ak_bitset_set(&(bs->a1), idx & 31);
            return;
        case 15:
            ak_bitset_set(&(bs->a0), idx & 31);
            return;
        default:
            AKMALLOC_ASSERT_ALWAYS(0 && "Invalid bitset index");
    }
    return;
}

ak_inline static void ak_bitset512_clear(ak_bitset512* bs, int idx)
{
    switch (idx >> 5) {
        case 0:
            ak_bitset_clear(&(bs->a15), idx & 31);
            return;
        case 1:
            ak_bitset_clear(&(bs->a14), idx & 31);
            return;
        case 2:
            ak_bitset_clear(&(bs->a13), idx & 31);
            return;
        case 3:
            ak_bitset_clear(&(bs->a12), idx & 31);
            return;
        case 4:
            ak_bitset_clear(&(bs->a11), idx & 31);
            return;
        case 5:
            ak_bitset_clear(&(bs->a10), idx & 31);
            return;
        case 6:
            ak_bitset_clear(&(bs->a9), idx & 31);
            return;
        case 7:
            ak_bitset_clear(&(bs->a8), idx & 31);
            return;
        case 8:
            ak_bitset_clear(&(bs->a7), idx & 31);
            return;
        case 9:
            ak_bitset_clear(&(bs->a6), idx & 31);
            return;
        case 10:
            ak_bitset_clear(&(bs->a5), idx & 31);
            return;
        case 11:
            ak_bitset_clear(&(bs->a4), idx & 31);
            return;
        case 12:
            ak_bitset_clear(&(bs->a3), idx & 31);
            return;
        case 13:
            ak_bitset_clear(&(bs->a2), idx & 31);
            return;
        case 14:
            ak_bitset_clear(&(bs->a1), idx & 31);
            return;
        case 15:
            ak_bitset_clear(&(bs->a0), idx & 31);
            return;
        default:
            AKMALLOC_ASSERT_ALWAYS(0 && "Invalid bitset index");
    }
    return;
}

ak_inline static ak_bitset32 ak_bitset512_get(const ak_bitset512* bs, int idx)
{
    switch (idx >> 5) {
        case 0:
            return ak_bitset_get(&(bs->a15), idx & 31);
        case 1:
            return ak_bitset_get(&(bs->a14), idx & 31);
        case 2:
            return ak_bitset_get(&(bs->a13), idx & 31);
        case 3:
            return ak_bitset_get(&(bs->a12), idx & 31);
        case 4:
            return ak_bitset_get(&(bs->a11), idx & 31);
        case 5:
            return ak_bitset_get(&(bs->a10), idx & 31);
        case 6:
            return ak_bitset_get(&(bs->a9), idx & 31);
        case 7:
            return ak_bitset_get(&(bs->a8), idx & 31);
        case 8:
            return ak_bitset_get(&(bs->a7), idx & 31);
        case 9:
            return ak_bitset_get(&(bs->a6), idx & 31);
        case 10:
            return ak_bitset_get(&(bs->a5), idx & 31);
        case 11:
            return ak_bitset_get(&(bs->a4), idx & 31);
        case 12:
            return ak_bitset_get(&(bs->a3), idx & 31);
        case 13:
            return ak_bitset_get(&(bs->a2), idx & 31);
        case 14:
            return ak_bitset_get(&(bs->a1), idx & 31);
        case 15:
            return ak_bitset_get(&(bs->a0), idx & 31);
        default:
            AKMALLOC_ASSERT_ALWAYS(0 && "Invalid bitset index");
            return 0;
    }
}

#define ak_bitset512_fill_num_leading_zeros(bs, nlz) \
    nlz = 0; \
    { \
        int cur = 0; \
        ak_bitset_fill_num_leading_zeros(&(bs->a0), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a1), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a2), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a3), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a4), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a5), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a6), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a7), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a8), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a9), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a10), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a11), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a12), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a13), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a14), cur); \
        nlz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_leading_zeros(&(bs->a15), cur); \
        nlz += cur; \
        } } } } } } } } } } } } } } } \
    }

#define ak_bitset512_fill_num_leading_ones(bs, nlz) \
    nlz = 0; \
    { \
        int cur = 0; \
        ak_bitset32 mybs; \
        mybs = ~(bs->a0); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a1); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a2); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a3); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a4); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a5); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a6); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a7); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a8); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a9); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a10); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a11); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a12); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a13); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a14); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a15); \
        ak_bitset_fill_num_leading_zeros(&mybs, cur); \
        nlz += cur; \
        } } } } } } } } } } } } } } } \
    }

ak_inline static int ak_bitset512_num_leading_zeros(const ak_bitset512* bs)
{
    int nlz = 0;
    ak_bitset512_fill_num_leading_zeros(bs, nlz);
    return nlz;
}

#define ak_bitset512_fill_num_trailing_zeros(bs, ntz) \
    ntz = 0; \
    { \
        int cur = 0; \
        ak_bitset_fill_num_trailing_zeros(&(bs->a15), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a14), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a13), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a12), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a11), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a10), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a9), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a8), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a7), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a6), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a5), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a4), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a3), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a2), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a1), cur); \
        ntz += cur; \
        if (cur == 32) { \
        ak_bitset_fill_num_trailing_zeros(&(bs->a0), cur); \
        ntz += cur; \
        } } } } } } } } } } } } } } } \
    }

#define ak_bitset512_fill_num_trailing_ones(bs, ntz) \
    ntz = 0; \
    { \
        int cur = 0; \
        ak_bitset32 mybs; \
        mybs = ~(bs->a15); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a14); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a13); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a12); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a11); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a10); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a9); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a8); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a7); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a6); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a5); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a4); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a3); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a2); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a1); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        if (cur == 32) { \
        mybs = ~(bs->a0); \
        ak_bitset_fill_num_trailing_zeros(&mybs, cur); \
        ntz += cur; \
        } } } } } } } } } } } } } } } \
    }

ak_inline static int ak_bitset512_num_trailing_zeros(const ak_bitset512* bs)
{
    int ntz = 0;
    ak_bitset512_fill_num_trailing_zeros(bs, ntz);
    return ntz;
}

ak_inline static void ak_bitset512_flip(ak_bitset512* bs)
{
    ak_bitset_flip(&(bs->a0));
    ak_bitset_flip(&(bs->a1));
    ak_bitset_flip(&(bs->a2));
    ak_bitset_flip(&(bs->a3));
    ak_bitset_flip(&(bs->a4));
    ak_bitset_flip(&(bs->a5));
    ak_bitset_flip(&(bs->a6));
    ak_bitset_flip(&(bs->a7));
    ak_bitset_flip(&(bs->a8));
    ak_bitset_flip(&(bs->a9));
    ak_bitset_flip(&(bs->a10));
    ak_bitset_flip(&(bs->a11));
    ak_bitset_flip(&(bs->a12));
    ak_bitset_flip(&(bs->a13));
    ak_bitset_flip(&(bs->a14));
    ak_bitset_flip(&(bs->a15));
}

ak_inline static int ak_bitset512_num_leading_ones(const ak_bitset512* bs)
{
    int nlo;
    ak_bitset512_fill_num_leading_ones(bs, nlo);
    return nlo;
}

ak_inline static int ak_bitset512_num_trailing_ones(const ak_bitset512* bs)
{
    int nto;
    ak_bitset512_fill_num_trailing_ones(bs, nto);
    return nto;
}
/********************** bitset end ************************/
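
/*
 * Illustrative sketch (not part of the allocator): ak_bitset512 stores
 * bit 0 in member a15 and bit 511 in member a0, so the trailing scans
 * start at a15 and the leading scans at a0. The function name is
 * hypothetical.
 */
#if 0
static void example_bitset512(void)
{
    ak_bitset512 bs;
    ak_bitset512_clear_all(&bs);
    ak_bitset512_set(&bs, 0); /* lowest bit, lives in bs.a15 */
    AKMALLOC_ASSERT(ak_bitset512_num_trailing_zeros(&bs) == 0);
    AKMALLOC_ASSERT(ak_bitset512_num_leading_zeros(&bs) == 511);
    ak_bitset512_clear(&bs, 0);
    AKMALLOC_ASSERT(ak_bitset512_none(&bs));
}
#endif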

/********************** os alloc begin ********/
#if defined(AKMALLOC_GETPAGESIZE)
# error "Page size can only be set to an internal default."
#else
# define AKMALLOC_GETPAGESIZE AKMALLOC_DEFAULT_GETPAGESIZE
#endif

#if !defined(AKMALLOC_MMAP) && !defined(AKMALLOC_MUNMAP)
# define AKMALLOC_MMAP AKMALLOC_DEFAULT_MMAP
# define AKMALLOC_MUNMAP AKMALLOC_DEFAULT_MUNMAP
#elif defined(AKMALLOC_MMAP) && defined(AKMALLOC_MUNMAP)
/* do nothing */
#else
# error "AKMALLOC_MMAP and AKMALLOC_MUNMAP must be defined together."
#endif

/***********************************************
 * OS Allocation
 ***********************************************/

#if AKMALLOC_WINDOWS

#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <Windows.h>

ak_inline static ak_sz ak_page_size()
{
    static ak_sz PGSZ = 0;
    if (ak_unlikely(!PGSZ)) {
        SYSTEM_INFO si;
        GetSystemInfo(&si);
        PGSZ = si.dwPageSize;
    }
    return PGSZ;
}

ak_inline static void* ak_mmap(ak_sz s)
{
    return VirtualAlloc(0, s, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
}

ak_inline static void ak_munmap(void* p, ak_sz s)
{
    (void)s; /* VirtualFree requires dwSize == 0 when using MEM_RELEASE */
    (void)VirtualFree(p, 0, MEM_RELEASE);
}

#else

#include <sys/mman.h>

ak_inline static void* ak_mmap(ak_sz s)
{
    void* addr = mmap(0, s, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
    return (addr == (void*)AK_SZ_MAX) ? 0 : addr;
}

ak_inline static void ak_munmap(void* p, ak_sz s)
{
    (void)munmap(p, s);
}

#include <unistd.h>

ak_inline static ak_sz ak_page_size()
{
    static ak_sz pgsz = 0;
    if (ak_unlikely(!pgsz)) {
        pgsz = sysconf(_SC_PAGESIZE);
    }
    return pgsz;
}

#endif

#define AKMALLOC_DEFAULT_GETPAGESIZE ak_page_size
#define AKMALLOC_DEFAULT_MMAP(s) ak_mmap((s))
#define AKMALLOC_DEFAULT_MUNMAP(a, s) ak_munmap((a), (s))

static void* ak_os_alloc(size_t sz)
{
    static const ak_sz pgsz = AKMALLOC_DEFAULT_PAGE_SIZE;
    (void)(pgsz);
    AKMALLOC_ASSERT_ALWAYS(pgsz == ak_page_size());
    void* mem = AKMALLOC_MMAP(sz);
    DBG_PRINTF("osmap,%p,%zu,%zu pages,iswhole %d\n", mem, sz, sz/AKMALLOC_DEFAULT_PAGE_SIZE, sz == AKMALLOC_DEFAULT_PAGE_SIZE*(sz/AKMALLOC_DEFAULT_PAGE_SIZE));
    return mem;
}

static void ak_os_free(void* p, size_t sz)
{
    DBG_PRINTF("osunmap,%p,%zu\n", p, sz);
    AKMALLOC_MUNMAP(p, sz);
}

ak_inline static void* ak_page_start_before(void* p)
{
    return (void*)((ak_sz)p & (~(ak_sz)(AKMALLOC_DEFAULT_PAGE_SIZE - 1)));
}

ak_inline static const void* ak_page_start_before_const(const void* p)
{
    return (void*)((ak_sz)p & (~(ak_sz)(AKMALLOC_DEFAULT_PAGE_SIZE - 1)));
}
/********************** os alloc end **********/
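
/*
 * Illustrative sketch (not part of the allocator): reserving and returning
 * whole pages through the OS layer above. The function name is hypothetical.
 */
#if 0
static void example_os_alloc(void)
{
    ak_sz sz = 4 * AKMALLOC_DEFAULT_PAGE_SIZE;
    void* mem = ak_os_alloc(sz);
    if (mem) {
        /* mappings are page aligned, so rounding down is a no-op here */
        AKMALLOC_ASSERT(ak_page_start_before(mem) == mem);
        ak_os_free(mem, sz);
    }
}
#endif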

/********************** mallocstate config begin **********/

#if !defined(AKMALLOC_USE_LOCKS) || AKMALLOC_USE_LOCKS
# define AK_MALLOCSTATE_USE_LOCKS
#endif

#if defined(AK_MALLOCSTATE_USE_LOCKS)
# define AK_SLAB_USE_LOCKS
# define AK_CA_USE_LOCKS
# define AKMALLOC_LOCK_DEFINE(nm) ak_spinlock nm
# define AKMALLOC_LOCK_INIT(lk) ak_spinlock_init((lk))
# define AKMALLOC_LOCK_ACQUIRE(lk) ak_spinlock_acquire((lk))
# define AKMALLOC_LOCK_RELEASE(lk) ak_spinlock_release((lk))
#else
# define AKMALLOC_LOCK_DEFINE(nm)
# define AKMALLOC_LOCK_INIT(lk)
# define AKMALLOC_LOCK_ACQUIRE(lk)
# define AKMALLOC_LOCK_RELEASE(lk)
#endif

#if !defined(AK_COALESCE_SEGMENT_GRANULARITY)
# define AK_COALESCE_SEGMENT_GRANULARITY (((size_t)1) << 18) /* 256KB */
#endif

#if !defined(AK_SEG_CBK_DEFINED)

typedef int (*ak_seg_cbk)(const void*, size_t);
#endif
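
/*
 * Illustrative sketch (not part of the allocator): a segment-walk callback
 * matching the ak_seg_cbk signature. The name is hypothetical, and the
 * exact meaning of the return value (continue vs. stop the walk) is not
 * shown in this listing.
 */
#if 0
static int example_dump_segment(const void* p, size_t sz)
{
    DBG_PRINTF("segment,%p,%zu\n", p, sz);
    return 1;
}
#endif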

/********************** mallocstate config end ************/

/********************** slab begin **********************/
typedef struct ak_slab_tag ak_slab;

typedef struct ak_slab_root_tag ak_slab_root;

#if defined(AK_SLAB_USE_LOCKS)
# define AK_SLAB_LOCK_DEFINE(nm) ak_spinlock nm
# define AK_SLAB_LOCK_INIT(root) ak_spinlock_init(ak_as_ptr((root)->LOCKED))
# define AK_SLAB_LOCK_ACQUIRE(root) ak_spinlock_acquire(ak_as_ptr((root)->LOCKED))
# define AK_SLAB_LOCK_RELEASE(root) ak_spinlock_release(ak_as_ptr((root)->LOCKED))
#else
# define AK_SLAB_LOCK_DEFINE(nm)
# define AK_SLAB_LOCK_INIT(root)
# define AK_SLAB_LOCK_ACQUIRE(root)
# define AK_SLAB_LOCK_RELEASE(root)
#endif

struct ak_slab_tag
{
    ak_slab* fd;
    ak_slab* bk;
    ak_slab_root* root;
    ak_bitset512 avail;
    void* _unused;
};

struct ak_slab_root_tag
{
    ak_u32 sz;
    ak_u32 npages;
    ak_u32 navail;
    ak_u32 nempty;
    ak_u32 release;
    ak_u32 _unused;

    ak_slab partial_root;
    ak_slab full_root;
    ak_slab empty_root;

    ak_u32 RELEASE_RATE;
    ak_u32 MAX_PAGES_TO_FREE;

    AK_SLAB_LOCK_DEFINE(LOCKED);
};

#if !defined(AK_SLAB_RELEASE_RATE)
# define AK_SLAB_RELEASE_RATE 127
#endif

#if !defined(AK_SLAB_MAX_PAGES_TO_FREE)
# define AK_SLAB_MAX_PAGES_TO_FREE AK_SLAB_RELEASE_RATE
#endif

/**************************************************************/
/* P R I V A T E */
/**************************************************************/

#define ak_slab_unlink(slab) \
    do { \
        ak_slab* const sU = (slab); \
        sU->bk->fd = (sU->fd); \
        sU->fd->bk = (sU->bk); \
        sU->fd = sU->bk = AK_NULLPTR; \
    } while (0)

#define ak_slab_link_fd(slab, fwd) \
    do { \
        ak_slab* const sLF = (slab); \
        ak_slab* const fLF = (fwd); \
        sLF->fd = fLF; \
        fLF->bk = sLF; \
    } while (0)

#define ak_slab_link_bk(slab, back) \
    do { \
        ak_slab* const sLB = (slab); \
        ak_slab* const bLB = (back); \
        sLB->bk = bLB; \
        bLB->fd = sLB; \
    } while (0)

#define ak_slab_link(slab, fwd, back) \
    do { \
        ak_slab* const sL = (slab); \
        ak_slab* const fL = (fwd); \
        ak_slab* const bL = (back); \
        ak_slab_link_bk(sL, bL); \
        ak_slab_link_fd(sL, fL); \
    } while (0)

ak_inline static void ak_slab_init_chain_head(ak_slab* s, ak_slab_root* rootp)
{
    s->fd = s->bk = s;
    s->root = rootp;
    ak_bitset512_clear_all(&(s->avail));
}

ak_inline static ak_sz ak_num_pages_for_sz(ak_sz sz)
{
    return (sz)/4;
}
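
/*
 * Worked example (assuming the 4096-byte default page and a 64-bit build):
 * for sz == 64, ak_num_pages_for_sz() requests 16 pages per refill, and
 * since sizeof(ak_slab) is 96 bytes (three pointers, a 64-byte bitmap,
 * one unused pointer), each page holds (4096 - 96) / 64 == 62 objects.
 */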

#define ak_slab_init(m, s, av, r) \
    do { \
        void* slabmem = (m); \
        ak_sz slabsz = (s); \
        ak_sz slabnavail = (av); \
        ak_slab_root* slabroot = (r); \
        \
        AKMALLOC_ASSERT(slabmem); \
        AKMALLOC_ASSERT(slabsz < (AKMALLOC_DEFAULT_PAGE_SIZE - sizeof(ak_slab))); \
        AKMALLOC_ASSERT(slabsz > 0); \
        AKMALLOC_ASSERT(slabsz % 2 == 0); \
        \
        AKMALLOC_ASSERT(slabnavail < 512); \
        AKMALLOC_ASSERT(slabnavail > 0); \
        \
        ak_slab* s = (ak_slab*)slabmem; \
        s->fd = s->bk = AK_NULLPTR; \
        s->root = slabroot; \
        ak_bitset512_clear_all(&(s->avail)); \
        int inavail = (int)slabnavail; \
        for (int i = 0; i < inavail; ++i) { \
            ak_bitset512_set(&(s->avail), i); \
        } \
        (void)slabsz; \
    } while (0)

ak_inline static ak_slab* ak_slab_new_init(char* mem, ak_sz sz, ak_sz navail, ak_slab* fd, ak_slab* bk, ak_slab_root* root)
{
    ak_slab_init(mem, sz, navail, root);
    ak_slab* slab = ak_ptr_cast(ak_slab, mem);
    ak_slab_link(slab, fd, bk);
    return slab;
}

static ak_slab* ak_slab_new_alloc(ak_sz sz, ak_slab* fd, ak_slab* bk, ak_slab_root* root)
{
    const int NPAGES = root->npages;

    // try to acquire a run of pages; each page becomes one slab
    char* const mem = (char*)ak_os_alloc(NPAGES * AKMALLOC_DEFAULT_PAGE_SIZE);
    {// return if no mem
        if (ak_unlikely(!mem)) { return AK_NULLPTR; }
    }

    ak_sz navail = root->navail;

    char* cmem = mem;
    for (int i = 0; i < NPAGES - 1; ++i) {
        ak_slab* nextpage = ak_ptr_cast(ak_slab, (cmem + AKMALLOC_DEFAULT_PAGE_SIZE));
        ak_slab* curr = ak_slab_new_init(cmem, sz, navail, nextpage, bk, root);
        AKMALLOC_ASSERT(ak_bitset512_num_trailing_ones(&(curr->avail)) == (int)navail);
        (void)curr;
        bk = nextpage;
        cmem += AKMALLOC_DEFAULT_PAGE_SIZE;
    }

    ak_slab_new_init(cmem, sz, navail, fd, bk, root);

    return ak_ptr_cast(ak_slab, mem);
}

static ak_slab* ak_slab_new_reuse(ak_sz sz, ak_slab* fd, ak_slab* bk, ak_slab_root* root)
{
    AKMALLOC_ASSERT(root->nempty >= 1);

    ak_sz navail = root->navail;
    ak_slab* const curr = root->empty_root.fd;
    ak_slab_unlink(curr);
    ak_slab_new_init((char*)curr, sz, navail, fd, bk, root);

    --(root->nempty);

    return curr;
}

ak_inline static ak_slab* ak_slab_new(ak_sz sz, ak_slab* fd, ak_slab* bk, ak_slab_root* root)
{
    return (root->nempty > 0)
           ? ak_slab_new_reuse(sz, fd, bk, root)
           : ak_slab_new_alloc(sz, fd, bk, root);
}

#define ak_slab_2_mem(s) (char*)(void*)((s) + 1)

ak_inline static int ak_slab_all_free(ak_slab* s)
{
    const ak_bitset512* pavail = &(s->avail);
    ak_u32 nto;
    ak_bitset512_fill_num_trailing_ones(pavail, nto);
    return nto == s->root->navail;
}

ak_inline static int ak_slab_none_free(ak_slab* s)
{
    const ak_bitset512* pavail = &(s->avail);
    int ntz;
    ak_bitset512_fill_num_trailing_zeros(pavail, ntz);
    return ntz == 512;
}

static void* ak_slab_search(ak_slab* s, ak_sz sz, ak_u32 navail, ak_slab** pslab, int* pntz)
{
    const ak_slab* const root = s;
    void* mem = AK_NULLPTR;
    if (ak_likely(s->fd != root)) {
        AKMALLOC_ASSERT(pslab);
        AKMALLOC_ASSERT(pntz);
        s = s->fd;

        // a partial list entry must not be full
        AKMALLOC_ASSERT(ak_bitset512_num_trailing_zeros(&(s->avail)) != 512);

        const ak_bitset512* pavail = &(s->avail);
        int ntz;
        ak_bitset512_fill_num_trailing_zeros(pavail, ntz);

        AKMALLOC_ASSERT(ak_bitset512_get(&(s->avail), ntz));
        ak_bitset512_clear(&(s->avail), ntz);
        mem = ak_slab_2_mem(s) + (ntz * sz);

        *pslab = s;
        if (ntz == (int)navail - 1) {
            ntz = 512;
        } else {
            ak_bitset512_fill_num_trailing_zeros(pavail, ntz);
        }
        *pntz = ntz;
    }
    return mem;
}

static void ak_slab_release_pages(ak_slab_root* root, ak_slab* s, ak_u32 numtofree)
{
    ak_slab* const r = s;
    ak_slab* next = AK_NULLPTR;
    s = s->fd;
    for (ak_u32 ct = 0; ct < numtofree; ++ct) {
        if (s == r) {
            break;
        } else {
            next = s->fd;
        }
        ak_slab_unlink(s);
        ak_os_free(s, AKMALLOC_DEFAULT_PAGE_SIZE);
        s = next;
    }
}

ak_inline static void ak_slab_release_os_mem(ak_slab_root* root)
{
    ak_u32 numtofree = root->nempty;
    numtofree = (numtofree > root->MAX_PAGES_TO_FREE)
                ? root->MAX_PAGES_TO_FREE
                : numtofree;
    ak_slab_release_pages(root, &(root->empty_root), numtofree);
    root->nempty -= numtofree;
    root->release = 0;
}

/**************************************************************/
/* P U B L I C */
/**************************************************************/

static void ak_slab_init_root(ak_slab_root* s, ak_sz sz, ak_u32 npages, ak_u32 relrate, ak_u32 maxpagefree)
{
    s->sz = (ak_u32)sz;
    s->navail = (ak_u32)(AKMALLOC_DEFAULT_PAGE_SIZE - sizeof(ak_slab))/(ak_u32)sz;
    s->npages = npages;
    s->nempty = 0;
    s->release = 0;

    ak_slab_init_chain_head(&(s->partial_root), s);
    ak_slab_init_chain_head(&(s->full_root), s);
    ak_slab_init_chain_head(&(s->empty_root), s);

    s->RELEASE_RATE = relrate;
    s->MAX_PAGES_TO_FREE = maxpagefree;
    AK_SLAB_LOCK_INIT(s);
}

ak_inline static void ak_slab_init_root_default(ak_slab_root* s, ak_sz sz)
{
    ak_slab_init_root(s, sz, (ak_u32)ak_num_pages_for_sz(sz), (ak_u32)(AK_SLAB_RELEASE_RATE), (ak_u32)(AK_SLAB_MAX_PAGES_TO_FREE));
}

ak_inline static void* ak_slab_alloc(ak_slab_root* root)
{
    int ntz = 0;
    ak_slab* slab = AK_NULLPTR;

    AK_SLAB_LOCK_ACQUIRE(root);
    const ak_sz sz = root->sz;

    void* mem = ak_slab_search(&(root->partial_root), sz, root->navail, &slab, &ntz);

    if (ak_unlikely(!mem)) {
        slab = ak_slab_new(sz, root->partial_root.fd, &(root->partial_root), root);
        if (ak_likely(slab)) {
            AKMALLOC_ASSERT(ak_bitset512_get(&(slab->avail), 0));
            ak_bitset512_clear(&(slab->avail), 0);
            mem = ak_slab_2_mem(slab);
        }
    } else if (ak_unlikely(ntz == 512)) {
        ak_slab_unlink(slab);
        ak_slab_link(slab, root->full_root.fd, &(root->full_root));
    }

    AK_SLAB_LOCK_RELEASE(root);

    return mem;
}

ak_inline static void ak_slab_free(void* p)
{
    char* mem = (char*)p;

    // round down to the page start to find the owning slab
    ak_slab* slab = (ak_slab*)(ak_page_start_before(p));
    AKMALLOC_ASSERT(slab->root);

    ak_slab_root* root = slab->root;
    AK_SLAB_LOCK_ACQUIRE(root);

    int movetopartial = ak_slab_none_free(slab);
    const ak_sz sz = root->sz;

    int idx = (int)(mem - (char*)ak_slab_2_mem(slab))/(int)sz;
    AKMALLOC_ASSERT(!ak_bitset512_get(&(slab->avail), idx));
    ak_bitset512_set(&(slab->avail), idx);

    if (ak_unlikely(movetopartial)) {
        // put at the back of the partial list so the full ones
        // appear at the front
        ak_slab_unlink(slab);
        ak_slab_link(slab, &(root->partial_root), root->partial_root.bk);
    } else if (ak_unlikely(ak_slab_all_free(slab))) {
        ak_slab_unlink(slab);
        ak_slab_link(slab, root->empty_root.fd, &(root->empty_root));
        ++(root->nempty); ++(root->release);
        if (root->release >= root->RELEASE_RATE) {
            ak_slab_release_os_mem(root);
        }
    }

    AK_SLAB_LOCK_RELEASE(root);
}

static void ak_slab_destroy(ak_slab_root* root)
{
    ak_slab_release_pages(root, &(root->empty_root), AK_U32_MAX);
    ak_slab_release_pages(root, &(root->partial_root), AK_U32_MAX);
    ak_slab_release_pages(root, &(root->full_root), AK_U32_MAX);
    root->nempty = 0;
    root->release = 0;
}
/********************** slab end ************************/
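
/*
 * Illustrative sketch (not part of the allocator): a fixed-size object pool
 * built directly on the slab layer. Every allocation from one root returns
 * root->sz bytes. The function name is hypothetical.
 */
#if 0
static void example_slab_usage(void)
{
    ak_slab_root root;
    ak_slab_init_root_default(&root, 64); /* 64-byte objects */
    void* p = ak_slab_alloc(&root);
    if (p) {
        ak_slab_free(p); /* finds the owning slab via the page start */
    }
    ak_slab_destroy(&root);
}
#endif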

/********************** coalescing allocator begin **********************/
#define AK_COALESCE_ALIGN 16

#if !defined(AK_COALESCE_SEGMENT_GRANULARITY)
# define AK_COALESCE_SEGMENT_GRANULARITY 65536 /* 64KB fallback; the config section above defaults this to 256KB */
#endif

#if !defined(AK_COALESCE_SEGMENT_SIZE)
/* defaults to one segment granularity */
# define AK_COALESCE_SEGMENT_SIZE AK_COALESCE_SEGMENT_GRANULARITY
#endif

#if defined(AK_CA_USE_LOCKS)
# define AK_CA_LOCK_DEFINE(nm) ak_spinlock nm
# define AK_CA_LOCK_INIT(root) ak_spinlock_init(ak_as_ptr((root)->LOCKED))
# define AK_CA_LOCK_ACQUIRE(root) ak_spinlock_acquire(ak_as_ptr((root)->LOCKED))
# define AK_CA_LOCK_RELEASE(root) ak_spinlock_release(ak_as_ptr((root)->LOCKED))
#else
# define AK_CA_LOCK_DEFINE(nm)
# define AK_CA_LOCK_INIT(root)
# define AK_CA_LOCK_ACQUIRE(root)
# define AK_CA_LOCK_RELEASE(root)
#endif

typedef ak_sz ak_alloc_info;

typedef struct ak_alloc_node_tag ak_alloc_node;

typedef struct ak_free_list_node_tag ak_free_list_node;

typedef struct ak_ca_segment_tag ak_ca_segment;

typedef struct ak_ca_root_tag ak_ca_root;

struct ak_alloc_node_tag
{
#if AKMALLOC_BITNESS == 32
    ak_alloc_info _unused0;
    ak_alloc_info _unused1;
#endif
    ak_alloc_info previnfo;
    ak_alloc_info currinfo;
};

struct ak_free_list_node_tag
{
    ak_free_list_node* bk;
    ak_free_list_node* fd;
};

struct ak_ca_segment_tag
{
    ak_ca_segment* bk;
    ak_ca_segment* fd;
    ak_sz sz;
    ak_alloc_node* head;
};

struct ak_ca_root_tag
{
    ak_ca_segment main_root;
    ak_ca_segment empty_root;

    ak_free_list_node free_root;

    ak_u32 nempty;
    ak_u32 release;

    ak_u32 RELEASE_RATE;
    ak_u32 MAX_SEGMENTS_TO_FREE;
    ak_sz MIN_SIZE_TO_SPLIT;

    AK_CA_LOCK_DEFINE(LOCKED);
};

/**************************************************************/
/* P R I V A T E */
/**************************************************************/

#define ak_ca_to_sz(p) (((ak_sz)(p)) & ~(AK_COALESCE_ALIGN - 1))

#define ak_ca_is_first(p) (((ak_sz)(p)) & (AK_SZ_ONE << 0))

#define ak_ca_is_last(p) (((ak_sz)(p)) & (AK_SZ_ONE << 1))

#define ak_ca_is_free(p) (((ak_sz)(p)) & (AK_SZ_ONE << 2))

ak_inline static void ak_ca_set_sz(ak_alloc_info* p, ak_sz sz)
{
    AKMALLOC_ASSERT(sz == ak_ca_to_sz((ak_alloc_info)sz));
    // mask with 7 to keep only the three flag bits (first/last/free);
    // the fourth low bit is unused and is not preserved
    *p = (ak_alloc_info)((((ak_sz)*p) & ((ak_sz)7)) |
                         (sz & ~(AK_COALESCE_ALIGN - 1)));
}

ak_inline static void ak_ca_set_is_first(ak_alloc_info* p, int v)
{
    *p = (ak_alloc_info)((((ak_sz)*p) & ~(AK_SZ_ONE << 0)) | (v ? (AK_SZ_ONE << 0) : 0));
}

ak_inline static void ak_ca_set_is_last(ak_alloc_info* p, int v)
{
    *p = (ak_alloc_info)((((ak_sz)*p) & ~(AK_SZ_ONE << 1)) | (v ? (AK_SZ_ONE << 1) : 0));
}

ak_inline static void ak_ca_set_is_free(ak_alloc_info* p, int v)
{
    *p = (ak_alloc_info)((((ak_sz)*p) & ~(AK_SZ_ONE << 2)) | (v ? (AK_SZ_ONE << 2) : 0));
}

ak_inline static ak_alloc_node* ak_ca_next_node(ak_alloc_node* node)
{
    return ak_ca_is_last(node->currinfo)
           ? AK_NULLPTR
           : ak_ptr_cast(ak_alloc_node, ((char*)(node + 1) + ak_ca_to_sz(node->currinfo)));
}

ak_inline static ak_alloc_node* ak_ca_prev_node(ak_alloc_node* node)
{
    return ak_ca_is_first(node->currinfo)
           ? AK_NULLPTR
           : ak_ptr_cast(ak_alloc_node, ((char*)(node - 1) - ak_ca_to_sz(node->previnfo)));
}

ak_inline static void ak_ca_update_footer(ak_alloc_node* p)
{
    ak_alloc_node* n = ak_ca_next_node(p);
    if (n) {
        n->previnfo = p->currinfo;
    }
}

#define ak_free_list_node_unlink(node) \
    do { \
        ak_free_list_node* const sU = (node); \
        sU->bk->fd = (sU->fd); \
        sU->fd->bk = (sU->bk); \
        sU->fd = sU->bk = AK_NULLPTR; \
    } while (0)

#define ak_free_list_node_link_fd(node, fwd) \
    do { \
        ak_free_list_node* const sLF = (node); \
        ak_free_list_node* const fLF = (fwd); \
        sLF->fd = fLF; \
        fLF->bk = sLF; \
    } while (0)

#define ak_free_list_node_link_bk(node, back) \
    do { \
        ak_free_list_node* const sLB = (node); \
        ak_free_list_node* const bLB = (back); \
        sLB->bk = bLB; \
        bLB->fd = sLB; \
    } while (0)

#define ak_free_list_node_link(node, fwd, back) \
    do { \
        ak_free_list_node* const sL = (node); \
        ak_free_list_node* const fL = (fwd); \
        ak_free_list_node* const bL = (back); \
        ak_free_list_node_link_bk(sL, bL); \
        ak_free_list_node_link_fd(sL, fL); \
    } while (0)

#define ak_ca_segment_unlink(node) \
    do { \
        ak_ca_segment* const sU = (node); \
        sU->bk->fd = (sU->fd); \
        sU->fd->bk = (sU->bk); \
        sU->fd = sU->bk = AK_NULLPTR; \
    } while (0)

#define ak_ca_segment_link_fd(node, fwd) \
    do { \
        ak_ca_segment* const sLF = (node); \
        ak_ca_segment* const fLF = (fwd); \
        sLF->fd = fLF; \
        fLF->bk = sLF; \
    } while (0)

#define ak_ca_segment_link_bk(node, back) \
    do { \
        ak_ca_segment* const sLB = (node); \
        ak_ca_segment* const bLB = (back); \
        sLB->bk = bLB; \
        bLB->fd = sLB; \
    } while (0)

#define ak_ca_segment_link(node, fwd, back) \
    do { \
        ak_ca_segment* const sL = (node); \
        ak_ca_segment* const fL = (fwd); \
        ak_ca_segment* const bL = (back); \
        ak_ca_segment_link_bk(sL, bL); \
        ak_ca_segment_link_fd(sL, fL); \
    } while (0)

#define ak_circ_list_for_each(type, name, list) \
    type* name = (list)->fd; \
    for (type* const iterroot = (list); name != iterroot; name = name->fd)

#define ak_ca_aligned_size(x) ((x) ? (((x) + AK_COALESCE_ALIGN - 1) & ~(AK_COALESCE_ALIGN - 1)) : AK_COALESCE_ALIGN)

#define ak_ca_aligned_segment_size(x) (((x) + (AK_COALESCE_SEGMENT_SIZE) - 1) & ~((AK_COALESCE_SEGMENT_SIZE) - 1))
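
/*
 * Worked examples of the rounding macros above (assuming the default
 * 16-byte coalesce alignment):
 *   ak_ca_aligned_size(0)  == 16   (zero-size requests still get a slot)
 *   ak_ca_aligned_size(17) == 32
 *   ak_ca_aligned_segment_size(1) == AK_COALESCE_SEGMENT_SIZE
 */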

ak_inline static void* ak_ca_search_free_list(ak_free_list_node* root, ak_sz sz, ak_sz splitsz)
{
    AKMALLOC_ASSERT(splitsz >= sizeof(ak_free_list_node));
    AKMALLOC_ASSERT(splitsz % AK_COALESCE_ALIGN == 0);

    // add the per-node overhead
    splitsz += sizeof(ak_alloc_node);

    // walk the list, take the first element that fits, and split if required
    ak_circ_list_for_each(ak_free_list_node, node, root) {
        ak_alloc_node* n = ((ak_alloc_node*)(node)) - 1;
        AKMALLOC_ASSERT(ak_ca_is_free(n->currinfo));
        ak_sz nodesz = ak_ca_to_sz(n->currinfo);
        if (nodesz >= sz) {
            if ((nodesz - sz) > splitsz) {
                // split and assign
                ak_alloc_node* newnode = ak_ptr_cast(ak_alloc_node, (((char*)node) + sz));
                int islast = ak_ca_is_last(n->currinfo);

                ak_ca_set_sz(ak_as_ptr(n->currinfo), sz);
                ak_ca_set_is_last(ak_as_ptr(n->currinfo), 0);
                ak_ca_set_is_free(ak_as_ptr(n->currinfo), 0);
                ak_ca_update_footer(n);

                ak_ca_set_sz(ak_as_ptr(newnode->currinfo), nodesz - sz - sizeof(ak_alloc_node));
                ak_ca_set_is_first(ak_as_ptr(newnode->currinfo), 0);
                ak_ca_set_is_last(ak_as_ptr(newnode->currinfo), islast);
                ak_ca_set_is_free(ak_as_ptr(newnode->currinfo), 1);
                ak_ca_update_footer(newnode);

                // the remainder takes over node's place in the free list
                ak_free_list_node* fl = (ak_free_list_node*)(newnode + 1);
                ak_free_list_node_link(fl, node->fd, node->bk);
                AKMALLOC_ASSERT(n->currinfo == newnode->previnfo);
            } else {
                // return as is
                ak_ca_set_is_free(ak_as_ptr(n->currinfo), 0);
                ak_ca_update_footer(n);
                ak_free_list_node_unlink(node);
            }
            return node;
        }
    }
    return AK_NULLPTR;
}

static int ak_ca_add_new_segment(ak_ca_root* root, char* mem, ak_sz sz)
{
    if (ak_likely(mem)) {
        // make a segment header at the end of the mapping
        ak_ca_segment* seg = ak_ptr_cast(ak_ca_segment, (mem + sz - sizeof(ak_ca_segment)));
        ak_ca_segment_link(seg, root->main_root.fd, ak_as_ptr(root->main_root));
        seg->sz = sz;
        seg->head = ak_ptr_cast(ak_alloc_node, mem);
        {// add to free list
            ak_alloc_node* hd = seg->head;
            ak_sz actualsize = (sz - sizeof(ak_alloc_node) - sizeof(ak_ca_segment));
            // store actual size in previnfo
            hd->previnfo = actualsize;
            ak_ca_set_is_first(ak_as_ptr(hd->currinfo), 1);
            ak_ca_set_is_last(ak_as_ptr(hd->currinfo), 1);
            ak_ca_set_is_free(ak_as_ptr(hd->currinfo), 1);
            ak_ca_set_sz(ak_as_ptr(hd->currinfo), actualsize);
            ak_free_list_node* fl = (ak_free_list_node*)(hd + 1);
            ak_free_list_node_link(fl, root->free_root.fd, ak_as_ptr(root->free_root));
        }
        return 1;
    }
    return 0;
}

static int ak_ca_get_new_segment(ak_ca_root* root, ak_sz sz)
{
    // align to a segment size multiple
    sz += sizeof(ak_ca_segment) + sizeof(ak_alloc_node) + sizeof(ak_free_list_node);
    sz = ak_ca_aligned_segment_size(sz);

    // search empty_root for a segment that is at least as big
    char* mem = AK_NULLPTR;
    ak_sz segsz = sz;
    ak_circ_list_for_each(ak_ca_segment, seg, ak_as_ptr(root->empty_root)) {
        if (seg->sz >= sz) {
            mem = (char*)(seg->head);
            segsz = seg->sz;
            ak_ca_segment_unlink(seg);
            --(root->nempty);
            break;
        }
    }

    return ak_ca_add_new_segment(root, mem ? mem : ((char*)ak_os_alloc(sz)), segsz);
}

static ak_u32 ak_ca_return_os_mem(ak_ca_segment* r, ak_u32 num)
{
    ak_u32 ct = 0;
    ak_ca_segment* next = r->fd;
    ak_ca_segment* curr = next;
    for (; curr != r; curr = next) {
        if (ct >= num) {
            break;
        }
        next = curr->fd;
        ak_ca_segment_unlink(curr);
        ak_os_free(curr->head, curr->sz);
        ++ct;
    }
    return ct;
}

/**************************************************************/
/* P U B L I C */
/**************************************************************/

static void ak_ca_init_root(ak_ca_root* root, ak_u32 relrate, ak_u32 maxsegstofree)
{
    AKMALLOC_ASSERT_ALWAYS(AK_COALESCE_SEGMENT_SIZE % AK_COALESCE_SEGMENT_GRANULARITY == 0);
    AKMALLOC_ASSERT_ALWAYS(((AK_COALESCE_SEGMENT_SIZE & (AK_COALESCE_SEGMENT_SIZE - 1)) == 0) && "Segment size must be a power of 2");

    ak_ca_segment_link(&(root->main_root), &(root->main_root), &(root->main_root));
    ak_ca_segment_link(&(root->empty_root), &(root->empty_root), &(root->empty_root));
    ak_free_list_node_link(&(root->free_root), &(root->free_root), &(root->free_root));
    root->nempty = root->release = 0;

    root->RELEASE_RATE = relrate;
    root->MAX_SEGMENTS_TO_FREE = maxsegstofree;
    root->MIN_SIZE_TO_SPLIT = (sizeof(ak_free_list_node) >= AK_COALESCE_ALIGN) ? sizeof(ak_free_list_node) : AK_COALESCE_ALIGN;
    AK_CA_LOCK_INIT(root);
}

ak_inline static void ak_ca_init_root_default(ak_ca_root* root)
{
#if AKMALLOC_BITNESS == 32
    static const ak_u32 rate = 255;
#else
    static const ak_u32 rate = 2047;
#endif
    ak_ca_init_root(root, rate, rate);
}
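
/*
 * Illustrative sketch (not part of the allocator): direct use of the
 * coalescing allocator. Freeing both blocks exercises the merge paths in
 * ak_ca_free() further below; whether the two allocations are adjacent
 * depends on the free-list state, so the coalescing comment is an
 * assumption for a fresh root. The function name is hypothetical.
 */
#if 0
static void example_ca_usage(void)
{
    ak_ca_root root;
    ak_ca_init_root_default(&root);
    void* a = ak_ca_alloc(&root, 100); /* rounded up to 112 (16-byte aligned) */
    void* b = ak_ca_alloc(&root, 200); /* rounded up to 208 */
    ak_ca_free(&root, a);
    ak_ca_free(&root, b); /* may coalesce with the free block before it */
}
#endif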
2119 
2128 ak_inline static void* ak_ca_realloc_in_place(ak_ca_root* root, void* mem, ak_sz newsz)
2129 {
2130  void* retmem = AK_NULLPTR;
2131 
2132  ak_alloc_node* n = ak_ptr_cast(ak_alloc_node, mem) - 1;
2133  AKMALLOC_ASSERT(ak_ca_is_free(n->currinfo));
2134  // check if there is a free next, if so, maybe merge
2135  ak_sz sz = ak_ca_to_sz(n->currinfo);
2136 
2137  ak_alloc_node* next = ak_ca_next_node(n);
2138  if (next && ak_ca_is_free(next->currinfo)) {
2139  AKMALLOC_ASSERT(n->currinfo == next->previnfo);
2140  ak_sz nextsz = ak_ca_to_sz(next->currinfo);
2141  ak_sz totalsz = nextsz + sz + sizeof(ak_alloc_node);
2142  if (totalsz >= newsz) {
2143  AK_CA_LOCK_ACQUIRE(root);
2144 
2145  // we could remember the prev and next free entries and relink them
2146  // if the merged block were later split, but reallocs are assumed to
2147  // be rare and a grown allocation will often grow again, so we keep
2148  // it simple and merge the two blocks whole
2149 
2150  ak_free_list_node_unlink((ak_free_list_node*)(next + 1));
2151  // don't need to change attributes on next as it is going away
2152  if (ak_ca_is_last(next->currinfo)) {
2153  ak_ca_set_is_last(ak_as_ptr(n->currinfo), 1);
2154  }
2155  ak_ca_set_sz(ak_as_ptr(n->currinfo), totalsz);
2156  ak_ca_update_footer(n);
2157 
2158  retmem = mem;
2159 
2160  AK_CA_LOCK_RELEASE(root);
2161  }
2162  }
2163 
2164  return retmem;
2165 }
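/*
 * Worked example (illustrative, assuming a 64-bit build where
 * ak_alloc_node is two ak_sz words, i.e. 16 bytes): if the live block
 * holds sz = 48 bytes and the adjacent block is free with nextsz = 64,
 * the merge offers 48 + 64 + 16 = 128 usable bytes, because the
 * absorbed block's header is reclaimed as payload. Any newsz up to
 * that total grows in place; larger requests return AK_NULLPTR so the
 * caller can fall back to allocate-copy-free.
 */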
2166 
2174 static void* ak_ca_alloc(ak_ca_root* root, ak_sz s)
2175 {
2176  // align and round size
2177  ak_sz sz = ak_ca_aligned_size(s);
2178  AK_CA_LOCK_ACQUIRE(root);
2179  // search free list
2180  ak_sz splitsz = root->MIN_SIZE_TO_SPLIT;
2181  void* mem = ak_ca_search_free_list(ak_as_ptr(root->free_root), sz, splitsz);
2182  // add new segment
2183  if (ak_unlikely(!mem)) {
2184  // NOTE: could also move segments from empty_root to main_root
2185  if (ak_likely(ak_ca_get_new_segment(root, sz))) {
2186  mem = ak_ca_search_free_list(ak_as_ptr(root->free_root), sz, splitsz);
2187  AKMALLOC_ASSERT(mem);
2188  }
2189  }
2190  AK_CA_LOCK_RELEASE(root);
2191  return mem;
2192 }
2193 
2199 ak_inline static void ak_ca_free(ak_ca_root* root, void* m)
2200 {
2201  // get alloc header before
2202  ak_alloc_node* node = ((ak_alloc_node*)m) - 1;
2203 
2204  AK_CA_LOCK_ACQUIRE(root);
2205 
2206  ak_alloc_node* nextnode = ak_ca_next_node(node);
2207  ak_alloc_node* prevnode = ak_ca_prev_node(node);
2208  int coalesce = 0;
2209 
2210  // mark as free
2211  AKMALLOC_ASSERT(!ak_ca_is_free(node->currinfo));
2212  AKMALLOC_ASSERT(!nextnode || (node->currinfo == nextnode->previnfo));
2213  ak_ca_set_is_free(ak_as_ptr(node->currinfo), 1);
2214  ak_ca_update_footer(node);
2215 
2216  // NOTE: maybe this should happen at a lower frequency?
2217  // coalesce if free before or if free after or both
2218  if (prevnode && ak_ca_is_free(node->previnfo)) {
2219  // coalesce back
2220  // update node and the footer
2221  ak_sz newsz = ak_ca_to_sz(node->previnfo) + ak_ca_to_sz(node->currinfo) + sizeof(ak_alloc_node);
2222  ak_ca_set_sz(ak_as_ptr(prevnode->currinfo), newsz);
2223  ak_ca_set_is_last(ak_as_ptr(prevnode->currinfo), nextnode == AK_NULLPTR);
2224  ak_ca_update_footer(prevnode);
2225  AKMALLOC_ASSERT(!nextnode || ak_ca_next_node(prevnode) == nextnode);
2226  AKMALLOC_ASSERT(!nextnode || prevnode->currinfo == nextnode->previnfo);
2227  coalesce += 1;
2228  // update free list
2229  }
2230 
2231  if (nextnode && ak_ca_is_free(nextnode->currinfo)) {
2232  // coalesce forward
2233  // update node and the footer
2234  ak_alloc_node* n = (coalesce) ? prevnode : node;
2235  ak_sz newsz = ak_ca_to_sz(n->currinfo) + ak_ca_to_sz(nextnode->currinfo) + sizeof(ak_alloc_node);
2236  ak_ca_set_sz(ak_as_ptr(n->currinfo), newsz);
2237  ak_ca_set_is_last(ak_as_ptr(n->currinfo), ak_ca_is_last(nextnode->currinfo));
2238  ak_ca_update_footer(n);
2239  AKMALLOC_ASSERT(ak_ca_is_last(n->currinfo) || (n->currinfo == ak_ca_next_node(nextnode)->previnfo));
2240  coalesce += 2;
2241  }
2242 
2243  // update free lists
2244  ak_alloc_node* tocheck = AK_NULLPTR;
2245  switch (coalesce) {
2246  case 0: {
2247  // thread directly
2248  ak_free_list_node* fl = (ak_free_list_node*)(node + 1);
2249  ak_free_list_node_link(fl, root->free_root.fd, ak_as_ptr(root->free_root));
2250  }
2251  break;
2252  case 1: {
2253  // prevnode already threaded through
2254  tocheck = prevnode;
2255  }
2256  break;
2257  case 2: {
2258  // copy free list entry from nextnode
2259  ak_free_list_node* fl = (ak_free_list_node*)(node + 1);
2260  ak_free_list_node* nextfl = (ak_free_list_node*)(nextnode + 1);
2261  ak_free_list_node_link(fl, nextfl->fd, nextfl->bk);
2262  tocheck = node;
2263  }
2264  break;
2265  case 3: {
2266  ak_free_list_node* nextfl = (ak_free_list_node*)(nextnode + 1);
2267  ak_free_list_node_unlink(nextfl);
2268  tocheck = prevnode;
2269  }
2270  break;
2271  default:
2272  AKMALLOC_ASSERT_ALWAYS(0 && "Should not get here!");
2273  break;
2274  }
2275 
2276  // move to empty if segment is empty
2277 
2278  if (tocheck && ak_ca_is_first(tocheck->currinfo) && ak_ca_is_last(tocheck->currinfo)) {
2279  // remove free list entry
2280  ak_free_list_node* fl = (ak_free_list_node*)(tocheck + 1);
2281  ak_free_list_node_unlink(fl);
2282  // actual size is in tocheck->previnfo
2283  AKMALLOC_ASSERT(tocheck->previnfo == ak_ca_to_sz(tocheck->currinfo));
2284  ak_ca_segment* seg = ak_ptr_cast(ak_ca_segment, ((char*)(tocheck + 1) + tocheck->previnfo));
2285  AKMALLOC_ASSERT(tocheck->previnfo == (seg->sz - sizeof(ak_alloc_node) - sizeof(ak_ca_segment)));
2286  ak_ca_segment_unlink(seg);
2287  ak_ca_segment_link(seg, root->empty_root.fd, ak_as_ptr(root->empty_root));
2288  ++(root->nempty); ++(root->release);
2289  // check if we should free empties
2290  if (root->release >= root->RELEASE_RATE) {
2291  // release segment
2292  ak_u32 nrem = ak_ca_return_os_mem(ak_as_ptr(root->empty_root), root->MAX_SEGMENTS_TO_FREE);
2293  root->nempty -= nrem;
2294  root->release = 0;
2295  }
2296  }
2297 
2298  AK_CA_LOCK_RELEASE(root);
2299 }
2300 
2305 static void ak_ca_destroy(ak_ca_root* root)
2306 {
2307  ak_ca_return_os_mem(ak_as_ptr(root->main_root), AK_U32_MAX);
2308  ak_ca_return_os_mem(ak_as_ptr(root->empty_root), AK_U32_MAX);
2309  root->nempty = root->release = 0;
2310 }
2311 /********************** coalescing allocator end ************************/
2312 
2313 /********************** mallocstate begin **********************/
2436 static void* ak_memset(void* m, int v, ak_sz sz)
2437 {
2438  char* mem = (char*)m;
2439  for (ak_sz i = 0; i != sz; ++i) {
2440  mem[i] = v;
2441  }
2442  return m;
2443 }
2444 
2445 static void ak_memcpy(void* d, const void* s, ak_sz sz)
2446 {
2447  char* mem = (char*)d;
2448  const char* srcmem = (const char*)s;
2449  for (ak_sz i = 0; i != sz; ++i) {
2450  mem[i] = srcmem[i];
2451  }
2452 }
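/*
 * These byte-wise loops keep the allocator free of a libc dependency
 * (it may itself back libc's malloc); an optimizing compiler will
 * typically vectorize them. Note that ak_memcpy assumes non-overlapping
 * buffers, which holds for its one caller here, ak_realloc_from_state.
 */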
2453 
2454 // Coalescing allocs give 16-byte aligned memory where the preamble
2455 // word uses its three low bits for flags; the fourth bit is always
2456 // zero. We use that spare bit to tag slab and mmap allocations so
2457 // they can be told apart from coalescing output. The patterns below
2458 // are written least significant bit first:
2459 // xxx0 - coalesce (bit 3 clear)
2460 // 0101 - slab (value 10); 1001 - mmap (value 9)
2461 #define ak_alloc_type_bits(p) \
2462  ((*(((const ak_sz*)(p)) - 1)) & (AK_COALESCE_ALIGN - 1))
2463 
2464 #define ak_alloc_type_coalesce(sz) \
2465  ((((ak_sz)(sz)) & ((ak_sz)8)) == 0)
2466 
2467 #define ak_alloc_type_slab(sz) \
2468  ((((ak_sz)(sz)) & (AK_COALESCE_ALIGN - 1)) == 10)
2469 
2470 #define ak_alloc_type_mmap(sz) \
2471  ((((ak_sz)(sz)) & (AK_COALESCE_ALIGN - 1)) == 9)
2472 
2473 #define ak_alloc_mark_coalesce(p) ((void)(p))
2474 
2475 #define ak_alloc_mark_slab(p) \
2476  *(((ak_sz*)(p)) - 1) = ((ak_sz)10)
2477 
2478 #define ak_alloc_mark_mmap(p) \
2479  *(((ak_sz*)(p)) - 1) = ((ak_sz)9)
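/*
 * Illustrative reading of the tag (assuming currinfo is the node field
 * adjacent to the payload, as this scheme requires): after
 * ak_alloc_mark_slab(p), the word below p holds 10 (LSB-first 0101),
 * so ak_alloc_type_slab() matches; after ak_alloc_mark_mmap(p) it
 * holds 9 (LSB-first 1001). A coalescing block leaves its header's
 * currinfo there, whose bit 3 is never set, so any xxx0 pattern is
 * classified as coalesce.
 */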
2480 
2481 #if defined(AK_MIN_SLAB_ALIGN_16)
2482 # define ak_slab_mod_sz(x) (ak_ca_aligned_size((x)) + AK_COALESCE_ALIGN)
2483 # define ak_slab_alloc_2_mem(x) (((ak_sz*)(x)) + (AK_COALESCE_ALIGN / sizeof(ak_sz)))
2484 # define ak_slab_mem_2_alloc(x) (((ak_sz*)(x)) - (AK_COALESCE_ALIGN / sizeof(ak_sz)))
2485 # define ak_slab_usable_size(x) ((x) - AK_COALESCE_ALIGN)
2486 #else
2487 # define ak_slab_mod_sz(x) (ak_ca_aligned_size((x) + sizeof(ak_sz)))
2488 # define ak_slab_alloc_2_mem(x) (((ak_sz*)(x)) + 1)
2489 # define ak_slab_mem_2_alloc(x) (((ak_sz*)(x)) - 1)
2490 # define ak_slab_usable_size(x) ((x) - sizeof(ak_sz))
2491 #endif
2492 
2493 /* cannot be changed. we have fixed size slabs */
2494 #define MIN_SMALL_REQUEST 256
2495 
2496 #if !defined(MMAP_SIZE)
2497 # if !AKMALLOC_WINDOWS
2498 # define MMAP_SIZE (AK_SZ_ONE << 20) /* 1 MB */
2499 # else/* Windows */
2500 
2504 # define MMAP_SIZE AK_SZ_MAX
2505 # endif
2506 #endif
2507 
2508 
2509 #define NSLABS 16
2510 
2514 static const ak_sz SLAB_SIZES[NSLABS] = {
2515  16, 32, 48, 64, 80, 96, 112, 128,
2516  144, 160, 176, 192, 208, 224, 240, 256
2517 };
2518 
2519 #define NCAROOTS 8
2520 
2526 static const ak_sz CA_SIZES[NCAROOTS] = {
2527  768, 1408, 2048, 4096, 8192, 16384, 65536, MMAP_SIZE
2528 };
2529 
2530 typedef struct ak_malloc_state_tag ak_malloc_state;
2531 
2535 struct ak_malloc_state_tag
2536 {
2537  ak_sz init;
2538  ak_slab_root slabs[NSLABS];
2539  ak_ca_root ca[NCAROOTS];
2540  ak_ca_segment map_root;
2542  AKMALLOC_LOCK_DEFINE(MAP_LOCK);
2543 };
2544 
2545 #if !defined(AKMALLOC_COALESCING_ALLOC_RELEASE_RATE)
2546 # define AKMALLOC_COALESCING_ALLOC_RELEASE_RATE 24
2547 #endif
2548 
2549 #if !defined(AKMALLOC_COALESCING_ALLOC_MAX_PAGES_TO_FREE)
2550 # define AKMALLOC_COALESCING_ALLOC_MAX_PAGES_TO_FREE AKMALLOC_COALESCING_ALLOC_RELEASE_RATE
2551 #endif
2552 
2553 static void ak_try_reclaim_memory(ak_malloc_state* m)
2554 {
2555  // for each slab, reclaim empty pages
2556  for (ak_sz i = 0; i < NSLABS; ++i) {
2557  ak_slab_root* s = ak_as_ptr(m->slabs[i]);
2558  ak_slab_release_pages(s, ak_as_ptr(s->empty_root), AK_U32_MAX);
2559  s->nempty = 0;
2560  s->release = 0;
2561  }
2562  // return unused segments in ca
2563  for (ak_sz i = 0; i < NCAROOTS; ++i) {
2564  ak_ca_root* ca = ak_as_ptr(m->ca[i]);
2565  ak_ca_return_os_mem(ak_as_ptr(ca->empty_root), AK_U32_MAX);
2566  ca->nempty = 0;
2567  ca->release = 0;
2568  }
2569 
2570  // all memory in mmap-ed regions is being used. we return pages immediately
2571  // when they are freed.
2572 }
2573 
2574 ak_inline static void* ak_try_slab_alloc(ak_malloc_state* m, size_t sz)
2575 {
2576  AKMALLOC_ASSERT(sz % AK_COALESCE_ALIGN == 0);
2577  ak_sz idx = (sz >> 4) - 1;
2578  ak_sz* mem = (ak_sz*)ak_slab_alloc(ak_as_ptr(m->slabs[idx]));
2579  if (ak_likely(mem)) {
2580  ak_alloc_mark_slab(ak_slab_alloc_2_mem(mem)); // we overallocate
2581  AKMALLOC_ASSERT(ak_alloc_type_slab(ak_alloc_type_bits(ak_slab_alloc_2_mem(mem))));
2582  mem = ak_slab_alloc_2_mem(mem);
2583  }
2584  return mem;
2585 }
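/*
 * Example (illustrative, assuming a 64-bit build without
 * AK_MIN_SLAB_ALIGN_16): a 40-byte request becomes 48 after
 * ak_slab_mod_sz adds the tag word and rounds to AK_COALESCE_ALIGN, so
 * idx = (48 >> 4) - 1 = 2 and it is served from slabs[2], whose block
 * size SLAB_SIZES[2] == 48.
 */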
2586 
2587 ak_inline static void* ak_try_coalesce_alloc(ak_malloc_state* m, ak_ca_root* proot, size_t sz)
2588 {
2589  ak_sz* mem = (ak_sz*)ak_ca_alloc(proot, sz);
2590  if (ak_likely(mem)) {
2591  ak_alloc_mark_coalesce(mem);
2592  AKMALLOC_ASSERT(ak_alloc_type_coalesce(ak_alloc_type_bits(mem)));
2593  }
2594  return mem;
2595 }
2596 
2597 ak_inline static void* ak_try_alloc_mmap(ak_malloc_state* m, size_t sz)
2598 {
2599  AKMALLOC_LOCK_ACQUIRE(ak_as_ptr(m->MAP_LOCK));
2600  ak_ca_segment* mem = (ak_ca_segment*)ak_os_alloc(sz);
2601  if (ak_likely(mem)) {
2602  ak_alloc_mark_mmap(mem + 1);
2603  AKMALLOC_ASSERT(ak_alloc_type_mmap(ak_alloc_type_bits(mem + 1)));
2604  mem->sz = sz;
2605  ak_ca_segment_link(mem, m->map_root.fd, ak_as_ptr(m->map_root));
2606  mem += 1;
2607  }
2608  AKMALLOC_LOCK_RELEASE(ak_as_ptr(m->MAP_LOCK));
2609 
2610  return mem;
2611 }
2612 
2613 ak_inline static ak_ca_root* ak_find_ca_root(ak_malloc_state* m, ak_sz sz)
2614 {
2615  ak_sz i = 0;
2616  for (; i < NCAROOTS; ++i) {
2617  if (CA_SIZES[i] >= sz) {
2618  break;
2619  }
2620  }
2621  AKMALLOC_ASSERT(i < NCAROOTS);
2622  return ak_as_ptr(m->ca[i]);
2623 }
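/*
 * Example (illustrative): the scan picks the first root whose class
 * covers the request, e.g. sz = 3000 selects ca[3] (CA_SIZES[3] ==
 * 4096) and sz = 70000 selects ca[7], the MMAP_SIZE class. The assert
 * encodes the expectation that every size routed here is below
 * MMAP_SIZE.
 */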
2624 
2625 ak_inline static void* ak_try_alloc(ak_malloc_state* m, size_t sz)
2626 {
2627  void* retmem = AK_NULLPTR;
2628  ak_sz modsz = ak_slab_mod_sz(sz);
2629  if (modsz <= MIN_SMALL_REQUEST) {
2630  retmem = ak_try_slab_alloc(m, modsz);
2631  DBG_PRINTF("a,slab,%p,%llu\n", retmem, modsz);
2632  } else if (sz < MMAP_SIZE) {
2633  const ak_sz alnsz = ak_ca_aligned_size(sz);
2634  ak_ca_root* proot = ak_find_ca_root(m, sz);
2635  retmem = ak_try_coalesce_alloc(m, proot, alnsz);
2636  DBG_PRINTF("a,ca[%d],%p,%llu\n", (int)(proot-ak_as_ptr(m->ca[0])), retmem, alnsz);
2637  } else {
2638  sz += sizeof(ak_ca_segment);
2639  const ak_sz actsz = ak_ca_aligned_segment_size(sz);
2640  retmem = ak_try_alloc_mmap(m, actsz);
2641  DBG_PRINTF("a,mmap,%p,%llu\n", retmem, actsz);
2642  }
2643 
2644  return retmem;
2645 }
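/*
 * Dispatch summary (illustrative sizes): a request takes one of three
 * routes:
 *   padded size <= 256 bytes -> fixed-size slabs (e.g. 100 bytes);
 *   size < MMAP_SIZE -> a coalescing root (e.g. 5000 bytes -> ca[4],
 *   the 8192 class; MMAP_SIZE defaults to 1 MB off Windows);
 *   anything larger -> a dedicated OS mapping, returned to the OS
 *   wholesale on free.
 */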
2646 
2647 
2648 static void* ak_aligned_alloc_from_state_no_checks(ak_malloc_state* m, size_t aln, size_t sz)
2649 {
2650  ak_sz req = ak_ca_aligned_size(sz);
2651  req += aln + (2 * sizeof(ak_alloc_node)) + (2 * sizeof(ak_free_list_node));
2652  char* mem = AK_NULLPTR;
2653  // must request from coalesce alloc so we can return the extra piece
2654  ak_ca_root* ca = ak_find_ca_root(m, req);
2655 
2656  mem = (char*)ak_ca_alloc(ca, req);
2657  if (ak_likely(mem)) {
2658  ak_alloc_node* node = ak_ptr_cast(ak_alloc_node, mem) - 1;
2659  if ((((ak_sz)mem) & (aln - 1)) != 0) {
2660  // misaligned
2661  AK_CA_LOCK_ACQUIRE(ca);
2662  char* alnpos = (char*)(((ak_sz)(mem + aln - 1)) & ~(aln - 1));
2663  ak_alloc_node* alnnode = ak_ptr_cast(ak_alloc_node, alnpos) - 1;
2664  AKMALLOC_ASSERT((ak_sz)(alnpos - mem) >= (sizeof(ak_free_list_node) + sizeof(ak_alloc_node)));
2665  ak_sz actsz = ak_ca_to_sz(node->currinfo);
2666  int islast = ak_ca_is_last(node->currinfo);
2667 
2668  ak_ca_set_sz(ak_as_ptr(node->currinfo), alnpos - mem - sizeof(ak_alloc_node));
2669  ak_ca_set_is_last(ak_as_ptr(node->currinfo), 0);
2670  ak_ca_set_is_free(ak_as_ptr(node->currinfo), 1);
2671 
2672  ak_ca_set_sz(ak_as_ptr(alnnode->currinfo), actsz - ak_ca_to_sz(node->currinfo));
2673  ak_ca_set_is_last(ak_as_ptr(alnnode->currinfo), islast);
2674  ak_ca_set_is_free(ak_as_ptr(alnnode->currinfo), 0);
2675 
2676  ak_ca_update_footer(node);
2677  ak_ca_update_footer(alnnode);
2678  mem = alnpos;
2679  AK_CA_LOCK_RELEASE(ca);
2680  }
2681  return mem;
2682  }
2683  return AK_NULLPTR;
2684 }
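/*
 * Sketch of the alignment trick above (not part of the original
 * source): the request is padded by aln plus bookkeeping, and when the
 * returned block is misaligned it is split in two:
 *
 *   mem                            alnpos (aln-aligned)
 *    | front fragment, marked free | block handed to the caller ... |
 *
 * The caller's block starts at a fresh ak_alloc_node just below
 * alnpos, so a later ak_ca_free() treats it like any other coalescing
 * block and can merge it with the free front fragment.
 */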
2685 
2686 ak_inline static size_t ak_malloc_usable_size_in_state(const void* mem);
2687 
2688 /**************************************************************/
2689 /* P U B L I C */
2690 /**************************************************************/
2691 
2696 static void ak_malloc_init_state(ak_malloc_state* s)
2697 {
2698  AKMALLOC_ASSERT_ALWAYS(sizeof(ak_slab) % AK_COALESCE_ALIGN == 0);
2699 
2700  for (ak_sz i = 0; i != NSLABS; ++i) {
2701  ak_slab_init_root_default(ak_as_ptr(s->slabs[i]), SLAB_SIZES[i]);
2702  }
2703 
2704  for (ak_sz i = 0; i != NCAROOTS; ++i) {
2705  ak_ca_init_root(ak_as_ptr(s->ca[i]), AKMALLOC_COALESCING_ALLOC_RELEASE_RATE, AKMALLOC_COALESCING_ALLOC_MAX_PAGES_TO_FREE);
2706  }
2707 
2708  ak_ca_segment_link(ak_as_ptr(s->map_root), ak_as_ptr(s->map_root), ak_as_ptr(s->map_root));
2709 
2710  AKMALLOC_LOCK_INIT(ak_as_ptr(s->MAP_LOCK));
2711  s->init = 1;
2712 }
2713 
2718 static void ak_malloc_destroy_state(ak_malloc_state* m)
2719 {
2720  for (ak_sz i = 0; i < NSLABS; ++i) {
2721  ak_slab_destroy(ak_as_ptr(m->slabs[i]));
2722  }
2723  for (ak_sz i = 0; i < NCAROOTS; ++i) {
2724  ak_ca_destroy(ak_as_ptr(m->ca[i]));
2725  }
2726  {// mmaped chunks
2727  ak_ca_segment temp;
2728  ak_circ_list_for_each(ak_ca_segment, seg, &(m->map_root)) {
2729  temp = *seg;
2730  ak_os_free(seg, seg->sz);
2731  seg = &temp;
2732  }
2733  }
2734 }
2735 
2743 static void* ak_malloc_from_state(ak_malloc_state* m, size_t sz)
2744 {
2745  AKMALLOC_ASSERT(m->init);
2746  void* mem = ak_try_alloc(m, sz);
2747  if (ak_unlikely(!mem)) {
2748  ak_try_reclaim_memory(m);
2749  mem = ak_try_alloc(m, sz);
2750  }
2751  return mem;
2752 }
2753 
2759 ak_inline static void ak_free_to_state(ak_malloc_state* m, void* mem)
2760 {
2761  if (ak_likely(mem)) {
2762 #if defined(AKMALLOC_DEBUG_PRINT)
2763  ak_sz ussize = ak_malloc_usable_size_in_state(mem);
2764 #endif/*defined(AKMALLOC_DEBUG_PRINT)*/
2765  ak_sz ty = ak_alloc_type_bits(mem);
2766  if (ak_alloc_type_slab(ty)) {
2767  DBG_PRINTF("d,slab,%p,%llu\n", mem, ussize);
2768  ak_slab_free(ak_slab_mem_2_alloc(mem));
2769  } else if (ak_alloc_type_mmap(ty)) {
2770  DBG_PRINTF("d,mmap,%p,%llu\n", mem, ussize);
2771  AKMALLOC_LOCK_ACQUIRE(ak_as_ptr(m->MAP_LOCK));
2772  ak_ca_segment* seg = ((ak_ca_segment*)mem) - 1;
2773  ak_ca_segment_unlink(seg);
2774  ak_os_free(seg, seg->sz);
2775  AKMALLOC_LOCK_RELEASE(ak_as_ptr(m->MAP_LOCK));
2776  } else {
2777  AKMALLOC_ASSERT(ak_alloc_type_coalesce(ty));
2778  const ak_alloc_node* n = ((const ak_alloc_node*)mem) - 1;
2779  const ak_sz alnsz = ak_ca_to_sz(n->currinfo);
2780  ak_ca_root* proot = ak_find_ca_root(m, alnsz);
2781  DBG_PRINTF("d,ca[%d],%p,%llu\n", (int)(proot-ak_as_ptr(m->ca[0])), mem, alnsz);
2782  ak_ca_free(proot, mem);
2783  }
2784  }
2785 }
2786 
2795 ak_inline static void* ak_realloc_in_place_from_state(ak_malloc_state* m, void* mem, size_t newsz)
2796 {
2797  const ak_sz usablesize = ak_malloc_usable_size_in_state(mem);
2798  if (usablesize >= newsz) {
2799  return mem;
2800  }
2801  if (ak_alloc_type_coalesce(ak_alloc_type_bits(mem))) {
2802  ak_alloc_node* n = ak_ptr_cast(ak_alloc_node, mem) - 1;
2803  AKMALLOC_ASSERT(!ak_ca_is_free(n->currinfo));
2804  // check if there is a free next, if so, maybe merge
2805  ak_sz sz = ak_ca_to_sz(n->currinfo);
2806  ak_ca_root* proot = ak_find_ca_root(m, sz);
2807  if (ak_ca_realloc_in_place(proot, mem, newsz)) {
2808  return mem;
2809  }
2810  }
2811  return AK_NULLPTR;
2812 }
2813 
2826 static void* ak_realloc_from_state(ak_malloc_state* m, void* mem, size_t newsz)
2827 {
2828  if (ak_unlikely(!mem)) {
2829  return ak_malloc_from_state(m, newsz);
2830  }
2831 
2832  if (ak_realloc_in_place_from_state(m, mem, newsz)) {
2833  return mem;
2834  }
2835 
2836  void* newmem = ak_malloc_from_state(m, newsz);
2837  if (!newmem) {
2838  return AK_NULLPTR;
2839  }
2840  if (ak_likely(mem)) {
2841  ak_memcpy(newmem, mem, ak_malloc_usable_size_in_state(mem));
2842  ak_free_to_state(m, mem);
2843  }
2844  return newmem;
2845 }
2846 
2847 
2854 ak_inline static size_t ak_malloc_usable_size_in_state(const void* mem)
2855 {
2856  if (ak_likely(mem)) {
2857  ak_sz ty = ak_alloc_type_bits(mem);
2858  if (ak_alloc_type_slab(ty)) {
2859  // round to page
2860  const ak_slab* slab = (const ak_slab*)(ak_page_start_before_const(mem));
2861  return ak_slab_usable_size(slab->root->sz);
2862  } else if (ak_alloc_type_mmap(ty)) {
2863  return (((const ak_ca_segment*)mem) - 1)->sz - sizeof(ak_ca_segment);
2864  } else {
2865  AKMALLOC_ASSERT(ak_alloc_type_coalesce(ty));
2866  const ak_alloc_node* n = ((const ak_alloc_node*)mem) - 1;
2867  AKMALLOC_ASSERT(!ak_ca_is_free(n->currinfo));
2868  return ak_ca_to_sz(n->currinfo);
2869  }
2870  } else {
2871  return 0;
2872  }
2873 }
2874 
2884 static void* ak_aligned_alloc_from_state(ak_malloc_state* m, size_t aln, size_t sz)
2885 {
2886  if (aln <= AK_COALESCE_ALIGN) {
2887  return ak_malloc_from_state(m, sz);
2888  }
2889  if ((aln & AK_SZ_ONE) || (aln & (aln - 1))) {
2890  size_t a = AK_COALESCE_ALIGN << 1;
2891  while (a < aln) {
2892  a = (a << 1);
2893  }
2894  aln = a;
2895  }
2896  return ak_aligned_alloc_from_state_no_checks(m, aln, sz);
2897 }
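/*
 * Example (illustrative): aln = 24 is not a power of two, so it is
 * rounded up to 32 before the real allocation; aln <= 16 is handled by
 * the ordinary path, whose results are already AK_COALESCE_ALIGN
 * aligned.
 */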
2898 
2899 #define AK_EINVAL 22
2900 #define AK_ENOMEM 12
2901 
2914 static int ak_posix_memalign_from_state(ak_malloc_state* m, void** pmem, size_t aln, size_t sz)
2915 {
2916  void* mem = AK_NULLPTR;
2917  if (aln == AK_COALESCE_ALIGN) {
2918  mem = ak_malloc_from_state(m, sz);
2919  } else {
2920  ak_sz div = (aln / sizeof(ak_sz));
2921  ak_sz rem = (aln & (sizeof(ak_sz) - 1));
2922  if (rem != 0 || div == 0 || (div & (div - AK_SZ_ONE)) != 0) {
2923  return AK_EINVAL;
2924  }
2925  aln = (aln <= AK_COALESCE_ALIGN) ? AK_COALESCE_ALIGN : aln;
2926  mem = ak_aligned_alloc_from_state_no_checks(m, aln, sz);
2927  }
2928 
2929  if (!mem) {
2930  return AK_ENOMEM;
2931  }
2932 
2933  *pmem = mem;
2934  return 0;
2935 }
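/*
 * Usage sketch (illustrative values, via the exported wrapper defined
 * further below):
 *
 *   void* p = AK_NULLPTR;
 *   int rc = ak_posix_memalign(&p, 64, 1000);
 *   // rc == 0 on success with p 64-byte aligned; AK_EINVAL (22) for a
 *   // bad alignment; AK_ENOMEM (12) when memory is exhausted, in
 *   // which case *pmem is left untouched.
 */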
2936 
2942 static void ak_malloc_for_each_segment_in_state(ak_malloc_state* m, ak_seg_cbk cbk)
2943 {
2944  // for each slab, reclaim empty pages
2945  for (ak_sz i = 0; i < NSLABS; ++i) {
2946  ak_slab_root* s = ak_as_ptr(m->slabs[i]);
2947  ak_circ_list_for_each(ak_slab, fslab, &(s->full_root)) {
2948  if (!cbk(fslab, AKMALLOC_DEFAULT_PAGE_SIZE)) {
2949  return;
2950  }
2951  }
2952  ak_circ_list_for_each(ak_slab, pslab, &(s->partial_root)) {
2953  if (!cbk(pslab, AKMALLOC_DEFAULT_PAGE_SIZE)) {
2954  return;
2955  }
2956  }
2957  }
2958 
2959  {// ca roots
2960  for (ak_sz i = 0; i < NCAROOTS; ++i) {
2961  ak_circ_list_for_each(ak_ca_segment, seg, &(m->ca[i].main_root)) {
2962  if (!cbk(seg->head, seg->sz)) {
2963  return;
2964  }
2965  }
2966  }
2967  }
2968 
2969  {// mmaped chunks
2970  ak_circ_list_for_each(ak_ca_segment, seg, &(m->map_root)) {
2971  if (!cbk(seg, seg->sz)) {
2972  return;
2973  }
2974  }
2975  }
2976 }
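/*
 * Usage sketch (illustrative; 'total' is a hypothetical variable, not
 * part of the allocator):
 *
 *   static size_t total = 0;
 *   static int tally_seg(const void* addr, size_t sz)
 *   {
 *       (void)addr;
 *       total += sz;
 *       return 1; // non-zero continues the walk, 0 stops it early
 *   }
 *   // later: ak_malloc_for_each_segment(tally_seg);
 */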
2977 /********************** mallocstate end ************************/
2978 
2979 /***********************************************
2980  * Exported APIs
2981  ***********************************************/
2982 
2983 static int MALLOC_INIT = 0;
2984 
2985 static ak_malloc_state MALLOC_ROOT;
2986 static ak_malloc_state* GMSTATE = AK_NULLPTR;
2987 static ak_spinlock MALLOC_INIT_LOCK = { 0 };
2988 
2989 #define ak_ensure_malloc_state_init() \
2990 { \
2991  if (ak_unlikely(!MALLOC_INIT)) { \
2992  AKMALLOC_LOCK_ACQUIRE(ak_as_ptr(MALLOC_INIT_LOCK)); \
2993  if (MALLOC_INIT != 1) { \
2994  GMSTATE = &MALLOC_ROOT; \
2995  ak_malloc_init_state(GMSTATE); \
2996  MALLOC_INIT = 1; \
2997  } \
2998  AKMALLOC_LOCK_RELEASE(ak_as_ptr(MALLOC_INIT_LOCK)); \
2999  } \
3000  AKMALLOC_ASSERT(MALLOC_ROOT.init); \
3001 }
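/*
 * This is the classic double-checked locking pattern: the
 * unsynchronized read of MALLOC_INIT keeps the common path lock-free,
 * while the re-check under MALLOC_INIT_LOCK guarantees that
 * ak_malloc_init_state() runs exactly once even when several threads
 * race to be first. It assumes the spinlock's release makes the
 * MALLOC_INIT store visible to later acquirers.
 */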
3002 
3003 AK_EXTERN_C_BEGIN
3004 
3005 void* ak_malloc(size_t sz)
3006 {
3007  ak_ensure_malloc_state_init();
3008  return ak_malloc_from_state(GMSTATE, sz);
3009 }
3010 
3011 void* ak_calloc(size_t elsz, size_t numel)
3012 {
3013  ak_ensure_malloc_state_init();
3014  const ak_sz sz = elsz*numel; // NOTE: may overflow for huge inputs
3015  void* mem = ak_malloc_from_state(GMSTATE, sz);
3016  return mem ? ak_memset(mem, 0, sz) : AK_NULLPTR;
3017 }
3018 void ak_free(void* mem)
3019 {
3020  ak_ensure_malloc_state_init();
3021  ak_free_to_state(GMSTATE, mem);
3022 }
3023 
3024 void* ak_aligned_alloc(size_t aln, size_t sz)
3025 {
3026  ak_ensure_malloc_state_init();
3027  return ak_aligned_alloc_from_state(GMSTATE, aln, sz);
3028 }
3029 
3030 int ak_posix_memalign(void** pmem, size_t aln, size_t sz)
3031 {
3032  ak_ensure_malloc_state_init();
3033  return ak_posix_memalign_from_state(GMSTATE, pmem, aln, sz);
3034 }
3035 
3036 void* ak_memalign(size_t aln, size_t sz)
3037 {
3038  ak_ensure_malloc_state_init();
3039  return ak_aligned_alloc_from_state(GMSTATE, aln, sz);
3040 }
3041 
3042 size_t ak_malloc_usable_size(const void* mem)
3043 {
3044  return ak_malloc_usable_size_in_state(mem);
3045 }
3046 
3047 void* ak_realloc(void* mem, size_t newsz)
3048 {
3049  ak_ensure_malloc_state_init();
3050  return ak_realloc_from_state(GMSTATE, mem, newsz);
3051 }
3052 
3053 void* ak_realloc_in_place(void* mem, size_t newsz)
3054 {
3055  ak_ensure_malloc_state_init();
3056  return ak_realloc_in_place_from_state(GMSTATE, mem, newsz);
3057 }
3058 
3059 void ak_malloc_for_each_segment(ak_seg_cbk cbk)
3060 {
3061  ak_ensure_malloc_state_init();
3062  ak_malloc_for_each_segment_in_state(GMSTATE, cbk);
3063 }
3064 
3065 AK_EXTERN_C_END
3066 
3067 #endif/*AKMALLOC_MALLOC_C*/