📄 fastmalloc.cpp
    kernel_supports_tls = true;
  } else {        // some other kernel, we'll be optimistic
    kernel_supports_tls = true;
  }
  // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
}
#  endif  // HAVE_DECL_UNAME
#endif    // HAVE_TLS

// __THROW is defined in glibc systems.  It means, counter-intuitively,
// "This function will never throw an exception."  It's an optional
// optimization tool, but we may need to use it to match glibc prototypes.
#ifndef __THROW    // I guess we're not on a glibc system
# define __THROW   // __THROW is just an optimization, so ok to make it ""
#endif

//-------------------------------------------------------------------
// Configuration
//-------------------------------------------------------------------

// Not all possible combinations of the following parameters make
// sense.  In particular, if kMaxSize increases, you may have to
// increase kNumClasses as well.
static const size_t kPageShift  = 12;
static const size_t kPageSize   = 1 << kPageShift;
static const size_t kMaxSize    = 8u * kPageSize;
static const size_t kAlignShift = 3;
static const size_t kAlignment  = 1 << kAlignShift;
static const size_t kNumClasses = 68;

// Allocates a big block of memory for the pagemap once we reach more than
// 128MB
static const size_t kPageMapBigAllocationThreshold = 128 << 20;

// Minimum number of pages to fetch from system at a time.  Must be
// significantly bigger than kBlockSize to amortize system-call
// overhead, and also to reduce external fragmentation.  Also, we
// should keep this value big because various incarnations of Linux
// have small limits on the number of mmap() regions per
// address-space.
static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);

// Number of objects to move between a per-thread list and a central
// list in one shot.  We want this to be not too small so we can
// amortize the lock overhead for accessing the central list.  Making
// it too big may temporarily cause unnecessary memory wastage in the
// per-thread free list until the scavenger cleans up the list.
static int num_objects_to_move[kNumClasses];

// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list.  We
// want this big to avoid locking the central free-list too often.  It
// should not hurt to make this list somewhat big because the
// scavenging code will shrink it down when its contents are not in use.
static const int kMaxFreeListLength = 256;

// Lower and upper bounds on the per-thread cache sizes
static const size_t kMinThreadCacheSize = kMaxSize * 2;
static const size_t kMaxThreadCacheSize = 2 << 20;

// Default bound on the total amount of thread caches
static const size_t kDefaultOverallThreadCacheSize = 16 << 20;

// For all span-lengths < kMaxPages we keep an exact-size list.
// REQUIRED: kMaxPages >= kMinSystemAlloc;
static const size_t kMaxPages = kMinSystemAlloc;

/* The smallest prime > 2^n */
static int primes_list[] = {
    // Small values might cause high rates of sampling
    // and hence commented out.
    // 2, 5, 11, 17, 37, 67, 131, 257,
    // 521, 1031, 2053, 4099, 8209, 16411,
    32771, 65537, 131101, 262147, 524309, 1048583,
    2097169, 4194319, 8388617, 16777259, 33554467 };
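// A sketch, not part of the original FastMalloc source: the comments above
// state invariants the rest of the file depends on, e.g. "REQUIRED:
// kMaxPages >= kMinSystemAlloc".  Under a C++11 compiler (the original
// predates static_assert) they could be checked at compile time:
#if __cplusplus >= 201103L
static_assert(kMaxPages >= kMinSystemAlloc,
              "exact-size span lists must cover a minimum system allocation");
static_assert(kMaxSize % kPageSize == 0,
              "kMaxSize must be a whole number of pages");
static_assert(kMinThreadCacheSize <= kMaxThreadCacheSize,
              "per-thread cache bounds must be ordered");
#endif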
// Twice the approximate gap between sampling actions.
// I.e., we take one sample approximately once every
//      tcmalloc_sample_parameter/2
// bytes of allocation, i.e., ~ once every 128KB.
// Must be a prime number.
#ifdef NO_TCMALLOC_SAMPLES
DEFINE_int64(tcmalloc_sample_parameter, 0,
             "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
static size_t sample_period = 0;
#else
DEFINE_int64(tcmalloc_sample_parameter, 262147,
             "Twice the approximate gap between sampling actions."
             " Must be a prime number. Otherwise will be rounded up to a "
             " larger prime number");
static size_t sample_period = 262147;
#endif

// Protects sample_period above
static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;

// Parameters for controlling how fast memory is returned to the OS.
DEFINE_double(tcmalloc_release_rate, 1,
              "Rate at which we release unused memory to the system. "
              "Zero means we never release memory back to the system. "
              "Increase this flag to return memory faster; decrease it "
              "to return memory slower. Reasonable rates are in the "
              "range [0,10]");

//-------------------------------------------------------------------
// Mapping from size to size_class and vice versa
//-------------------------------------------------------------------

// Sizes <= 1024 have an alignment >= 8.  So for such sizes we have an
// array indexed by ceil(size/8).  Sizes > 1024 have an alignment >= 128.
// So for these larger sizes we have an array indexed by ceil(size/128).
//
// We flatten both logical arrays into one physical array and use
// arithmetic to compute an appropriate index.  The constants used by
// ClassIndex() were selected to make the flattening work.
//
// Examples:
//   Size       Expression                      Index
//   -------------------------------------------------------
//   0          (0 + 7) / 8                     0
//   1          (1 + 7) / 8                     1
//   ...
//   1024       (1024 + 7) / 8                  128
//   1025       (1025 + 127 + (120<<7)) / 128   129
//   ...
//   32768      (32768 + 127 + (120<<7)) / 128  376
static const size_t kMaxSmallSize = 1024;
static const int shift_amount[2] = { 3, 7 };  // For divides by 8 or 128
static const int add_amount[2] = { 7, 127 + (120 << 7) };
static unsigned char class_array[377];

// Compute index of the class_array[] entry for a given size
static inline int ClassIndex(size_t s) {
  const int i = (s > kMaxSmallSize);
  return static_cast<int>((s + add_amount[i]) >> shift_amount[i]);
}

// Mapping from size class to max size storable in that class
static size_t class_to_size[kNumClasses];

// Mapping from size class to number of pages to allocate at a time
static size_t class_to_pages[kNumClasses];
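// A sketch, not part of the original source, working through the table
// above: ClassIndex() folds two logical arrays -- stride 8 up to 1024
// bytes, stride 128 above -- into one physical array by adding a
// per-regime bias before shifting.  CLASS_INDEX_DEMO is a hypothetical
// guard so the demo never compiles into the allocator itself.
#ifdef CLASS_INDEX_DEMO
#include <cstdio>
static void PrintClassIndexExamples() {
  static const size_t sizes[] = { 0, 1, 8, 1024, 1025, 32768 };
  for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) {
    // Prints indices 0, 1, 1, 128, 129, 376 -- matching the comment above.
    std::printf("size %6u -> class_array index %d\n",
                unsigned(sizes[i]), ClassIndex(sizes[i]));
  }
}
#endif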
// TransferCache is used to cache transfers of num_objects_to_move[size_class]
// back and forth between thread caches and the central cache for a given size
// class.
struct TCEntry {
  void *head;  // Head of chain of objects.
  void *tail;  // Tail of chain of objects.
};

// A central cache freelist can have anywhere from 0 to kNumTransferEntries
// slots to put linked-list chains into.  To keep memory usage bounded the
// total number of TCEntries across size classes is fixed.  Currently each
// size class is initially given one TCEntry, which also means that the
// maximum any one class can have is kNumClasses.
static const int kNumTransferEntries = kNumClasses;

// Note: the following only works for "n"s that fit in 32-bits, but
// that is fine since we only use it for small sizes.
static inline int LgFloor(size_t n) {
  int log = 0;
  for (int i = 4; i >= 0; --i) {
    int shift = (1 << i);
    size_t x = n >> shift;
    if (x != 0) {
      n = x;
      log += shift;
    }
  }
  ASSERT(n == 1);
  return log;
}

// Some very basic linked-list functions that store the links in the
// void* objects themselves.

static inline void *SLL_Next(void *t) {
  return *(reinterpret_cast<void**>(t));
}

static inline void SLL_SetNext(void *t, void *n) {
  *(reinterpret_cast<void**>(t)) = n;
}

static inline void SLL_Push(void **list, void *element) {
  SLL_SetNext(element, *list);
  *list = element;
}

static inline void *SLL_Pop(void **list) {
  void *result = *list;
  *list = SLL_Next(*list);
  return result;
}

// Remove N elements from a linked list to which head points.  head will be
// modified to point to the new head.  start and end will point to the first
// and last nodes of the range.  Note that the detached range is
// NULL-terminated: end's next pointer is set to NULL by this function.
static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
  if (N == 0) {
    *start = NULL;
    *end = NULL;
    return;
  }

  void *tmp = *head;
  for (int i = 1; i < N; ++i) {
    tmp = SLL_Next(tmp);
  }

  *start = *head;
  *end = tmp;
  *head = SLL_Next(tmp);
  // Unlink range from list.
  SLL_SetNext(tmp, NULL);
}

static inline void SLL_PushRange(void **head, void *start, void *end) {
  if (!start) return;
  SLL_SetNext(end, *head);
  *head = start;
}

static inline size_t SLL_Size(void *head) {
  int count = 0;
  while (head) {
    count++;
    head = SLL_Next(head);
  }
  return count;
}
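// A sketch, not part of the original source, of how the SLL_* helpers are
// meant to be used.  The list is intrusive: each free object's first word
// holds the pointer to the next object, so chains cost no extra memory.
// SLL_DEMO is a hypothetical guard; the slots stand in for free objects.
#ifdef SLL_DEMO
static void SllDemo() {
  void* slots[4];                          // four fake free objects
  void* list = NULL;
  for (int i = 0; i < 4; ++i)
    SLL_Push(&list, &slots[i]);            // list: slots[3] -> ... -> slots[0]
  ASSERT(SLL_Size(list) == 4);

  void* start;
  void* end;
  SLL_PopRange(&list, 2, &start, &end);    // detach slots[3] -> slots[2]
  ASSERT(SLL_Size(start) == 2);
  ASSERT(SLL_Size(list) == 2);

  SLL_PushRange(&list, start, end);        // splice the chain back on
  ASSERT(SLL_Size(list) == 4);
}
#endif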
// Setup helper functions.

static ALWAYS_INLINE size_t SizeClass(size_t size) {
  return class_array[ClassIndex(size)];
}

// Get the byte-size for a specified class
static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) {
  return class_to_size[cl];
}

static int NumMoveSize(size_t size) {
  if (size == 0) return 0;
  // Use approx 64k transfers between thread and central caches.
  int num = static_cast<int>(64.0 * 1024.0 / size);
  if (num < 2) num = 2;
  // Clamp well below kMaxFreeListLength to avoid ping pong between central
  // and thread caches.
  if (num > static_cast<int>(0.8 * kMaxFreeListLength))
    num = static_cast<int>(0.8 * kMaxFreeListLength);

  // Also, avoid bringing too many objects into small object free
  // lists.  There are lots of such lists, and if we allow each one to
  // fetch too many at a time, we end up having to scavenge too often
  // (especially when there are lots of threads and each thread gets a
  // small allowance for its thread cache).
  //
  // TODO: Make thread cache free list sizes dynamic so that we do not
  // have to equally divide a fixed resource amongst lots of threads.
  if (num > 32) num = 32;

  return num;
}
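// A sketch, not part of the original source, of the batch sizes the rule
// above produces: target ~64KB per transfer, then clamp to at least 2 and
// (since 32 < 0.8 * kMaxFreeListLength) effectively at most 32 objects.
// NUM_MOVE_DEMO is a hypothetical guard.
#ifdef NUM_MOVE_DEMO
#include <cstdio>
static void PrintNumMoveSizes() {
  static const size_t sizes[] = { 8, 64, 4096, 32768 };
  for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) {
    // Prints batches of 32, 32, 16, and 2 objects respectively.
    std::printf("size %5u -> move %d objects per transfer\n",
                unsigned(sizes[i]), NumMoveSize(sizes[i]));
  }
}
#endif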
// Initialize the mapping arrays
static void InitSizeClasses() {
  // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
  if (ClassIndex(0) < 0) {
    MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
    CRASH();
  }
  if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) {
    MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
    CRASH();
  }

  // Compute the size classes we want to use
  size_t sc = 1;   // Next size class to assign
  unsigned char alignshift = kAlignShift;
  int last_lg = -1;
  for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) {
    int lg = LgFloor(size);
    if (lg > last_lg) {
      // Increase alignment every so often.
      //
      // Since we double the alignment every time size doubles and
      // size >= 128, this means that space wasted due to alignment is
      // at most 16/128 i.e., 12.5%.  Plus we cap the alignment at 256
      // bytes, so the space wasted as a percentage starts falling for
      // sizes > 2K.
      if ((lg >= 7) && (alignshift < 8)) {
        alignshift++;
      }
      last_lg = lg;
    }

    // Allocate enough pages so leftover is less than 1/8 of total.
    // This bounds wasted space to at most 12.5%.
    size_t psize = kPageSize;
    while ((psize % size) > (psize >> 3)) {
      psize += kPageSize;
    }
    const size_t my_pages = psize >> kPageShift;

    if (sc > 1 && my_pages == class_to_pages[sc-1]) {
      // See if we can merge this into the previous class without
      // increasing the fragmentation of the previous class.
      const size_t my_objects = (my_pages << kPageShift) / size;
      const size_t prev_objects = (class_to_pages[sc-1] << kPageShift)
                                  / class_to_size[sc-1];
      if (my_objects == prev_objects) {
        // Adjust last class to include this size
        class_to_size[sc-1] = size;
        continue;
      }
    }

    // Add new class
    class_to_pages[sc] = my_pages;
    class_to_size[sc] = size;
    sc++;
  }
  if (sc != kNumClasses) {
    MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n",
            sc, int(kNumClasses));
    CRASH();
  }

  // Initialize the mapping arrays
  int next_size = 0;
  for (unsigned char c = 1; c < kNumClasses; c++) {
    const size_t max_size_in_class = class_to_size[c];
    for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) {
      class_array[ClassIndex(s)] = c;
    }
    next_size = static_cast<int>(max_size_in_class + kAlignment);
  }

  // Double-check sizes just to be safe
  for (size_t size = 0; size <= kMaxSize; size++) {
    const size_t sc = SizeClass(size);
    if (sc == 0) {
      MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
      CRASH();
    }
    if (sc > 1 && size <= class_to_size[sc-1]) {
      MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS
              "\n", sc, size);
      CRASH();
    }
    if (sc >= kNumClasses) {
      MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
      CRASH();
    }
    const size_t s = class_to_size[sc];
    if (size > s) {
      MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n",
              s, size, sc);
      CRASH();
    }
    if (s == 0) {
      MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n",
              s, size, sc);
      CRASH();
    }
  }

  // Initialize the num_objects_to_move array.
  for (size_t cl = 1; cl < kNumClasses; ++cl) {
    num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
  }

#ifndef WTF_CHANGES
  if (false) {
    // Dump class sizes and maximum external wastage per size class
    for (size_t cl = 1; cl < kNumClasses; ++cl) {
      const int alloc_size = class_to_pages[cl] << kPageShift;
      const int alloc_objs = alloc_size / class_to_size[cl];
      const int min_used = (class_to_size[cl-1] + 1) * alloc_objs;
      const int max_waste = alloc_size - min_used;
      MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
              int(cl),
              int(class_to_size[cl-1] + 1),
              int(class_to_size[cl]),
              int(class_to_pages[cl] << kPageShift),
              max_waste * 100.0 / alloc_size);
    }
  }
#endif
}

// -------------------------------------------------------------------------
// Simple allocator for objects of a specified type.  External locking
// is required before accessing one of these objects.
// -------------------------------------------------------------------------

// Metadata allocator -- keeps stats about how many bytes allocated
static uint64_t metadata_system_bytes = 0;
static void* MetaDataAlloc(size_t bytes) {
  void* result = TCMalloc_SystemAlloc(bytes, 0);
  if (result != NULL) {
    metadata_system_bytes += bytes;
  }
  return result;
}
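// A sketch, not part of the original source, isolating the span-sizing rule
// used by InitSizeClasses() above: grow the span one page at a time until
// the leftover (psize % size) is at most 1/8 of the span, which bounds
// internal fragmentation at 12.5%.  SPAN_SIZE_DEMO is a hypothetical guard.
#ifdef SPAN_SIZE_DEMO
#include <cstdio>
static void PrintSpanChoice(size_t size) {
  size_t psize = kPageSize;                // start from a single page
  while ((psize % size) > (psize >> 3)) {  // leftover exceeds 12.5%?
    psize += kPageSize;                    // grow the span by one page
  }
  // e.g. size 48 fits in one page (16 bytes left over), while size 2560
  // needs a two-page span (8192 % 2560 = 512 <= 1024).
  std::printf("size %5u -> %u-page span, %u bytes left over\n",
              unsigned(size), unsigned(psize >> kPageShift),
              unsigned(psize % size));
}
#endif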