📄 container.h
字号:
}; // Iterator API, like STL. struct const_iterator { T get_key() const { return m_hash->E(m_index).first; } U get_value() const { return m_hash->E(m_index).second; } const entry& operator*() const { assert(is_end() == false && (m_hash->E(m_index).is_empty() == false)); return m_hash->E(m_index); } const entry* operator->() const { return &(operator*()); } void operator++() { assert(m_hash); // Find next non-empty entry. if (m_index <= m_hash->m_table->m_size_mask) { m_index++; while (m_index <= m_hash->m_table->m_size_mask && m_hash->E(m_index).is_empty()) { m_index++; } } } bool operator==(const const_iterator& it) const { if (is_end() && it.is_end()) { return true; } else { return m_hash == it.m_hash && m_index == it.m_index; } } bool operator!=(const const_iterator& it) const { return ! (*this == it); } bool is_end() const { return m_hash == NULL || m_hash->m_table == NULL || m_index > m_hash->m_table->m_size_mask; } protected: friend class hash<T,U,hash_functor>; const_iterator(const hash* h, int index) : m_hash(h), m_index(index) { } const hash* m_hash; int m_index; }; friend struct const_iterator; // non-const iterator; get most of it from const_iterator. struct iterator : public const_iterator { // Allow non-const access to entries. entry& operator*() const { assert(const_iterator::is_end() == false); return const_cast<hash*>(const_iterator::m_hash)->E(const_iterator::m_index); } entry* operator->() const { return &(operator*()); } private: friend class hash<T,U,hash_functor>; iterator(hash* h, int i0) : const_iterator(h, i0) { } }; friend struct iterator; iterator begin() { if (m_table == 0) return iterator(NULL, 0); // Scan til we hit the first valid entry. int i0 = 0; while (i0 <= m_table->m_size_mask && E(i0).is_empty()) { i0++; } return iterator(this, i0); } iterator end() { return iterator(NULL, 0); } const_iterator begin() const { return const_cast<hash*>(this)->begin(); } const_iterator end() const { return const_cast<hash*>(this)->end(); } iterator find(const T& key) { int index = find_index(key); if (index >= 0) { return iterator(this, index); } return iterator(NULL, 0); } const_iterator find(const T& key) const { return const_cast<hash*>(this)->find(key); }private: int find_index(const T& key) const // Find the index of the matching entry. If no match, then return -1. { if (m_table == NULL) return -1; size_t hash_value = hash_functor()(key); int index = hash_value & m_table->m_size_mask; const entry* e = &E(index); if (e->is_empty()) return -1; if (int(e->m_hash_value & m_table->m_size_mask) != index) return -1; // occupied by a collider for (;;) { assert((e->m_hash_value & m_table->m_size_mask) == (hash_value & m_table->m_size_mask)); if (e->m_hash_value == hash_value && e->first == key) { // Found it. return index; } assert(! (e->first == key)); // keys are equal, but hash differs! // Keep looking through the chain. index = e->m_next_in_chain; if (index == -1) break; // end of chain assert(index >= 0 && index <= m_table->m_size_mask); e = &E(index); assert(e->is_empty() == false); } return -1; } // Helpers. entry& E(int index) { assert(m_table); assert(index >= 0 && index <= m_table->m_size_mask); return *(((entry*) (m_table + 1)) + index); } const entry& E(int index) const { assert(m_table); assert(index >= 0 && index <= m_table->m_size_mask); return *(((entry*) (m_table + 1)) + index); } void set_raw_capacity(int new_size) // Resize the hash table to the given size (Rehash the // contents of the current table). The arg is the number of // hash table entries, not the number of elements we should // actually contain (which will be less than this). { if (new_size <= 0) { // Special case. clear(); return; } // Force new_size to be a power of two. int bits = fchop(log2((float)(new_size-1)) + 1); assert((1 << bits) >= new_size); new_size = 1 << bits; // Minimum size; don't incur rehashing cost when // expanding very small tables. if (new_size < 16) { new_size = 16; } hash<T, U, hash_functor> new_hash; new_hash.m_table = (table*) tu_malloc(sizeof(table) + sizeof(entry) * new_size); assert(new_hash.m_table); // @@ need to throw (or something) on malloc failure! new_hash.m_table->m_entry_count = 0; new_hash.m_table->m_size_mask = new_size - 1; {for (int i = 0; i < new_size; i++) { new_hash.E(i).m_next_in_chain = -2; // mark empty }} // Copy stuff to new_hash if (m_table) { for (int i = 0, n = m_table->m_size_mask; i <= n; i++) { entry* e = &E(i); if (e->is_empty() == false) { // Insert old entry into new hash. new_hash.add(e->first, e->second); e->clear(); // placement delete of old element } } // Delete our old data buffer. tu_free(m_table, sizeof(table) + sizeof(entry) * (m_table->m_size_mask + 1)); } // Steal new_hash's data. m_table = new_hash.m_table; new_hash.m_table = NULL; } struct table { int m_entry_count; int m_size_mask; // entry array goes here! }; table* m_table;};#endif // not _TU_USE_STLclass tu_stringi;// String-like type. Attempt to be memory-efficient with small strings.class tu_string{public: tu_string() { m_local.m_size = 1; memset(m_local.m_buffer, 0, 15); } tu_string(const char* str) { m_local.m_size = 1; m_local.m_buffer[0] = 0; int new_size = strlen(str); resize(new_size); strcpy(get_buffer(), str); } tu_string(const char* buf, int buflen) { m_local.m_size = 1; m_local.m_buffer[0] = 0; int new_size = buflen; resize(new_size); memcpy(get_buffer(), buf, buflen); get_buffer()[buflen] = 0; // terminate. } tu_string(const tu_string& str) { m_local.m_size = 1; m_local.m_buffer[0] = 0; resize(str.size()); strcpy(get_buffer(), str.get_buffer()); } tu_string(const uint32* wide_char_str) { m_local.m_size = 1; m_local.m_buffer[0] = 0; *this = wide_char_str; } tu_string(const uint16* wide_char_str) { m_local.m_size = 1; m_local.m_buffer[0] = 0; *this = wide_char_str; } ~tu_string() { if (using_heap()) { tu_free(m_heap.m_buffer, m_heap.m_capacity); } } operator const char*() const { return get_buffer(); } const char* c_str() const { return (const char*) (*this); } // If you need a const tu_stringi, don't create a new object; // these things have the same internal representation. const tu_stringi& to_tu_stringi() const { return *(tu_stringi*) this; } // operator= returns void; if you want to know why, ask Charles Bloom :) // (executive summary: a = b = c is an invitation to bad code) void operator=(const char* str) { resize(strlen(str)); strcpy(get_buffer(), str); } void operator=(const tu_string& str) { resize(str.size()); strcpy(get_buffer(), str.get_buffer()); } bool operator==(const char* str) const { return strcmp(*this, str) == 0; } bool operator!=(const char* str) const { return strcmp(*this, str) != 0; } bool operator==(const tu_string& str) const { return strcmp(*this, str) == 0; } bool operator!=(const tu_string& str) const { return strcmp(*this, str) != 0; } int length() const { if (using_heap() == false) { return m_local.m_size - 1; } else { return m_heap.m_size - 1; } } int size() const { return length(); } char& operator[](int index) { assert(index >= 0 && index <= size()); return get_buffer()[index]; } const char& operator[](int index) const { return (*(const_cast<tu_string*>(this)))[index]; } void operator+=(const char* str) { int str_length = strlen(str); int old_length = length(); assert(old_length >= 0); resize(old_length + str_length); strcpy(get_buffer() + old_length, str); } void operator+=(char ch) { int old_length = length(); assert(old_length >= 0); resize(old_length + 1); strncpy(get_buffer() + old_length, (char *)&ch, 1); } // Append wide char. Both versions of wide char. void append_wide_char(uint16 ch); void append_wide_char(uint32 ch); void operator+=(const tu_string& str) { int str_length = str.length(); int old_length = length(); assert(old_length >= 0); resize(old_length + str_length); strcpy(get_buffer() + old_length, str.c_str()); } tu_string operator+(const char* str) const // NOT EFFICIENT! But convenient. { tu_string new_string(*this); new_string += str; return new_string; } bool operator<(const char* str) const { return strcmp(c_str(), str) < 0; } bool operator<(const tu_string& str) const { return *this < str.c_str(); } bool operator>(const char* str) const { return strcmp(c_str(), str) > 0; } bool operator>(const tu_string& str) const { return *this > str.c_str(); } void clear() { resize(0); } // Sets buffer size to new_size+1 (i.e. enough room for // new_size chars, plus terminating 0). void resize(int new_size); // Set *result to the UTF-8 encoded version of wstr[]. // Both version of wchar_t. // // Could add operator= overloads, but maybe it's better to // keep this very explicit. static void encode_utf8_from_wchar(tu_string* result, const uint32* wstr); static void encode_utf8_from_wchar(tu_string* result, const uint16* wstr); // Utility: case-insensitive string compare. stricmp() is not // ANSI or POSIX, doesn't seem to appear in Linux. static int stricmp(const char* a, const char* b); // Return the Unicode char at the specified character // position. index is in UTF-8 chars, NOT bytes. uint32 utf8_char_at(int index) const; // Return the string in this container as all upper case letters tu_string utf8_to_upper() const; // Return the string in this container as all lower case letters tu_string utf8_to_lower() const; // Return the number of UTF-8 characters in the given // substring buffer. You must pass in a valid buffer length; // this routine does not look for a terminating \0. static int utf8_char_count(const char* buf, int buflen); int utf8_length() const { return utf8_char_count(get_buffer(), length()); } // Returns a tu_string that's a substring of this. start and // end are in UTF-8 character positions (NOT bytes). // // start is the index of the first character you want to include. // // end is the index one past the last character you want to include. tu_string utf8_substring(int start, int end) const;private: char* get_buffer() { if (using_heap() == false) { return m_local.m_buffer; } else { return m_heap.m_buffer; } } const char* get_buffer() const { return const_cast<tu_string*>(this)->get_buffer(); } bool using_heap() const { bool heap = (m_heap.m_all_ones == (char) ~0); return heap; } // The idea here is that tu_string is a 16-byte structure, // which uses internal storage for strings of 14 characters or // less. For longer strings, it allocates a heap buffer, and // keeps the buffer-tracking info in the same bytes that would // be used for internal string storage. // // A string that's implemented like vector<char> is typically // 12 bytes plus heap storage, so this seems like a decent // thing to try. Also, a zero-length string still needs a // terminator character, which with vector<char> means an // unfortunate heap alloc just to hold a single '0'. union { // Internal storage. struct { char m_size; char m_buffer[15]; } m_local; // Heap storage. struct { char m_all_ones; // flag to indicate heap storage is in effect. int m_size; int m_capacity; char* m_buffer; } m_heap; };};// String-like type; comparisons are CASE INSENSITIVE.// Uses tu_string for implementation.class tu_stringi{public: tu_stringi() {} tu_stringi(const char* str) : m_string(str) {} tu_stringi(const tu_string& str) : m_string(str) {} tu_stringi(const tu_stringi& stri) : m_string(stri.c_str()) {} ~tu_stringi() {} operator const char*() const { return (const char*) m_string; } const char* c_str() const { return m_string.c_str(); } void operator=(const char* str) { m_string = str; } void operator=(const tu_string& str) { m_string = str; } void operator=(const tu_stringi& str) { m_string = str.m_string; } int length() const { return m_string.length(); } int size() const { return length(); } char& operator[](int index) { return m_string[index]; } const char& operator[](int index) const { return m_string[index]; } void operator+=(const char* str) { m_string += str; } void operator+=(const tu_string& str) { m_string += str; } void operator+=(const tu_stringi& str) { m_string += str.m_string; } tu_stringi operator+(const char* str) const { return tu_stringi(m_string + str); } // The special stuff. tu_string& to_tu_string() { return m_string; } const tu_string& to_tu_string() const { return m_string; } bool operator==(const char* str) const { return tu_string::stricmp(*this, str) == 0; } bool operator==(const tu_stringi& str) const { return tu_string::stricmp(*this, str) == 0; } bool operator<(const char* str) const { return tu_string::stricmp(c_str(), str) < 0; } bool operator<(const tu_stringi& str) const { return *this < str.c_str(); } bool operator>(const char* str) const { return tu_string::stricmp(c_str(), str) > 0; } bool operator>(const tu_stringi& str) const { return *this > str.c_str(); } void resize(int new_size) { m_string.resize(new_size); }private: tu_string m_string;};template<class T>class string_hash_functor// Computes a hash of a string-like object (something that has// ::length() and ::[int]).{public: size_t operator()(const T& data) const { int size = data.length(); return bernstein_hash((const char*) data, size); }};template<class U>class string_hash : public hash<tu_string, U, string_hash_functor<tu_string> >{};template<class T>class stringi_hash_functor// Computes a case-insensitive hash of a string-like object (something that has// ::length() and ::[int] and tolower(::[])).{public: size_t operator()(const T& data) const { int size = data.length(); return bernstein_hash_case_insensitive((const char*) data, size); }};// Case-insensitive string hash.template<class U>class stringi_hash : public hash<tu_stringi, U, stringi_hash_functor<tu_stringi> >{};// Utility: handy sprintf wrapper.tu_string string_printf(const char* fmt, ...)#ifdef __GNUC__ // use the following to catch errors: (only with gcc) __attribute__((format (printf, 1, 2)))#endif // not __GNUC__;#endif // CONTAINER_H// Local Variables:// mode: C++// c-basic-offset: 8 // tab-width: 8// indent-tabs-mode: t// End:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -