strngs.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 495 行

CPP
495
字号
/**********************************************************************
 * File:        strngs.c  (Formerly strings.c)
 * Description: STRING class functions.
 * Author:      Ray Smith
 * Created:     Fri Feb 15 09:13:30 GMT 1991
 *
 * (C) Copyright 1991, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include          "mfcpch.h"     //precompiled headers
#include          <stddef.h>
#include          <stdio.h>
#include          <stdlib.h>
#include          <string.h>
#include          "tprintf.h"
#include          "strngs.h"

/**********************************************************************
 * DataCache for reducing initial allocations, such as the default
 * constructor. The memory in this cache is not special, it is just
 * held locally rather than freeing. Only blocks with the default
 * capacity are considered for the cache.
 *
 * In practice it does not appear that this cache grows very big,
 * so even 2-4 elements are probably sufficient to realize most
 * gains.
 *
 * The cache is maintained globally with a global destructor to
 * avoid memory leaks being reported on exit.
 **********************************************************************/
// kDataCacheSize is the number of most recently freed minimum-sized
// buffers that are kept around for cheap recycling.
const int kDataCacheSize = 8;  // max number of buffers cached

#if 1
#define CHECK_INVARIANT(s)  // EMPTY
#else
// Debug helper: aborts unless len is 0 with a NULL string, or len equals
// strlen(s) + 1.
static void check_used_(int len, const char *s) {
  bool ok;

  if (len == 0)
    ok = (s == NULL);
  else
    ok = (len == (int)(strlen(s) + 1));

  if (!ok)
    abort();
}

#define CHECK_INVARIANT(s)  check_used_(s->GetHeader()->used_, s->string())
#endif

// Recycled buffers live in a class so that its destructor can release
// them on exit.
class DataCache {
 public:
  DataCache() {
    top_ = 0;
  }

  // Releases every buffer still held. top_ is left at 0 (rather than -1)
  // so that a late alloc()/free() cannot index below the start of stack_.
  ~DataCache() {
    while (top_ > 0)
      free_string((char *)stack_[--top_]);
    top_ = 0;
  }

  // Allocate a buffer out of this cache.
  // Returns NULL if there are no cached buffers.
  // The buffers in the cache can be freed using free_string.
  void* alloc() {
    if (top_ <= 0)
      return NULL;

    return stack_[--top_];
  }

  // Free pointer either by caching it on the stack of pointers
  // or freeing it with free_string if there isn't space left to cache it.
  // p should have capacity kMinCapacity.
  void free(void* p) {
    if (top_ < 0 || top_ >= kDataCacheSize)
      free_string((char *)p);
    else
      stack_[top_++] = p;
  }

  // Stack of discarded but not-yet freed pointers.
  void* stack_[kDataCacheSize];

  // Top of stack, points to element after last cached pointer
  int   top_;
};

static DataCache MinCapacityDataCache;


/**********************************************************************
 * STRING_HEADER provides metadata about the allocated buffer,
 * including total capacity and how much used (strlen with '\0').
 *
 * The implementation hides this header at the start of the data
 * buffer and appends the string on the end to keep sizeof(STRING)
 * unchanged from earlier versions so serialization is not affected.
 *
 * The collection of MACROS provide different implementations depending
 * on whether the string keeps track of its strlen or not so that this
 * feature can be added in later when consumers don't modify the string
 **********************************************************************/

// Smallest string to allocate by default
const int kMinCapacity = 16;

// Binds data_ to a buffer able to hold `capacity` characters after the
// header, records `used` and `capacity` in the header, and returns the
// character area. Buffers of exactly kMinCapacity come from the cache
// when one is available. Any previous data_ is NOT released here.
char* STRING::AllocData(int used, int capacity) {
  if ((capacity != kMinCapacity)
      || ((data_ = (STRING_HEADER *)MinCapacityDataCache.alloc()) == NULL))
    data_ = (STRING_HEADER *)alloc_string(capacity + sizeof(STRING_HEADER));

  // header is the metadata for this memory block
  STRING_HEADER* header = GetHeader();
  header->capacity_ = capacity;
  header->used_ = used;
  return GetCStr();
}

// Releases the current buffer: kMinCapacity buffers go back to the cache,
// everything else is freed with free_string.
void STRING::DiscardData() {
  STRING_HEADER* header = GetHeader();
  if (header->capacity_ == kMinCapacity)
    MinCapacityDataCache.free(data_);
  else
    free_string((char *)data_);
}

// Makes sure the character area can hold at least min_capacity bytes and
// returns a pointer to it. When the buffer has to grow it at least doubles,
// the first used_ bytes are carried over, and the old buffer is discarded,
// so any STRING_HEADER* or char* obtained before the call may be stale.
// This is a private method.
char* STRING::ensure_cstr(inT32 min_capacity) {
  STRING_HEADER* orig_header = GetHeader();
  if (min_capacity <= orig_header->capacity_)
    return ((char *)this->data_) + sizeof(STRING_HEADER);

  // used_ may have been flagged unreliable (-1); it is about to be used
  // as a copy length, so make it well defined first.
  FixHeader();

  // if we are going to grow bigger, than double our existing
  // size, but if that still is not big enough then keep the
  // requested capacity
  if (min_capacity < 2 * orig_header->capacity_)
    min_capacity = 2 * orig_header->capacity_;

  int alloc = sizeof(STRING_HEADER) + min_capacity;
  STRING_HEADER* new_header = (STRING_HEADER*)(alloc_string(alloc));

  memcpy(&new_header[1], GetCStr(), orig_header->used_);
  new_header->capacity_ = min_capacity;
  new_header->used_ = orig_header->used_;

  // free old memory, then rebind to new memory
  DiscardData();
  data_ = new_header;

  CHECK_INVARIANT(this);
  return ((char *)data_) + sizeof(STRING_HEADER);
}

// Recomputes used_ from the character data when it has been flagged
// unreliable (negative).
// This is const, but is modifying a mutable field
// this way it can be used on const or non-const instances.
void STRING::FixHeader() const {
  const STRING_HEADER* header = GetHeader();
  if (header->used_ < 0)
    header->used_ = (int)(strlen(GetCStr()) + 1);
}


// Default constructor: a kMinCapacity buffer with used_ == 0.
STRING::STRING() {
  // 0 indicates old NULL -- it doesn't even have '\0'
  AllocData(0, kMinCapacity);
}

// Copy constructor: allocates exactly str's used_ bytes and copies them.
STRING::STRING(const STRING& str) {
  str.FixHeader();
  const STRING_HEADER* str_header  = str.GetHeader();
  int   str_used  = str_header->used_;
  char *this_cstr = AllocData(str_used, str_used);
  memcpy(this_cstr, str.GetCStr(), str_used);
  CHECK_INVARIANT(this);
}

// Builds a STRING from a C string. A NULL cstr yields a zero-capacity
// buffer with used_ == 0.
STRING::STRING(const char* cstr) {
  if (cstr == NULL) {
    AllocData(0, 0);
  } else {
    int len = strlen(cstr) + 1;
    char* this_cstr = AllocData(len, len);
    memcpy(this_cstr, cstr, len);
  }
  CHECK_INVARIANT(this);
}

STRING::~STRING() {
  DiscardData();
}

// Returns true if c (other than '\0') occurs in the string.
BOOL8 STRING::contains(const char c) const {
  // A used_ of 0 means no '\0' was ever written, so the buffer must not
  // be scanned with strchr.
  if (GetHeader()->used_ == 0)
    return FALSE;
  return (c != '\0') && (strchr (GetCStr(), c) != NULL);
}

// Returns the number of characters before the terminator; 0 when the
// string holds no data at all (used_ == 0).
inT32 STRING::length() const {
  FixHeader();
  const int used = GetHeader()->used_;
  return used > 0 ? used - 1 : 0;
}

// Returns the character data, or NULL when used_ == 0.
const char* STRING::string() const {
  const STRING_HEADER* header = GetHeader();
  if (header->used_ == 0)
    return NULL;

  // mark header length unreliable because tesseract might
  // cast away the const and mutate the string directly.
  header->used_ = -1;
  return GetCStr();
}

/******
 * The STRING_IS_PROTECTED interface adds additional support to migrate
 * code that needs to modify the STRING in ways not otherwise supported
 * without violating encapsulation.
 *
 * Also makes the [] operator return a const so it is immutable
 */
#if STRING_IS_PROTECTED
const char& STRING::operator[](inT32 index) const {
  return GetCStr()[index];
}

// Inserts len bytes of str at position index.
// If index lies inside the current data, the tail (terminator included)
// is shifted right by len. Otherwise the bytes are written at index and a
// new terminator is placed right after them.
// NOTE(review): when index is past the old terminator the old '\0' stays
// embedded in the buffer -- confirm whether callers ever do that.
void STRING::insert_range(inT32 index, const char* str, int len) {
  FixHeader();
  const int old_used = GetHeader()->used_;

  // if index is outside current range, then also grow size of string
  // to accommodate the requested range.
  int used = old_used;
  if (index > used)
    used = index;

  char* this_cstr = ensure_cstr(used + len + 1);
  // ensure_cstr may have reallocated; fetch the header only now.
  STRING_HEADER* this_header = GetHeader();

  int new_used = old_used;
  if (index < old_used) {
    // move existing string from index to '\0' inclusive.
    memmove(this_cstr + index + len,
            this_cstr + index,
            old_used - index);
    new_used = old_used + len;
  } else if (len > 0) {
    // Nothing to shift: terminate after the bytes about to be written so
    // the copy below cannot overwrite the terminator.
    this_cstr[index + len] = '\0';
    new_used = index + len + 1;
  }

  // Write new string to index.
  // The string is already terminated from the conditions above.
  memcpy(this_cstr + index, str, len);
  this_header->used_ = new_used;

  CHECK_INVARIANT(this);
}

// Removes len bytes starting at index by sliding the tail left.
void STRING::erase_range(inT32 index, int len) {
  FixHeader();
  char* this_cstr = GetCStr();
  STRING_HEADER* this_header = GetHeader();

  // Source and destination overlap, so memmove is required.
  memmove(this_cstr + index, this_cstr + index + len,
          this_header->used_ - index - len);
  this_header->used_ -= len;
  CHECK_INVARIANT(this);
}

// Cuts the string so that it has exactly index characters.
void STRING::truncate_at(inT32 index) {
  FixHeader();
  // index characters plus the terminator written at [index].
  char* this_cstr = ensure_cstr(index + 1);
  this_cstr[index] = '\0';
  GetHeader()->used_ = index + 1;
  CHECK_INVARIANT(this);
}

#else
char& STRING::operator[](inT32 index) const {
  // Code is casting away this const and mutating the string,
  // so mark used_ as -1 to flag it unreliable.
  GetHeader()->used_ = -1;
  return ((char *)GetCStr())[index];
}
#endif

// Equal when both hold the same number of used bytes with identical content.
BOOL8 STRING::operator==(const STRING& str) const {
  FixHeader();
  str.FixHeader();
  const STRING_HEADER* str_header = str.GetHeader();
  const STRING_HEADER* this_header = GetHeader();
  int this_used = this_header->used_;
  int str_used  = str_header->used_;

  return (this_used == str_used)
          && (memcmp(GetCStr(), str.GetCStr(), this_used) == 0);
}

BOOL8 STRING::operator!=(const STRING& str) const {
  FixHeader();
  str.FixHeader();
  const STRING_HEADER* str_header = str.GetHeader();
  const STRING_HEADER* this_header = GetHeader();
  int this_used = this_header->used_;
  int str_used  = str_header->used_;

  return (this_used != str_used)
         || (memcmp(GetCStr(), str.GetCStr(), this_used) != 0);
}

// Compares against a C string; a NULL cstr matches both a string with no
// data (used_ == 0) and an empty string (used_ == 1).
BOOL8 STRING::operator!=(const char* cstr) const {
  FixHeader();
  const STRING_HEADER* this_header = GetHeader();

  if (cstr == NULL)
    return this_header->used_ > 1;  // either '\0' or NULL
  else {
    inT32 length = strlen(cstr) + 1;
    return (this_header->used_ != length)
            || (memcmp(GetCStr(), cstr, length) != 0);
  }
}

// Copy assignment.
STRING& STRING::operator=(const STRING& str) {
  if (this == &str)
    return *this;  // nothing to copy; avoids a memcpy onto itself

  str.FixHeader();
  const STRING_HEADER* str_header = str.GetHeader();
  int   str_used = str_header->used_;

  GetHeader()->used_ = 0;  // clear since ensure doesn't need to copy data
  char* this_cstr = ensure_cstr(str_used);
  STRING_HEADER* this_header = GetHeader();

  memcpy(this_cstr, str.GetCStr(), str_used);
  this_header->used_ = str_used;

  CHECK_INVARIANT(this);
  return *this;
}

// Appends str. Appending a string that holds no data leaves *this unchanged.
STRING & STRING::operator+=(const STRING& str) {
  FixHeader();
  str.FixHeader();
  const int str_used = str.GetHeader()->used_;
  if (str_used == 0)
    return *this;  // otherwise used_ would shrink by one below

  const int this_used = GetHeader()->used_;
  char* this_cstr = ensure_cstr(this_used + str_used);

  // Both lookups happen after ensure_cstr: on self-append (s += s) the
  // old buffer may just have been released.
  const char* str_cstr = str.GetCStr();
  STRING_HEADER* this_header = GetHeader();

  // memmove because source and destination overlap on self-append.
  if (this_used > 1) {
    memmove(this_cstr + this_used - 1, str_cstr, str_used);
    this_header->used_ += str_used - 1;  // overwrite '\0'
  } else {
    memmove(this_cstr, str_cstr, str_used);
    this_header->used_ = str_used;
  }

  CHECK_INVARIANT(this);
  return *this;
}

// Replaces data_ with the used byte count.
void STRING::prep_serialise() {
  // WARNING
  // This method should only be called on a shallow bitwise copy
  // by the serialise() method (see serialis.h).
  FixHeader();
  data_ = (STRING_HEADER *)(ptrdiff_t)GetHeader()->used_;
}

// Writes the used_ bytes of character data (terminator included) to f.
void STRING::dump(FILE* f) {
  FixHeader();
  // Serialise the characters, not the header that precedes them in the
  // buffer; de_dump treats what it reads back as a C string.
  serialise_bytes (f, GetCStr(), GetHeader()->used_);
}

// Reads the string back from f. data_ is passed to de_serialise_bytes as
// the byte count (presumably the value stored by prep_serialise -- verify
// against the serialisation code).
void STRING::de_dump(FILE* f) {
  char *instring;            //input from read

  fprintf(stderr, "de_dump\n");
  instring = (char *)de_serialise_bytes(f, (ptrdiff_t)data_);
  int len = strlen(instring) + 1;

  char* this_cstr = AllocData(len, len);
  STRING_HEADER* this_header = GetHeader();

  memcpy(this_cstr, instring, len);
  this_header->used_ = len;

  free_mem(instring);
  CHECK_INVARIANT(this);
}

// Assigns a C string. A NULL cstr produces an empty string ("").
STRING & STRING::operator=(const char* cstr) {
  STRING_HEADER* this_header = GetHeader();
  if (cstr) {
    int len = strlen(cstr) + 1;

    this_header->used_ = 0;  // don't bother copying data if need to realloc
    char* this_cstr = ensure_cstr(len);
    this_header = GetHeader();  // for realloc
    // memmove: cstr may point into this string's own buffer.
    memmove(this_cstr, cstr, len);
    this_header->used_ = len;
  }
  else {
    // preserve old behavior
    // The buffer may have zero capacity (built from NULL), so make room
    // for the terminator before writing it.
    this_header->used_ = 0;
    char* this_cstr = ensure_cstr(1);
    this_header = GetHeader();  // for realloc
    *this_cstr = '\0';
    this_header->used_ = 1;
  }

  CHECK_INVARIANT(this);
  return *this;
}

// Returns the concatenation of *this and str.
STRING STRING::operator+(const STRING& str) const {
  STRING result(*this);
  result += str;

  CHECK_INVARIANT(this);
  return result;
}

// Returns a copy of *this with ch appended.
STRING STRING::operator+(const char ch) const {
  // Copy then append, exactly like operator+(const STRING&).
  STRING result(*this);
  result += ch;

  CHECK_INVARIANT(this);
  return result;
}

// Appends a C string; NULL and "" have no effect.
STRING&  STRING::operator+=(const char *str) {
  if (!str || !*str)  // empty string has no effect
    return *this;

  FixHeader();
  int len = strlen(str) + 1;
  int this_used = GetHeader()->used_;
  char* this_cstr = ensure_cstr(this_used + len);
  STRING_HEADER* this_header = GetHeader();  // after ensure for realloc

  // if we had non-empty string then append overwriting old '\0'
  // otherwise replace
  if (this_used > 0) {
    memcpy(this_cstr + this_used - 1, str, len);
    this_header->used_ += len - 1;
  } else {
    memcpy(this_cstr, str, len);
    this_header->used_ = len;
  }

  CHECK_INVARIANT(this);
  return *this;
}

// Appends a single character; '\0' has no effect.
STRING& STRING::operator+=(const char ch) {
  if (ch == '\0')
    return *this;

  FixHeader();
  int   this_used = GetHeader()->used_;
  // Room for the existing bytes plus ch; with no data yet (used_ == 0)
  // both ch and a new terminator are needed.
  char* this_cstr = ensure_cstr(this_used > 0 ? this_used + 1 : 2);
  STRING_HEADER* this_header = GetHeader();

  if (this_used > 0)
    --this_used; // undo old empty null if there was one

  this_cstr[this_used++] = ch;   // append ch to end
  this_cstr[this_used++] = '\0'; // append '\0' after ch
  this_header->used_ = this_used;

  CHECK_INVARIANT(this);
  return *this;
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?