📄 extendentitymanager.cxx
字号:
size_t decode(Char *, const char *, size_t, const char **);private: Owner<Decoder> sub_; ConstPtr<CharMapResource<Unsigned32> > map_;};MappingDecoder::MappingDecoder(Decoder *sub, const ConstPtr<CharMapResource<Unsigned32> > &map): Decoder(sub->minBytesPerChar()), sub_(sub), map_(map){}size_t MappingDecoder::decode(Char *to, const char *s, size_t slen, const char **rest){ size_t n = sub_->decode(to, s, slen, rest); const CharMap<Unsigned32> &map = *map_; for (size_t i = 0; i < n; i++) { Unsigned32 d = map[to[i]]; if (d & (unsigned(1) << 31)) to[i] = (d & ~(unsigned(1) << 31)); else to[i] += d; } return n;}Boolean MappingDecoder::convertOffset(unsigned long &offset) const{ return sub_->convertOffset(offset);} ExternalInputSource::ExternalInputSource(ParsedSystemId &parsedSysid, const CharsetInfo &systemCharset, const CharsetInfo &docCharset, Boolean internalCharsetIsDocCharset, Char replacementChar, InputSourceOrigin *origin, unsigned flags): InputSource(origin, 0, 0), mayRewind_((flags & EntityManager::mayRewind) != 0), mayNotExist_((flags & ExtendEntityManager::mayNotExist) != 0), sov_(parsedSysid.size()), internalCharsetIsDocCharset_(internalCharsetIsDocCharset), // hack maySetDocCharset_((flags & EntityManager::maySetDocCharset) != 0), replacementChar_(replacementChar){ for (size_t i = 0; i < parsedSysid.size(); i++) { if (parsedSysid[i].codingSystemType != (internalCharsetIsDocCharset ? StorageObjectSpec::bctf : StorageObjectSpec::encoding) && parsedSysid[i].codingSystemType != StorageObjectSpec::special) { map_ = new CharMapResource<Unsigned32>; buildMap(systemCharset, docCharset); break; } } for (size_t i = 0; i < sov_.size(); i++) sov_[i] = 0; init(); info_ = new ExternalInfoImpl(parsedSysid); origin->setExternalInfo(info_);}void ExternalInputSource::setDocCharset(const CharsetInfo &docCharset, const CharsetInfo &systemCharset){ if (!map_.isNull()) buildMap(systemCharset, docCharset); willNotSetDocCharset();}void ExternalInputSource::willNotSetDocCharset(){ maySetDocCharset_ = 0;}void ExternalInputSource::buildMap(const CharsetInfo &systemCharset, const CharsetInfo &docCharset){ CharMap<Unsigned32> &map = *map_; // FIXME How should invalidChar be chosen when internalCharsetIsDocCharset_? Char invalidChar = internalCharsetIsDocCharset_ ? 0 : replacementChar_; map.setAll((Unsigned32(1) << 31) | invalidChar); if (internalCharsetIsDocCharset_) buildMap1(systemCharset, docCharset); else buildMap1(docCharset, systemCharset);}void ExternalInputSource::buildMap1(const CharsetInfo &fromCharset, const CharsetInfo &toCharset){ UnivCharsetDescIter iter(fromCharset.desc()); for (;;) { WideChar descMin, descMax; UnivChar univMin; if (!iter.next(descMin, descMax, univMin)) break; if (descMin > charMax) break; if (descMax > charMax) descMax = charMax; WideChar totalCount = 1 + (descMax - descMin); do { WideChar count; WideChar toMin; ISet<WideChar> set; int nMap = toCharset.univToDesc(univMin, toMin, set, count); if (count > totalCount) count = totalCount; if (nMap && toMin <= charMax) { Char toMax; if (count - 1 > charMax - toMin) toMax = charMax; else toMax = toMin + (count - 1); map_->setRange(descMin, descMin + (toMax - toMin), Char(toMin - descMin)); } descMin += count; univMin += count; totalCount -= count; } while (totalCount > 0); }}void ExternalInputSource::init(){ so_ = 0; buf_ = 0; bufSize_ = 0; bufLim_ = 0; bufLimOffset_ = 0; insertRS_ = true; soIndex_ = 0; leftOver_ = 0; nLeftOver_ = 0; }ExternalInputSource::~ExternalInputSource(){ if (buf_) delete [] buf_;}Boolean ExternalInputSource::rewind(Messenger &mgr){ reset(0, 0); if (buf_) delete [] buf_; // reset makes a new EntityOrigin ParsedSystemId parsedSysid(info_->parsedSystemId()); ExternalInfoImpl *oldInfo = info_; info_ = new ExternalInfoImpl(parsedSysid); so_ = 0; for (size_t i = 0; i < soIndex_; i++) { if (sov_[i] && !sov_[i]->rewind(mgr)) return 0; StringC tem; oldInfo->getId(i, tem); info_->setId(i, tem); } inputSourceOrigin()->setExternalInfo(info_); init(); return 1;}void ExternalInputSource::willNotRewind(){ for (size_t i = 0; i < sov_.size(); i++) if (sov_[i]) sov_[i]->willNotRewind(); mayRewind_ = 0;}// Round up N so that it is a power of TO.// TO must be a power of 2.inlinesize_t roundUp(size_t n, size_t to){ return (n + (to - 1)) & ~(to - 1);}inlinevoid ExternalInputSource::noteRSAt(const Char *p){ info_->noteRS(bufLimOffset_ - (bufLim_ - p));}inlinevoid ExternalInputSource::noteRS(){ noteRSAt(cur());}Xchar ExternalInputSource::fill(Messenger &mgr){ ASSERT(cur() == end()); while (end() >= bufLim_) { // need more data while (so_ == 0) { if (soIndex_ >= sov_.size()) return eE; if (soIndex_ > 0) info_->noteStorageObjectEnd(bufLimOffset_ - (bufLim_ - end())); const StorageObjectSpec &spec = info_->spec(soIndex_); if (!sov_[soIndex_]) { StringC id; if (mayNotExist_) { NullMessenger nullMgr; sov_[soIndex_] = spec.storageManager->makeStorageObject(spec.specId, spec.baseId, spec.search, mayRewind_, nullMgr, id); } else sov_[soIndex_] = spec.storageManager->makeStorageObject(spec.specId, spec.baseId, spec.search, mayRewind_, mgr, id); info_->setId(soIndex_, id); } so_ = sov_[soIndex_].pointer(); if (so_) { decoder_ = spec.codingSystem->makeDecoder(); if (spec.codingSystemType != StorageObjectSpec::special && spec.codingSystemType != (internalCharsetIsDocCharset_ ? StorageObjectSpec::bctf : StorageObjectSpec::encoding)) { decoder_ = new MappingDecoder(decoder_, map_); if (maySetDocCharset_) { sov_[soIndex_] = new UnbufferingStorageObject(sov_[soIndex_].extract(), &maySetDocCharset_); so_ = sov_[soIndex_].pointer(); } } info_->setDecoder(soIndex_, decoder_); zapEof_ = spec.zapEof; switch (spec.records) { case StorageObjectSpec::asis: recordType_ = asis; insertRS_ = false; break; case StorageObjectSpec::cr: recordType_ = cr; break; case StorageObjectSpec::lf: recordType_ = lf; break; case StorageObjectSpec::crlf: recordType_ = crlf; break; case StorageObjectSpec::find: recordType_ = unknown; break; default: CANNOT_HAPPEN(); } soIndex_++; readSize_ = so_->getBlockSize(); nLeftOver_ = 0; break; } else setAccessError(); soIndex_++; } size_t keepSize = end() - start(); const size_t align = sizeof(int)/sizeof(Char); size_t readSizeChars = (readSize_ + (sizeof(Char) - 1))/sizeof(Char); readSizeChars = roundUp(readSizeChars, align); size_t neededSize; // in Chars size_t startOffset; // compute neededSize and readSize unsigned minBytesPerChar = decoder_->minBytesPerChar(); if (nLeftOver_ == 0 && minBytesPerChar >= sizeof(Char)) { // In this case we want to do decoding in place. // FIXME It might be a win on some systems (Irix?) to arrange that the // read buffer is on a page boundary. if (keepSize >= size_t(-1)/sizeof(Char) - (align - 1) - insertRS_) abort(); // FIXME throw an exception // Now size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize > 0 if (readSizeChars > size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize) abort(); neededSize = roundUp(readSizeChars + keepSize + insertRS_, align); startOffset = ((neededSize > bufSize_ ? neededSize : bufSize_) - readSizeChars - insertRS_ - keepSize); } else { // Needs to be room for everything before decoding. neededSize = (keepSize + insertRS_ + readSizeChars + (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char)); // Also must be room for everything after decoding. size_t neededSize2 = (keepSize + insertRS_ // all the converted characters + (nLeftOver_ + readSize_)/minBytesPerChar // enough Chars to contain left over bytes + ((readSize_ % minBytesPerChar + sizeof(Char) - 1) / sizeof(Char))); if (neededSize2 > neededSize) neededSize = neededSize2; neededSize = roundUp(neededSize, align); if (neededSize > size_t(-1)/sizeof(Char)) abort(); startOffset = 0; } if (bufSize_ < neededSize) reallocateBuffer(neededSize); Char *newStart = buf_ + startOffset; if (newStart != start() && keepSize > 0) memmove(newStart, start(), keepSize*sizeof(Char)); char *bytesStart = (char *)(buf_ + bufSize_ - readSizeChars) - nLeftOver_; if (nLeftOver_ > 0 && leftOver_ != bytesStart) memmove(bytesStart, leftOver_, nLeftOver_); moveStart(newStart); bufLim_ = end(); size_t nread; if (so_->read((char *)(buf_ + bufSize_ - readSizeChars), readSize_, mgr, nread)) { if (nread > 0) { const char *bytesEnd = bytesStart + nLeftOver_ + nread; size_t nChars = decoder_->decode((Char *)end() + insertRS_, bytesStart, nLeftOver_ + nread - (zapEof_ && bytesEnd[-1] == EOFCHAR), &leftOver_); nLeftOver_ = bytesEnd - leftOver_; if (nChars > 0) { if (insertRS_) { noteRS(); *(Char *)end() = RS; advanceEnd(end() + 1); insertRS_ = false; bufLim_ += 1; bufLimOffset_ += 1; } bufLim_ += nChars; bufLimOffset_ += nChars; break; } } } else so_ = 0; } ASSERT(end() < bufLim_); if (insertRS_) { noteRS(); insertChar(RS); insertRS_ = false; bufLimOffset_ += 1; } switch (recordType_) { case unknown: { const Char *e = findNextCrOrLf(end(), bufLim_); if (e) { if (*e == '\n') { recordType_ = lf; info_->noteInsertedRSs(); *(Char *)e = RE; advanceEnd(e + 1); insertRS_ = true; } else { if (e + 1 < bufLim_) { if (e[1] == '\n') { recordType_ = crlf; advanceEnd(e + 1); if (e + 2 == bufLim_) { bufLim_--; bufLimOffset_--; insertRS_ = true; } } else { advanceEnd(e + 1); recordType_ = cr; info_->noteInsertedRSs(); insertRS_ = true; } } else { recordType_ = crUnknown; advanceEnd(e + 1); } } } else advanceEnd(bufLim_); } break; case crUnknown: { if (*cur() == '\n') { noteRS(); advanceEnd(cur() + 1); recordType_ = crlf; } else { advanceEnd(cur() + 1); insertRS_ = true; recordType_ = cr; info_->noteInsertedRSs(); } } break; case lf: { Char *e = (Char *)findNextLf(end(), bufLim_); if (e) { advanceEnd(e + 1); *e = RE; insertRS_ = true; } else advanceEnd(bufLim_); } break; case cr: { const Char *e = findNextCr(end(), bufLim_); if (e) { advanceEnd(e + 1); insertRS_ = true; } else advanceEnd(bufLim_); } break; case crlf: { const Char *e = end(); for (;;) { e = findNextLf(e, bufLim_); if (!e) { advanceEnd(bufLim_); break; } // Need to delete final RS if not followed by anything. if (e + 1 == bufLim_) { bufLim_--; bufLimOffset_--; advanceEnd(e); insertRS_ = true; if (cur() == end()) return fill(mgr); break; } noteRSAt(e); e++; } } break; case asis: advanceEnd(bufLim_); break; default: CANNOT_HAPPEN(); } ASSERT(cur() < end()); return nextChar();}const Char *ExternalInputSource::findNextCr(const Char *start, const Char *end){ for (; start < end; start++) if (*start == '\r') return start; return 0;}const Char *ExternalInputSource::findNextLf(const Char *start, const Char *end){ for (; start < end; start++) if (*start == '\n') return start; return 0;}const Char *ExternalInputSource::findNextCrOrLf(const Char *start, const Char *end){ for (; start < end; start++) if (*start == '\n' || *start == '\r') return start; return 0;}void ExternalInputSource::pushCharRef(Char ch, const NamedCharRef &ref){ ASSERT(cur() == start()); noteCharRef(startIndex() + (cur() - start()), ref); insertChar(ch);}void ExternalInputSource::insertChar(Char ch){ if (start() > buf_) { if (cur() > start()) memmove((Char *)start() - 1, start(), (cur() - start())*sizeof(Char)); moveLeft(); *(Char *)cur() = ch; } else { // must have start == buf if (buf_ + (bufSize_ - (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char)) == bufLim_) { if (bufSize_ == size_t(-1)) abort(); // FIXME throw an exception reallocateBuffer(bufSize_ + 1); } else if (nLeftOver_ > 0 && ((char *)(bufLim_ + 1) > leftOver_)) { char *s = (char *)(buf_ + bufSize_) - nLeftOver_; memmove(s, leftOver_, nLeftOver_); leftOver_ = s; } if (cur() < bufLim_) memmove((Char *)cur() + 1, cur(), (bufLim_ - cur())*sizeof(Char)); *(Char *)cur() = ch; advanceEnd(end() + 1); bufLim_ += 1; }}void ExternalInputSource::reallocateBuffer(size_t newSize)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -