📄 lineread.cpp
字号:
// lineread.cc// code for lineread.h// copyright SafeTP Development Group, Inc., 2000 Terms of use are as specified in license.txt#include "lineread.h" // this module#include "inputsrc.h" // StreamInputSource#include "xassert.h" // xassert#include <string.h> // memcpyStreamLineReader::StreamLineReader(StreamInputSource &src, int buflen) : source(src), eof(false), bufferLen(buflen){ buffer = new char[bufferLen+1]; buffer[bufferLen] = (char)ENDPOST_VALUE; start = end = buffer; SELFCHECK();}StreamLineReader::~StreamLineReader(){ SELFCHECK(); delete[] buffer;}// this function is more complicated than I had hoped, and does// not offer an easy way to abstract the CRLF test itself, due// to desire for unlimited string lengths (hence inability to// just use two buffers); I've done what I can to keep it as// simple as possible, with minimal duplicated code// NOTE: this is coded with the expectation that 'read' will// throw exceptions sometimes (i.e. invariants hold// across that call)bool StreamLineReader::getNextLine(string &str){ SELFCHECK(); // check for already being at EOF (necessary to handle input // that may lack final CRLF) if (eof) { return false; } // initialize crlf, which will be used to search for CRLF char *crlf = start; // loop until no CRLF and no room in buffer for(int iters=0; ; iters++) { // ensure we don't get into an infinite loop due // to programming error xassert(iters <= bufferLen*2); // scan for CRLF (end-1 because if crlf == end-1 then // we're testing *end when we test crlf[1]) while (crlf < end-1 && !(crlf[0] == CR && crlf[1] == LF)) { crlf++; } // see if we found it if (crlf < end-1) { // found a complete line; copy to string str.setlength(crlf - start); memcpy(str.pchar(), start, crlf-start); // advance 'start' beyond this CRLF start = crlf + 2; // never beyond end // tell caller about success SELFCHECK(); return true; } // if there is room in the buffer, add more data to it if ((end-buffer) < bufferLen) { // fill the buffer with at least one more character // (whatever the stream input can provide), blocking // if nothing is available immediately int readlen = fillBuffer(); if (readlen == 0) { // EOF; return what we have, ignoring lack of final CRLF eof = true; int retlen = end-start; str.setlength(retlen); memcpy(str.pchar(), start, retlen); str[retlen] = 0; // start==end: return false to indicate no string read // start!=end: return true to indicate a string was read; // next call to getNextLine will return false // because 'eof' is now set SELFCHECK(); return !(start==end); } } else { // no room in buffer, break out of loop to // handle as special case break; } } // we have reached the end of the buffer: // transfer what we have into temporary storage int prefixLen = end-start; string prefix(start, prefixLen); // reset buffer pointers start = end = buffer; // refill buffer with at least one more char int readlen = fillBuffer(); if (readlen == 0) { // EOF; return what we have as last string, ignore // lack of terminating CRLF eof = true; str = prefix; SELFCHECK(); return prefixLen > 0; } // see if the CRLF happened to straddle the buffer boundary if (prefixLen > 0 && prefix[prefixLen-1] == CR && start[0] == LF) { // 'prefix' currently has the CR, so return the string // without it str = prefix; str[prefixLen-1] = 0; // throw away CR // advance 'start' past the LF start++; // found string SELFCHECK(); return true; } // since we don't have the odd case of CRLF straddling, // just recursively read a string as in the normal case, // then we'll prepend prefix before returning it (this // is the slow-but-correct unusual case) string postfix; xassert(eof==false && start!=end); // may as well check if (!getNextLine(postfix)) { // not possible xassert(!"it is not possible for getNextLine to return false " "when recursively invoked, because we already know " "that eof==false and start!=end"); } // return the combined strings int postfixLen = postfix.length(); str.setlength(prefixLen + postfixLen); memcpy(str.pchar(), prefix.pcharc(), prefixLen); memcpy(str.pchar() + prefixLen, postfix.pcharc(), postfixLen); str[prefixLen+postfixLen] = 0; SELFCHECK(); return true;}// block until we can read at least one character// (invariants are checked because this is where we// expect exceptions to be routinely thrown)int StreamLineReader::fillBuffer(){ SELFCHECK(); int readlen = source.read(end, (buffer+bufferLen) - end); end += readlen; SELFCHECK(); return readlen;}// we have unprocessed data anytime there is a// nonempty string between 'start' and 'end'bool StreamLineReader::hasUnprocessedData() const{ SELFCHECK(); return !eof && (start != end);}// xassert invariants (each on a separate line for// improved diagnostic reports, at the expense// of greater object code size)void StreamLineReader::selfCheck() const{ xassert(buffer != NULL); xassert((byte)buffer[bufferLen] == ENDPOST_VALUE); xassert(bufferLen > 0); xassert(0 <= (start-buffer) && (start-buffer) <= bufferLen); xassert((start-buffer) <= (end-buffer) && (end-buffer) <= bufferLen);}// --------------- test code --------------------#ifdef LINEREAD_TEST#include "memsrc.h" // MemoryInputSource#include "exc.h" // xBase#include <stdio.h> // printf#include <ctype.h> // isprint// print a string, with "\xNN" for nonprintingvoid printstr(char const *s){ while (*s) { if (isprint(*s)) { printf("%c", *s); } else { printf("\\x%02X", (unsigned char)*s); } s++; }}// print all the CRLF-separated strings in a streamvoid printStrings(StreamLineReader &reader){ string s; for(;;) { // query reader bool hasUnp = reader.hasUnprocessedData(); bool hasNext = reader.getNextLine(s); // check for right relationship: // hasUnp implies hasNext xassert(hasNext || !hasUnp); // loop termination if (!hasNext) { break; } // print what we have printf("string: "); printstr(s); printf("\n"); }}void printBufferStrings(char const *buffer, int bufLen){ printf("buffer length: %d\n", bufLen); MemoryInputSource src(buffer, bufLen); StreamLineReader reader(src, 10 /*buffersize*/); printStrings(reader);}// test data of strings, working to hit corner cases// for buffer size of 10char const testData[] = "12345\r\n" // 7 - small string "abcdef\r\n" // 15 - string spanning border "987\r\n" // 20 - string ending at border "wx\ryz\r\n" // 27 - embedded CR "ABCD\nEF\r\n" // 36 - wrap, embedded LF "ijk\r\n" // 41 - CRLF straddles border "more\r\n" // 47 - simple "in\0zero\r\n" // 56 - embedded null "mno\r\n" // 61 - straddles border "missing" // 68 - last has no CRLF ;// another test set, to test proper CRLF terminationchar const testData2[] = "one\r\n" // 5 - normal "two\r\n" // 10 - last, CRLF terminated ;// test programint doit(){ printBufferStrings(testData, TABLESIZE(testData)-1); printBufferStrings(testData2, TABLESIZE(testData2)-1); printBufferStrings(testData2, 5); printBufferStrings(testData2, 0); printf("finished tests\n"); return 0;}// eh thunkint main(){ try { return doit(); } catch (xBase &x) { return printf("exception caught: %s\n", x.why()); }}#endif // LINEREAD_TEST
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -