📄 textfile.cpp
字号:
#if PEK_TX_TECHLEVEL==0
//If not Windows, do this
m_file.open(filename, ios::binary | ios::in);
#elif PEK_TX_TECHLEVEL == 1
m_hFile = ::CreateFile( filename,
GENERIC_READ,
FILE_SHARE_READ,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL);
#else
m_file = new CFile;
//In windows, do this
m_file->Open(filename, CFile::modeRead | CFile::shareDenyWrite );
m_closeAndDeleteFile = true;
#endif
m_firstLine = true;
m_endoffile = (IsOpen()==0);
//Force reading to buffer next time
m_buffpos=-1;
m_useExtraBuffer=false;
ReadBOM();
}
#if PEK_TX_TECHLEVEL==2
//Construct a reader on top of a CFile supplied by the caller.
//m_closeAndDeleteFile is cleared here (the filename constructor sets it),
//presumably so that the reader leaves the caller's file alone on close.
CTextFileRead::CTextFileRead(CFile* file)
{
    ASSERT(file);

    //Use the caller's file object
    m_closeAndDeleteFile = false;
    m_file = file;

    //Nothing is buffered yet; -1 makes the next ReadByte() fill the buffer
    m_useExtraBuffer = false;
    m_buffpos = -1;

    //We are at the first line, and at end-of-file only if the file is not open
    m_firstLine = true;
    m_endoffile = !IsOpen();

    //Work out the encoding from the byte order mark
    ReadBOM();
}
#endif
//Detect the encoding of the file from its byte order mark (BOM).
//  FF FE    -> UNI16_LE
//  FE FF    -> UNI16_BE
//  EF BB BF -> UTF_8
//Anything else is treated as ASCII and the file is rewound, so that the
//bytes inspected here are read again as ordinary text.
//If the file is not open the encoding is set to ASCII, so m_encoding never
//holds an indeterminate value.
void CTextFileRead::ReadBOM()
{
    //Default; also the value that is kept when the file is not open
    m_encoding = ASCII;
    if( !IsOpen() )
        return;

    //Read the first two bytes (ReadByte stores 0 if the file is too short)
    unsigned char bytes[2];
    ReadByte(bytes[0]);
    ReadByte(bytes[1]);

    //Figure out what format the file is in
    if( bytes[0] == 0xFF && bytes[1] == 0xFE)
    {
        m_encoding = UNI16_LE;
        return;
    }
    if( bytes[0] == 0xFE && bytes[1] == 0xFF)
    {
        m_encoding = UNI16_BE;
        return;
    }
    if( bytes[0] == 0xEF && bytes[1] == 0xBB)
    {
        //This is probably UTF-8, check the third byte
        unsigned char third;
        ReadByte(third);
        if( third == 0xBF)
        {
            m_encoding = UTF_8;
            return;
        }
    }

    //No BOM: plain text. Go back to the start so no byte is lost.
    m_encoding = ASCII;
    ResetFilePointer();
}
//Reports the end-of-file flag (m_endoffile).
//The flag is raised by ReadByte() when no more data is available and
//cleared again by ResetFilePointer().
bool CTextFileRead::Eof() const
{
    const bool reachedEnd = m_endoffile;
    return reachedEnd;
}
//Read one byte.
//If a byte has been pushed back (m_useExtraBuffer) that byte is delivered.
//Otherwise the next byte of m_buf is delivered; the buffer is (re)filled
//from the file when it has not been read yet (m_buffpos == -1) or when its
//last slot has been consumed.
//When no more data is available, ch is set to 0 and the end-of-file flag
//is raised.
//At tech level 1 a failing ::ReadFile closes the file and throws a
//CTextFileException carrying the Win32 error code of the failed read.
void CTextFileRead::ReadByte(unsigned char& ch)
{
    //Use extrabuffer if needed
    if(m_useExtraBuffer)
    {
        m_useExtraBuffer=false;
        ch = m_extraBuffer_char;
        return;
    }

    //If buffer used or not read
    if(m_buffpos==-1 || m_buffpos == BUFFSIZE-1)
    {
#if PEK_TX_TECHLEVEL==0
        m_buffsize = m_file.read(m_buf, BUFFSIZE).gcount();
#elif PEK_TX_TECHLEVEL==1
        DWORD dwRead;
        if (!::ReadFile(m_hFile, m_buf, BUFFSIZE, &dwRead, NULL))
        {
            //Fetch the error code first: Close() may change the thread's
            //last-error value before we get a chance to report it.
            const DWORD readError = ::GetLastError();
            //Couldn't read!
            Close();
            m_buffsize = 0;
            //Throw exception
            throw CTextFileException(readError);
        }
        else
            m_buffsize = (int) dwRead;
#else
        m_buffsize=m_file->Read(m_buf, BUFFSIZE);
#endif
        //Nothing was read: we are at the end of the file
        if(m_buffsize == 0)
        {
            m_endoffile=true;
            ch = 0;
            return;
        }
        m_buffpos=0;
    }
    else
    {
        m_buffpos++;
        //Past the last valid byte of a partially filled buffer: end of file
        if(m_buffpos >= m_buffsize)
        {
            m_endoffile=true;
            ch = 0;
            return;
        }
    }
    ch = m_buf[m_buffpos];
}
//Read one wide character.
//If a character has been pushed back (m_useExtraBuffer) that character is
//delivered. Otherwise the character is decoded from the byte stream:
//  UTF_8    - one to four bytes. A four byte sequence is decoded when
//             wchar_t has at least 32 bits; with a narrower wchar_t the
//             whole sequence is consumed and U+FFFD is delivered, because
//             the code point does not fit in a single wchar_t.
//             Any other lead byte (a stray continuation byte or an invalid
//             lead) delivers U+FFFD.
//  UNI16_BE - two bytes, most significant first.
//  other    - two bytes, least significant first.
//Continuation bytes are not validated.
void CTextFileRead::ReadWchar(wchar_t& ch)
{
    if(m_useExtraBuffer)
    {
        m_useExtraBuffer=false;
        ch = m_extraBuffer_wchar;
        return;
    }
    if(m_encoding == UTF_8)
    {
        //This is quite tricky :-/
        //http://www.cl.cam.ac.uk/~mgk25/unicode.html#examples
        unsigned char byte;
        ReadByte(byte);
        int onesBeforeZero = 0;
        {   //Calc how many ones before the first zero...
            unsigned char temp = byte;
            while( (temp & 0x80)!=0 )
            {
                temp = (unsigned char) (temp << 1);
                onesBeforeZero++;
            }
        }
        if(onesBeforeZero==0)
        {
            //U-00000000 - U-0000007F: 0xxxxxxx
            ch = byte;
            return;
        }
        else if(onesBeforeZero == 2)
        {
            //U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
            unsigned char byteb;
            ReadByte(byteb);
            ch = (wchar_t) ( ((0x1F & byte) << 6)|
                             (0x3F & byteb)
                           ) ;
            return;
        }
        else if(onesBeforeZero == 3)
        {
            //U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
            unsigned char byteb, bytec;
            ReadByte(byteb);
            ReadByte(bytec);
            ch = (wchar_t) ( ((0x0F & byte) << 12) |
                             ((0x3F & byteb) << 6) |
                             (0x3F & bytec) );
            return;
        }
        else if(onesBeforeZero == 4)
        {
            //U-00010000 - U-0010FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            //Always consume the three continuation bytes, otherwise each
            //of them would be reported as a separate broken character.
            unsigned char byteb, bytec, byted;
            ReadByte(byteb);
            ReadByte(bytec);
            ReadByte(byted);
            if(sizeof(wchar_t) >= 4)
            {
                const unsigned long codePoint =
                    ((unsigned long) (0x07 & byte)  << 18) |
                    ((unsigned long) (0x3F & byteb) << 12) |
                    ((unsigned long) (0x3F & bytec) << 6)  |
                     (unsigned long) (0x3F & byted);
                ch = (wchar_t) codePoint;
            }
            else
            {
                //Does not fit in one wchar_t: deliver the replacement character
                ch = 0xFFFD;
            }
            return;
        }
        //This should never happen! If it does, something is wrong with the file.
        ch = 0xFFFD;
    }
    else
    {
        unsigned char bytes[2];
        ReadByte(bytes[0]);
        ReadByte(bytes[1]);
        if(m_encoding == UNI16_BE)
            ch = (wchar_t) ( ((wchar_t) bytes[0] << 8) |
                             (wchar_t) bytes[1]
                           ) ;
        else
            ch = (wchar_t) ( ((wchar_t) bytes[1] << 8) |
                             (wchar_t) bytes[0]
                           );
    }
}
//Rewind the file to its first byte and put the reader back in its initial
//state: no pushed-back character, empty read buffer, first line pending and
//end-of-file flag cleared.
void CTextFileRead::ResetFilePointer()
{
    //Move the underlying file back to the beginning
#if PEK_TX_TECHLEVEL==0
    m_file.clear();
    m_file.seekg(0, ios::beg);
#elif PEK_TX_TECHLEVEL==1
    ::SetFilePointer(m_hFile, 0, NULL, FILE_BEGIN);
#else
    m_file->SeekToBegin();
#endif

    //Drop any pushed-back character
    m_useExtraBuffer = false;

    //-1 forces ReadByte() to fill the buffer again
    m_buffpos = -1;

    //Reading starts over from the first line
    m_endoffile = false;
    m_firstLine = true;
}
//Read the next line into a narrow string.
//Returns false (leaving line untouched) when the end of the file has been
//reached. ASCII files are read directly; for every other encoding the line
//is read as wide characters and converted with WcharToString.
bool CTextFileRead::ReadLine(string& line)
{
    //Nothing left to read
    if(Eof())
        return false;

    if(m_encoding != ASCII)
    {
        //Read as wide characters, then convert
        wstring wide;
        const bool gotLine = ReadWcharLine(wide);
        if(gotLine)
            WcharToString(wide.c_str(), line);
        return gotLine;
    }

    return ReadCharLine(line);
}
//Read the next line into a wide string.
//Returns false (leaving line untouched) when the end of the file has been
//reached. ASCII files are read as narrow characters and converted with
//CharToWstring; every other encoding is read directly as wide characters.
bool CTextFileRead::ReadLine(wstring& line)
{
    //Nothing left to read
    if(Eof())
        return false;

    if(m_encoding != ASCII)
        return ReadWcharLine(line);

    //Read as narrow characters, then convert to wstring
    string narrow;
    const bool gotLine = ReadCharLine(narrow);
    if(gotLine)
        CharToWstring(narrow.c_str(), line);
    return gotLine;
}
//Read everything that is left in the file into "all".
//The lines are joined with "newline" (no separator before the first line
//and none after the last one).
//Returns false, leaving "all" untouched, if the file is not open;
//otherwise "all" is replaced and true is returned.
//
//The lines are appended straight to "all". An earlier version staged them
//in a heap buffer through strcpy_s with a destination size that left no
//room for the terminator, which made every copy fail; the buffer was also
//leaked if ReadLine threw.
bool CTextFileRead::Read(string& all, const string newline)
{
    if(!IsOpen())
        return false;

    all.clear();

    //Reserve room for the whole file up front so that appending does not
    //reallocate over and over. The guess can be negative when the file
    //size does not fit in an int; then simply skip the reservation.
    const int guess = GuessCharacterCount();
    if(guess > 0)
        all.reserve((string::size_type) guess + 2);

    string current;
    bool firstLine=true;
    while(!Eof())
    {
        if(ReadLine(current))
        {
            //Add new line, if not first line
            if(!firstLine)
                all += newline;
            else
                firstLine=false;
            all += current;
        }
    }
    return true;
}
//Read everything that is left in the file into "all" (wide version).
//The lines are joined with "newline" (no separator before the first line
//and none after the last one).
//Returns false, leaving "all" untouched, if the file is not open;
//otherwise "all" is replaced and true is returned.
//
//The lines are appended straight to "all". An earlier version staged them
//in a heap buffer through wcscpy_s with a destination size that left no
//room for the terminator, which made every copy fail; the buffer was also
//leaked if ReadLine threw.
bool CTextFileRead::Read(wstring& all, const wstring newline)
{
    if(!IsOpen())
        return false;

    all.clear();

    //Reserve room for the whole file up front so that appending does not
    //reallocate over and over. The guess can be negative when the file
    //size does not fit in an int; then simply skip the reservation.
    const int guess = GuessCharacterCount();
    if(guess > 0)
        all.reserve((wstring::size_type) guess + 2);

    wstring current;
    bool firstLine=true;
    while(!Eof())
    {
        if(ReadLine(current))
        {
            //Add new line, if not first line
            if(!firstLine)
                all += newline;
            else
                firstLine=false;
            all += current;
        }
    }
    return true;
}
//Estimate how many characters the file holds, for sizing buffers.
//With MFC (tech level 2) the estimate is based on the file length,
//otherwise on a fixed 1 MB default.
int CTextFileRead::GuessCharacterCount()
{
#if PEK_TX_TECHLEVEL==2
    const int totalBytes = (int) m_file->GetLength();
#else
    //Code needed to get file size when not using MFC
    const int totalBytes = 1024*1024; //Default: 1 MB
#endif

    //ASCII: one character per byte.
    //UTF-8: never more characters than bytes, so the byte count is a safe bound.
    const bool boundedByBytes = (m_encoding == ASCII || m_encoding == UTF_8);

    //The remaining encodings use two bytes per character.
    return boundedByBytes ? totalBytes : totalBytes/2;
}
#if PEK_TX_TECHLEVEL==2
//Read the next line into a CString.
//The line is read into the standard string type that matches the build
//(wstring when _UNICODE is defined, string otherwise) and then assigned.
//Returns false, leaving line untouched, when no line could be read.
bool CTextFileRead::ReadLine(CString& line)
{
#ifdef _UNICODE
    wstring text;
#else
    string text;
#endif
    const bool gotLine = ReadLine(text);
    if(gotLine)
        line = text.c_str();
    return gotLine;
}
//Read the rest of the file into a CString, joining lines with "newline".
//Works through the standard string type that matches the build
//(wstring when _UNICODE is defined, string otherwise).
//Returns false, leaving all untouched, when the underlying Read fails.
bool CTextFileRead::Read(CString& all, const CString newline)
{
#ifdef _UNICODE
    wstring separator=newline, contents;
#else
    string separator=newline, contents;
#endif
    if(Read(contents, separator))
    {
        all = contents.c_str();
        return true;
    }
    return false;
}
#endif
//Read one line of wide characters.
//Returns false if end-of-file was already reached (line will not be
//changed). Otherwise line is replaced by the characters up to, but not
//including, the next 0x0D / 0x0A or the end of the file, and true is
//returned.
//The character that ended the line is pushed back (extra buffer), so the
//next call can skip the line break: 0x0D 0x0A, a lone 0x0D or a lone 0x0A.
//Characters are appended by length, so an embedded L'\0' does not cut the
//line short.
bool CTextFileRead::ReadWcharLine(wstring& line)
{
    //EOF?
    if(Eof())
        return false;

    wchar_t ch=0;
    ReadWchar(ch);

    if(!m_firstLine)
    {
        //Skip the line break left over from the previous line
        if(ch == 0x0D) //If next is 0x0A, ignore that too
        {
            ReadWchar(ch);
            if(ch == 0x0A)
                ReadWchar(ch);
        }
        else if(ch == 0x0A)
        {
            ReadWchar(ch);
        }
    }
    else
    {
        //The first line has no preceding line break to skip.
        //From the next call on there is one.
        m_firstLine = false;
    }

    //Clear line
    line.erase();

    //It would be a lot easier if we didn't use a buffer, and added directly
    //to line, but that is quite slow.
    wchar_t buffer[BUFFSIZE];
    //Where to insert next character
    int bufpos = 0;

    //Read line
    while(ch != 0x0D && ch != 0x0A && !Eof())
    {
        //Buffer full? Move its contents to line.
        if(bufpos >= BUFFSIZE)
        {
            line.append(buffer, bufpos);
            bufpos=0;
        }
        buffer[bufpos] = ch;
        bufpos++;
        ReadWchar(ch);
    }

    //Add what is left in the buffer, by length rather than up to a terminator
    line.append(buffer, bufpos);

    //Save current character in extra buffer
    m_useExtraBuffer=true;
    m_extraBuffer_wchar=ch;
    return true;
}
//Read one line of narrow characters.
//Returns false if end-of-file was already reached (line will not be
//changed). Otherwise line is replaced by the bytes up to, but not
//including, the next 0x0D / 0x0A or the end of the file, and true is
//returned.
//The byte that ended the line is pushed back (extra buffer), so the next
//call can skip the line break: 0x0D 0x0A, a lone 0x0D or a lone 0x0A.
//Bytes are appended by length, so an embedded '\0' does not cut the line
//short.
bool CTextFileRead::ReadCharLine(string& line)
{
    //EOF?
    if(Eof())
        return false;

    unsigned char ch=0;
    ReadByte(ch);

    if(!m_firstLine)
    {
        //Skip the line break left over from the previous line
        if(ch == 0x0D) //If next is 0x0A, ignore that too
        {
            ReadByte(ch);
            if(ch == 0x0A)
                ReadByte(ch);
        }
        else if(ch == 0x0A)
        {
            ReadByte(ch);
        }
    }
    else
    {
        //The first line has no preceding line break to skip.
        //From the next call on there is one.
        m_firstLine = false;
    }

    //Clear line
    line.erase();

    //It would be a lot easier if we didn't use a buffer, and added directly
    //to line, but that is quite slow.
    char buffer[BUFFSIZE];
    //Where to insert next character
    int bufpos = 0;

    //Read line
    while(ch != 0x0D && ch != 0x0A && !Eof())
    {
        //Buffer full? Move its contents to line.
        if(bufpos >= BUFFSIZE)
        {
            line.append(buffer, bufpos);
            bufpos=0;
        }
        buffer[bufpos] = ch;
        bufpos++;
        ReadByte(ch);
    }

    //Add what is left in the buffer, by length rather than up to a terminator
    line.append(buffer, bufpos);

    //Save current character in extra buffer
    m_useExtraBuffer=true;
    m_extraBuffer_char=ch;
    return true;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -