📄 textfile.cpp

📁 A Model-View-Controller Framework that integrates with the MFC Doc/View architecture.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
#if PEK_TX_TECHLEVEL==0
	//If not Windows, do this
	m_file.open(filename, ios::binary | ios::in);
#elif PEK_TX_TECHLEVEL == 1

	m_hFile = ::CreateFile(	filename, 
							GENERIC_READ,
							FILE_SHARE_READ,
							NULL,
							OPEN_EXISTING, 
							FILE_ATTRIBUTE_NORMAL,
							NULL);
#else
	m_file = new CFile;
	//In windows, do this
	m_file->Open(filename, CFile::modeRead | CFile::shareDenyWrite );
	m_closeAndDeleteFile = true;
#endif

	m_firstLine = true;
	m_endoffile = (IsOpen()==0);

	//Force reading to buffer next time
	m_buffpos=-1;

	m_useExtraBuffer=false;
	
	ReadBOM();
}

#if PEK_TX_TECHLEVEL==2
//Create a reader on top of a CFile object supplied by the caller.
//  file - the file to read from; must not be NULL (checked with ASSERT)
//m_closeAndDeleteFile is cleared here, whereas the filename constructor
//sets it after allocating its own CFile.
//NOTE(review): presumably this keeps the reader from closing/deleting the
//caller's file - confirm against the destructor/Close().
CTextFileRead::CTextFileRead(CFile* file)
{
	ASSERT(file);

	//Use the caller's file object
	m_closeAndDeleteFile = false;
	m_file = file;

	//Nothing is buffered yet: no pushed-back character, and an invalid
	//buffer position forces a refill on the first read
	m_useExtraBuffer = false;
	m_buffpos = -1;

	//Start-of-file state; a file that is not open counts as end-of-file
	m_firstLine = true;
	m_endoffile = !IsOpen();

	//Work out the encoding from the first bytes of the file
	ReadBOM();
}
#endif

//Detect the text encoding from the byte order mark (BOM) at the start of
//the file and store it in m_encoding:
//  FF FE     -> UNI16_LE
//  FE FF     -> UNI16_BE
//  EF BB BF  -> UTF_8
//Anything else is treated as ASCII, and the file is rewound so that the
//bytes inspected here are read again as text.
//If the file is not open, m_encoding is set to ASCII so that it is never
//left uninitialised.
void CTextFileRead::ReadBOM()
{
	//Default; also the value used when the file could not be opened
	m_encoding = ASCII;

	if( !IsOpen() )
		return;

	//Read the first two bytes (ReadByte delivers 0 once the data runs out)
	unsigned char bytes[2] = { 0, 0 };
	ReadByte(bytes[0]);
	ReadByte(bytes[1]);

	if( bytes[0] == 0xFF && bytes[1] == 0xFE)
	{
		m_encoding = UNI16_LE;
		return;
	}

	if( bytes[0] == 0xFE && bytes[1] == 0xFF)
	{
		m_encoding = UNI16_BE;
		return;
	}

	if( bytes[0] == 0xEF && bytes[1] == 0xBB)
	{
		//This is probably UTF-8, the third byte decides
		unsigned char third = 0;
		ReadByte(third);

		if( third == 0xBF)
		{
			m_encoding = UTF_8;
			return;
		}
	}

	//No BOM found: keep ASCII and start over from the first byte
	ResetFilePointer();
}

//Tells whether the end of the file has been reached.
//Returns the end-of-file flag that the read functions maintain.
bool CTextFileRead::Eof() const
{
	const bool reachedEnd = m_endoffile;
	return reachedEnd;
}


//Read one byte from the file through the internal read buffer.
//  ch - receives the byte; set to 0 when no more data is available
//When the data runs out, m_endoffile is set.
//With PEK_TX_TECHLEVEL==1 a failed ::ReadFile closes the file and throws
//CTextFileException carrying the error code of the failed read.
void CTextFileRead::ReadByte(unsigned char& ch)
{
	//A byte pushed back by ReadCharLine is handed out first
	if(m_useExtraBuffer)
	{
		m_useExtraBuffer=false;
		ch = m_extraBuffer_char;
		return;
	}

	//Refill if nothing has been read yet or the last slot was consumed
	if(m_buffpos==-1 || m_buffpos == BUFFSIZE-1)
	{
#if PEK_TX_TECHLEVEL==0
		m_buffsize = m_file.read(m_buf, BUFFSIZE).gcount();
#elif PEK_TX_TECHLEVEL==1
		DWORD dwRead = 0;
		if (!::ReadFile(m_hFile, m_buf, BUFFSIZE, &dwRead, NULL))
		{
			//Fetch the error code before Close(): closing the file may
			//overwrite the last-error value of the failed read
			const DWORD readError = GetLastError();

			//Couldn't read!
			Close();
			m_buffsize = 0;

			//Throw exception
			throw CTextFileException(readError);
		}
		else
			m_buffsize = (int) dwRead;
#else
		m_buffsize=m_file->Read(m_buf, BUFFSIZE);
#endif

		//Nothing read means the file is exhausted
		if(m_buffsize == 0)
		{
			m_endoffile=true;
			ch = 0;
			return;
		}

		m_buffpos=0;
	}
	else
	{
		m_buffpos++;

		//The last refill was shorter than the buffer and is used up
		if(m_buffpos >= m_buffsize)
		{
			m_endoffile=true;
			ch = 0;
			return;
		}
	}

	ch = m_buf[m_buffpos];
}


//Read one wide character, decoded according to m_encoding.
//  ch - receives the decoded character
//UTF_8: one-, two- and three-byte sequences are decoded; any other lead
//byte gives U+FFFD (see http://www.cl.cam.ac.uk/~mgk25/unicode.html#examples).
//Every other encoding: two bytes are combined, first byte high for
//UNI16_BE and second byte high otherwise.
void CTextFileRead::ReadWchar(wchar_t& ch)
{
	//A character pushed back by ReadWcharLine is handed out first
	if(m_useExtraBuffer)
	{
		ch = m_extraBuffer_wchar;
		m_useExtraBuffer=false;
		return;
	}

	if(m_encoding != UTF_8)
	{
		//Two bytes per character
		unsigned char first, second;
		ReadByte(first);
		ReadByte(second);

		const bool bigEndian = (m_encoding == UNI16_BE);
		const unsigned char high = bigEndian ? first : second;
		const unsigned char low  = bigEndian ? second : first;

		ch = static_cast<wchar_t>( (high << 8) | low );
		return;
	}

	//UTF-8: the bit pattern of the lead byte gives the sequence length
	unsigned char lead;
	ReadByte(lead);

	if( (lead & 0x80) == 0x00 )
	{
		//0xxxxxxx: plain single byte
		ch = lead;
	}
	else if( (lead & 0xE0) == 0xC0 )
	{
		//U-00000080 - U-000007FF:  110xxxxx 10xxxxxx
		unsigned char second;
		ReadByte(second);

		ch = static_cast<wchar_t>( ((lead & 0x1F) << 6) |
								    (second & 0x3F) );
	}
	else if( (lead & 0xF0) == 0xE0 )
	{
		//U-00000800 - U-0000FFFF:  1110xxxx 10xxxxxx 10xxxxxx
		unsigned char second, third;
		ReadByte(second);
		ReadByte(third);

		ch = static_cast<wchar_t>( ((lead & 0x0F) << 12) |
								   ((second & 0x3F) << 6) |
								    (third & 0x3F) );
	}
	else
	{
		//Any other lead byte is not decoded: deliver the replacement character
		ch = 0xFFFD;
	}
}


//Rewind the file to its beginning and put the reader back into its initial
//state: no pushed-back character, read buffer invalidated, first line
//pending and end-of-file cleared.
void CTextFileRead::ResetFilePointer()
{
	//Drop any pushed-back character
	m_useExtraBuffer = false;

#if PEK_TX_TECHLEVEL==0
	//Clear the stream state, then seek to the start
	m_file.clear();
	m_file.seekg(0, ios::beg);
#elif PEK_TX_TECHLEVEL==1
	::SetFilePointer(m_hFile, 0, NULL, FILE_BEGIN);
#else
	m_file->SeekToBegin();
#endif

	//State for a fresh pass over the file
	m_endoffile = false;
	m_firstLine = true;

	//An invalid position makes the next ReadByte refill the buffer
	m_buffpos = -1;
}


//Read the next line as a narrow string.
//  line - receives the text of the line
//Returns false if end-of-file was already reached or no line could be read.
//ASCII files are read directly; every other encoding is read as wide
//characters and converted with WcharToString.
bool CTextFileRead::ReadLine(string& line)
{
	if(Eof())
		return false;

	//ASCII needs no conversion
	if(m_encoding == ASCII)
		return ReadCharLine(line);

	wstring wide;
	const bool gotLine = ReadWcharLine(wide);

	//Convert only when a line was actually read
	if(gotLine)
		WcharToString(wide.c_str(), line);

	return gotLine;
}

//Read the next line as a wide string.
//  line - receives the text of the line
//Returns false if end-of-file was already reached or no line could be read.
//Non-ASCII files are read directly as wide characters; ASCII files are read
//as narrow characters and converted with CharToWstring.
bool CTextFileRead::ReadLine(wstring& line)
{
	if(Eof())
		return false;

	//Wide encodings need no conversion
	if(m_encoding != ASCII)
		return ReadWcharLine(line);

	string narrow;
	const bool gotLine = ReadCharLine(narrow);

	//Convert to wstring only when a line was actually read
	if(gotLine)
		CharToWstring(narrow.c_str(), line);

	return gotLine;
}


//Read all remaining lines of the file into one string.
//  all     - receives the text; its previous content is discarded
//  newline - separator inserted between consecutive lines
//Returns false if the file is not open, true otherwise.
//
//Lines are appended straight to 'all'. The previous intermediate buffer
//copied with strcpy_s using temp.size() as destination size, which leaves
//no room for the terminator and therefore always fails, and the buffer
//leaked if ReadLine threw.
bool CTextFileRead::Read(string& all, const string newline)
{
	if(!IsOpen())
		return false;

	all.clear();

	//Reserve the estimated size up front to avoid repeated reallocation
	const int guess = GuessCharacterCount();
	if(guess > 0)
		all.reserve(guess);

	string temp;
	bool firstLine=true;

	while(!Eof())
	{
		if(ReadLine(temp))
		{
			//Add new line, if not first line
			if(!firstLine)
				all += newline;
			else
				firstLine=false;

			all += temp;
		}
	}

	return true;
}

//Read all remaining lines of the file into one wide string.
//  all     - receives the text; its previous content is discarded
//  newline - separator inserted between consecutive lines
//Returns false if the file is not open, true otherwise.
//
//Lines are appended straight to 'all'. The previous intermediate buffer
//copied with wcscpy_s using temp.size() as destination size, which leaves
//no room for the terminator and therefore always fails, and the buffer
//leaked if ReadLine threw.
bool CTextFileRead::Read(wstring& all, const wstring newline)
{
	if(!IsOpen())
		return false;

	all.clear();

	//Reserve the estimated size up front to avoid repeated reallocation
	const int guess = GuessCharacterCount();
	if(guess > 0)
		all.reserve(guess);

	wstring temp;
	bool firstLine=true;

	while(!Eof())
	{
		if(ReadLine(temp))
		{
			//Add new line, if not first line
			if(!firstLine)
				all += newline;
			else
				firstLine=false;

			all += temp;
		}
	}

	return true;
}

//Estimate how many characters the file holds, for sizing buffers.
//Returns the byte count for ASCII and UTF_8 and half the byte count for
//every other encoding.
int CTextFileRead::GuessCharacterCount()
{
#if PEK_TX_TECHLEVEL==2
	const int totalBytes = (int) m_file->GetLength();
#else
	//The file size is not queried when MFC is not used; assume 1 MB
	const int totalBytes = 1024*1024;
#endif

	//ASCII: one byte per character.
	//UTF-8: never more characters than bytes, so the byte count is used.
	const bool byteSized = (m_encoding == ASCII) || (m_encoding == UTF_8);

	//Otherwise every character takes two bytes
	return byteSized ? totalBytes : totalBytes/2;
}

#if PEK_TX_TECHLEVEL==2

//Read the next line into a CString.
//  line - receives the text of the line; left unchanged on failure
//Returns the result of the underlying ReadLine overload.
bool CTextFileRead::ReadLine(CString& line)
{
	//Pick the string type selected by _UNICODE for this build
#ifdef _UNICODE
	wstring text;
#else
	string text;
#endif

	const bool gotLine = ReadLine(text);

	if(gotLine)
		line = text.c_str();

	return gotLine;
}

//Read all remaining lines of the file into a CString.
//  all     - receives the text; left unchanged on failure
//  newline - separator inserted between consecutive lines
//Returns the result of the underlying Read overload.
bool CTextFileRead::Read(CString& all, const CString newline)
{
	//Pick the string type selected by _UNICODE for this build
#ifdef _UNICODE
	wstring content;
	wstring separator = newline;
#else
	string content;
	string separator = newline;
#endif

	const bool gotText = Read(content, separator);

	if(gotText)
		all = content.c_str();

	return gotText;
}

#endif


//Read the next line as wide characters.
//  line - receives the text of the line, without the line break
//Returns false if end-of-file was already reached (line is left unchanged),
//true otherwise.
//The character that stopped the line is kept in the extra buffer, so the
//next call starts by skipping the line break (0x0D 0x0A, 0x0D or 0x0A).
//The collected characters are always appended by length, so a NUL character
//inside a line no longer truncates the rest of it.
bool CTextFileRead::ReadWcharLine(wstring& line)
{
	//EOF?
	if(Eof())
		return false;

	wchar_t ch=0;
	ReadWchar(ch);

	if(m_firstLine)
	{
		//No line break precedes the first line; from now on one does
		m_firstLine = false;
	}
	else if(ch == 0x0D)
	{
		//If next is 0x0A, ignore that too
		ReadWchar(ch);

		if(ch == 0x0A)
			ReadWchar(ch);
	}
	else if(ch == 0x0A)
	{
		ReadWchar(ch);
	}

	//Clear line
	line.clear();

	//Collect characters in a local buffer and append them in chunks;
	//adding them to line one by one is quite slow.
	wchar_t buffer[BUFFSIZE];
	//Where to insert next character
	int bufpos = 0;

	//Read line
	while(ch != 0x0D && ch != 0x0A && !Eof())
	{
		//Buffer full? Move its content to line
		if(bufpos >= BUFFSIZE)
		{
			line.append(buffer, bufpos);
			bufpos=0;
		}

		buffer[bufpos] = ch;
		bufpos++;

		ReadWchar(ch);
	}

	//Append the remainder by length, not as a NUL-terminated string
	line.append(buffer, bufpos);

	//Save current character in extra buffer
	m_useExtraBuffer=true;
	m_extraBuffer_wchar=ch;

	return true;
}

//Read the next line as narrow characters.
//  line - receives the text of the line, without the line break
//Returns false if end-of-file was already reached (line is left unchanged),
//true otherwise.
//The byte that stopped the line is kept in the extra buffer, so the next
//call starts by skipping the line break (0x0D 0x0A, 0x0D or 0x0A).
//The collected bytes are always appended by length, so a NUL byte inside a
//line no longer truncates the rest of it.
bool CTextFileRead::ReadCharLine(string& line)
{
	//EOF?
	if(Eof())
		return false;

	unsigned char ch=0;
	ReadByte(ch);

	if(m_firstLine)
	{
		//No line break precedes the first line; from now on one does
		m_firstLine = false;
	}
	else if(ch == 0x0D)
	{
		//If next is 0x0A, ignore that too
		ReadByte(ch);

		if(ch == 0x0A)
			ReadByte(ch);
	}
	else if(ch == 0x0A)
	{
		ReadByte(ch);
	}

	//Clear line
	line.clear();

	//Collect bytes in a local buffer and append them in chunks;
	//adding them to line one by one is quite slow.
	char buffer[BUFFSIZE];
	//Where to insert next character
	int bufpos = 0;

	//Read line
	while(ch != 0x0D && ch != 0x0A && !Eof())
	{
		//Buffer full? Move its content to line
		if(bufpos >= BUFFSIZE)
		{
			line.append(buffer, bufpos);
			bufpos=0;
		}

		buffer[bufpos] = ch;
		bufpos++;

		ReadByte(ch);
	}

	//Append the remainder by length, not as a NUL-terminated string
	line.append(buffer, bufpos);

	//Save current character in extra buffer
	m_useExtraBuffer=true;
	m_extraBuffer_char=ch;

	return true;
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -