📄 simplecsvparser.cpp
字号:
// SimpleCSVParser.cpp: implementation of the CSimpleCSVParser class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "SimpleCSVParser.h"
#include "SimpleCSVParser.h"
// Debug builds only: redefine THIS_FILE as this source file's name and
// replace `new` with DEBUG_NEW for the remainder of this translation unit.
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Creates an empty parser: no file buffer, no row pointers, zero counts.
// Initialize() must be called before any data can be read.
CSimpleCSVParser::CSimpleCSVParser()
{
    // Not yet loaded.
    m_bInitialized = FALSE;

    // No file contents and no row positions.
    m_pData = NULL;
    m_pFirstRow = m_pLastRow = m_pCurRow = NULL;

    // Nothing counted so far.
    m_nRows = m_nCols = m_nCurRow = 0;
}
// Releases everything the parser owns by delegating to Cleanup().
CSimpleCSVParser::~CSimpleCSVParser()
{
    this->Cleanup();
}
// Opens the given file and reads all of its data into memory.
// The file is expected to be a CSV file whose first record contains the
// column definitions (see ParseColumns). After a successful load the
// parser is positioned on the first data row, if there is one.
//
// May be called more than once; each call discards the previous load.
//
// fileName - path of the CSV file to load.
// Returns TRUE when the file was loaded, FALSE when the buffer could not
// be allocated or the file is too large to be read in a single
// CFile::Read call.
// NOTE(review): the CFile constructor used here reports open failures by
// throwing rather than by return value - confirm callers handle that.
BOOL CSimpleCSVParser::Initialize(LPCTSTR fileName)
{
    // Release the previous load first, before m_bInitialized is cleared:
    // ForEachColumn() is a no-op while the flag is FALSE, so any cleanup
    // routed through it would otherwise skip the old column definitions
    // and leak them.
    Cleanup();

    m_bInitialized = FALSE;
    m_pFirstRow = NULL;
    m_pLastRow = NULL;
    m_pCurRow = NULL;
    m_nRows = 0;
    m_nCols = 0;
    m_nCurRow = 0;

    CFile file(fileName, CFile::modeRead | CFile::shareDenyNone);

    // CFile::Read takes a UINT byte count and one extra byte is needed for
    // the end-of-file marker, so refuse anything that does not fit.
    if (file.GetLength() >= (UINT)-1)
        return FALSE;
    const UINT nFileLen = (UINT)file.GetLength();

    // allocate a buffer large enough to hold the file data plus the marker
    m_pData = new BYTE[nFileLen + 1];
    if (!m_pData)
        return FALSE;

    // Fill the buffer with 0xFF, the byte this class uses as its
    // end-of-file marker. Reading the file overwrites every byte except
    // the last one (and any bytes a short read leaves untouched), so the
    // data is always followed by at least one marker.
    memset(m_pData, (unsigned char)-1, nFileLen + 1);

    // read the data into the buffer
    file.Read((void*)m_pData, nFileLen);

    ParseColumns();
    InitializeRowPtrs();

    // A file with only a header line has no first row; skip the move
    // instead of tripping MoveFirst()'s assertion.
    if (m_pFirstRow != NULL)
        MoveFirst();

    m_bInitialized = TRUE;
    return TRUE;
}
// Per-column callback: destroys one column definition. NULL is ignored.
void DeleteColumn(CColumnDefinition* pColDef)
{
    if (pColDef == NULL)
        return;

    delete pColDef;
}
void CSimpleCSVParser::Cleanup()
{
if (m_pData)
{
delete [] m_pData;
m_pData = NULL;
}
ForEachColumn(::DeleteColumn);
m_columns.RemoveAll();
}
// Assumption: The first row of data contains the column information in
// the following format:
// ColName:ColType[:ColSize(for string types)][:Precision(for decimal types)]
//
// Strings have the following format:
// ColName:ColType:ColSize
//
// Ints have the following format:
// ColName:ColType
//
// Floats have the following format:
// ColName:ColType:Precision
void CSimpleCSVParser::ParseColumns()
{
CColumnDefinition* pColDef;
TCHAR buf[256];
TCHAR colName[256];
DWORD colType = stringType;
DWORD colSize = 0;
DWORD colPrecision = 0;
LPBYTE pCurByte = m_pData;
int nStartByte = 0;
int nCurByte = 0;
enum States { getColName, getColType, getColSize, getColPrecision };
States nextState = getColName;
// Only read until we reach the end of line
while (pCurByte)
{
switch(*pCurByte)
{
// new column definition
case '\r':
case ',':
{
switch (nextState)
{
case getColType:
memset(buf, NULL, sizeof(buf));
memcpy(buf, &m_pData[nStartByte], nCurByte - nStartByte);
colType = ColumnTypeFromString(buf);
ASSERT((colType == dateType) || (colType == intType));
nStartByte = nCurByte + 1; // skip the ','
nextState = getColName;
break;
case getColSize:
memset(buf, NULL, sizeof(buf));
memcpy(buf, &m_pData[nStartByte], nCurByte - nStartByte);
colSize = atol(buf);
nStartByte = nCurByte + 1; // skip the ','
nextState = getColName;
break;
case getColPrecision:
memset(buf, NULL, sizeof(buf));
memcpy(buf, &m_pData[nStartByte], nCurByte - nStartByte);
colPrecision = atol(buf);
nStartByte = nCurByte + 1; // skip the ','
nextState = getColName;
break;
}
pColDef = new CColumnDefinition();
pColDef->SetColumnData(colName, colType, colSize, colPrecision);
m_columns.Add((void*)pColDef);
memset(colName, NULL, sizeof(colName));
colType = stringType;
colSize = 0;
colPrecision = 0;
break;
}
// column attribute
case ':':
{
switch(nextState)
{
case getColName:
memset(colName, NULL, sizeof(colName));
memcpy(colName, &m_pData[nStartByte], nCurByte - nStartByte);
nStartByte = nCurByte + 1; // skip the ':'
nextState = getColType;
break;
case getColType:
memset(buf, NULL, sizeof(buf));
memcpy(buf, &m_pData[nStartByte], nCurByte - nStartByte);
colType = ColumnTypeFromString(buf);
ASSERT((colType == stringType) || (colType == floatType));
nStartByte = nCurByte + 1; // skip the ':'
if (colType == stringType)
nextState = getColSize;
else
nextState = getColPrecision;
break;
case getColSize:
case getColPrecision:
ASSERT(FALSE);
break;
}
break;
}
}
// End of the line
if (*pCurByte == '\r')
break;
nCurByte++;
pCurByte++;
}
m_nCols = m_columns.GetSize();
ASSERT(m_nCols > 0);
}
// Maps a column type name from the header to its type constant.
// The comparison is case-sensitive; any name other than "integer",
// "float" or "date" is treated as a string column.
DWORD CSimpleCSVParser::ColumnTypeFromString(LPTSTR strColType)
{
    const CString typeName(strColType);

    if (typeName.Compare(_T("integer")) == 0)
        return intType;

    if (typeName.Compare(_T("float")) == 0)
        return floatType;

    if (typeName.Compare(_T("date")) == 0)
        return dateType;

    // by default assume string data type
    return stringType;
}
// Per-column callback: shows the column's name in a message box.
// NULL is ignored.
void DisplayColumn(CColumnDefinition* pColDef)
{
    if (!pColDef)
        return;

    AfxMessageBox(pColDef->GetColumnName());
}
// Shows the name of every column, one message box per column.
void CSimpleCSVParser::DisplayColumns()
{
    ForEachColumn(&::DisplayColumn);
}
void CSimpleCSVParser::InitializeRowPtrs()
{
m_pFirstRow = NULL;
m_pLastRow = NULL;
m_pCurRow = NULL;
// The First Row will be the 2nd row in the file since
// the first row contains column definitions
m_pFirstRow = FindNextRow(m_pData);
m_pCurRow = m_pFirstRow;
LPBYTE pHoldPos = m_pFirstRow;
m_nRows = 0;
while (pHoldPos != NULL)
{
m_pLastRow = pHoldPos;
pHoldPos = FindNextRow(pHoldPos);
m_nRows++;
}
}
// Scans backwards from pPos to the start of the row before it.
// Returns NULL when pPos or m_pFirstRow is NULL. The scan never goes
// back past m_pFirstRow, which is also the result when pPos is already
// the first row.
LPBYTE CSimpleCSVParser::FindPrevRow(LPBYTE pPos)
{
    ASSERT(pPos);
    if (pPos == NULL)
        return NULL;

    ASSERT(m_pFirstRow != NULL);
    if (!m_pFirstRow)
        return NULL;

    BOOL bPassedLineBreak = FALSE;
    LPBYTE pScan = pPos;
    for (; pScan != m_pFirstRow; --pScan)
    {
        const BOOL bLineBreak = (*pScan == '\r' || *pScan == '\n');
        if (!bLineBreak)
            continue;

        if (bPassedLineBreak)
        {
            // Second line break reached: the previous row starts just
            // after it.
            ++pScan;
            break;
        }

        // First line break reached: step back over the whole run of
        // '\r' / '\n' bytes into the previous row.
        do
        {
            --pScan;
        } while (*pScan == '\r' || *pScan == '\n');
        bPassedLineBreak = TRUE;
    }
    return pScan;
}
// Scans forward from pPos to the start of the next row.
// Returns NULL when pPos is NULL or when the end-of-file marker (0xFF)
// is reached before another row begins.
LPBYTE CSimpleCSVParser::FindNextRow(LPBYTE pPos)
{
    ASSERT(pPos);
    if (pPos == NULL)
        return NULL;

    const unsigned char kEndOfFile = (unsigned char)-1;
    LPBYTE pScan = pPos;

    // Skip the remainder of the current row.
    while (*pScan != kEndOfFile && *pScan != '\r' && *pScan != '\n')
        pScan++;

    if (*pScan == kEndOfFile)
        return NULL;

    // Skip the run of line-break bytes that separates the rows.
    while (*pScan == '\r' || *pScan == '\n')
        pScan++;

    if (*pScan == kEndOfFile)
        return NULL;
    return pScan;
}
// Calls pForEachColumnFunc once for every column definition, in index
// order. Does nothing until the parser has been initialized.
void CSimpleCSVParser::ForEachColumn(ForEachColumnFunc pForEachColumnFunc)
{
    if (m_bInitialized)
    {
        int ndx = 0;
        while (ndx < m_columns.GetSize())
        {
            pForEachColumnFunc((CColumnDefinition*) m_columns[ndx]);
            ndx++;
        }
    }
}
// Splits the row at m_pCurRow into comma-separated fields and stores each
// non-empty field in the column definition with the same index.
//
// The row ends at the first '\r', '\n' or end-of-file marker (0xFF).
// Column values are cleared first, so an empty field leaves its column
// empty. Fields beyond the last defined column are ignored.
//
// Returns FALSE when there is no current row, TRUE otherwise.
BOOL CSimpleCSVParser::ParseCurrentRow()
{
    CleanColumnData();

    ASSERT(m_pCurRow != NULL);
    if (m_pCurRow == NULL)
        return FALSE;

    const BYTE endOfFile = (BYTE)-1;
    const int nColumns = (int)m_columns.GetSize();

    LPBYTE pFieldStart = m_pCurRow;
    int curCol = 0;

    for (LPBYTE pPos = m_pCurRow; ; pPos++)
    {
        const BYTE ch = *pPos;
        const BOOL bEndOfRow = (ch == '\r' || ch == '\n' || ch == endOfFile);
        if (!bEndOfRow && ch != ',')
            continue;   // ordinary field byte

        // pPos is on a delimiter: [pFieldStart, pPos) is one field.
        // The column index is checked so a row with more fields than the
        // header defines cannot index past the end of m_columns.
        if (pPos > pFieldStart && curCol < nColumns)
        {
            // Build the value from an explicit length; the field is not
            // zero-terminated inside the file buffer.
            CString colValue(reinterpret_cast<LPCSTR>(pFieldStart),
                             (int)(pPos - pFieldStart));
            ((CColumnDefinition*)m_columns.GetAt(curCol))->SetColumnValue(colValue);
        }
        curCol++;

        if (bEndOfRow)
            break;

        // The next field starts right after the comma. The loop increment
        // moves pPos onto that byte, so it is examined as well - it may
        // itself be a comma (empty field) or the end of the row.
        pFieldStart = pPos + 1;
    }
    return TRUE;
}
// Per-column callback: resets the column's value to the empty string.
// NULL is ignored.
void EmptyData(CColumnDefinition* pColDef)
{
    if (!pColDef)
        return;

    pColDef->SetColumnValue(_T(""));
}
// Clears the stored value of every column.
void CSimpleCSVParser::CleanColumnData()
{
    ForEachColumn(&EmptyData);
}
// Makes the first data row current and parses it.
// Returns FALSE when there is no first row; otherwise the result of
// ParseCurrentRow().
BOOL CSimpleCSVParser::MoveFirst()
{
    ASSERT(m_pFirstRow);
    if (m_pFirstRow == NULL)
        return FALSE;

    m_pCurRow = m_pFirstRow;
    m_nCurRow = 1;      // row numbers kept in m_nCurRow start at 1
    return ParseCurrentRow();
}
// Advances to the row after the current one and parses it.
// Returns FALSE when there is no current or last row, when the current
// row is already the last one, or when parsing the new row fails.
BOOL CSimpleCSVParser::MoveNext()
{
    ASSERT(m_pCurRow);
    if (m_pCurRow == NULL)
        return FALSE;

    ASSERT(m_pLastRow);
    if (m_pLastRow == NULL)
        return FALSE;

    // Already on the last row: nowhere to go.
    if (m_pLastRow == m_pCurRow)
        return FALSE;

    m_pCurRow = FindNextRow(m_pCurRow);
    if (!ParseCurrentRow())
        return FALSE;

    m_nCurRow++;
    return TRUE;
}
// Steps back to the row before the current one and parses it.
// Returns FALSE when there is no current or first row, when the current
// row is already the first one, or when parsing the new row fails.
BOOL CSimpleCSVParser::MovePrev()
{
    ASSERT(m_pCurRow);
    if (m_pCurRow == NULL)
        return FALSE;

    ASSERT(m_pFirstRow);
    if (m_pFirstRow == NULL)
        return FALSE;

    // Already on the first row: nowhere to go.
    if (m_pFirstRow == m_pCurRow)
        return FALSE;

    m_pCurRow = FindPrevRow(m_pCurRow);
    if (!ParseCurrentRow())
        return FALSE;

    m_nCurRow--;
    return TRUE;
}
// Makes the last data row current and parses it.
// Returns FALSE when there is no last row or parsing fails; on success
// the current row number becomes the row count.
BOOL CSimpleCSVParser::MoveLast()
{
    ASSERT(m_pLastRow);
    if (m_pLastRow == NULL)
        return FALSE;

    m_pCurRow = m_pLastRow;
    if (!ParseCurrentRow())
        return FALSE;

    m_nCurRow = m_nRows;
    return TRUE;
}
// Returns the number of column definitions currently held.
long CSimpleCSVParser::GetColumnCount()
{
    return static_cast<long>(m_columns.GetSize());
}
// Returns the number of data rows counted by InitializeRowPtrs().
long CSimpleCSVParser::GetRowCount() const
{
    return static_cast<long>(m_nRows);
}
// Returns the column definition at the given zero-based index.
//
// ndx - index of the column, 0 .. GetColumnCount() - 1.
// Returns NULL when ndx is out of range, instead of indexing past the
// ends of the column array.
CColumnDefinition* CSimpleCSVParser::GetColumnDefinition(int ndx)
{
    if (ndx < 0 || ndx >= m_columns.GetSize())
        return NULL;

    return (CColumnDefinition*)m_columns[ndx];
}
// Makes the data row with the given zero-based index current and parses it.
//
// nIndex - row index, 0 .. GetRowCount() - 1.
// Returns FALSE when nIndex is out of range or when a move on the way to
// the requested row fails; TRUE when the requested row is now current.
BOOL CSimpleCSVParser::GotoRow(long nIndex)
{
    if (nIndex < 0)
        return FALSE;
    if (nIndex >= m_nRows)
        return FALSE;

    // The two ends can be reached directly.
    if (nIndex == 0)
        return MoveFirst();
    if (nIndex == (m_nRows - 1))
        return MoveLast();

    // m_nCurRow is 1-based (MoveFirst sets it to 1, MoveLast to m_nRows)
    // while nIndex is 0-based, so the row number to reach is nIndex + 1.
    const long nTargetRow = nIndex + 1;

    // Step one row at a time; stop on a failed move rather than looping
    // forever on a row counter that no longer changes.
    while (m_nCurRow < nTargetRow)
    {
        if (!MoveNext())
            return FALSE;
    }
    while (m_nCurRow > nTargetRow)
    {
        if (!MovePrev())
            return FALSE;
    }
    return TRUE;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -