📄 cpl_csv.cpp
字号:
/******************************************************************************
* $Id: cpl_csv.cpp 10646 2007-01-18 02:38:10Z warmerdam $
*
* Project: CPL - Common Portability Library
* Purpose: CSV (comma separated value) file access.
* Author: Frank Warmerdam, warmerdam@pobox.com
*
******************************************************************************
* Copyright (c) 1999, Frank Warmerdam
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
****************************************************************************/
#include "cpl_csv.h"
#include "cpl_conv.h"
#include "cpl_multiproc.h"
CPL_CVSID("$Id: cpl_csv.cpp 10646 2007-01-18 02:38:10Z warmerdam $");
/* Forward declaration of GDALDefaultCSVFilename(), bracketed by the */
/* CPL_C_START / CPL_C_END macros (presumably these expand to an */
/* extern "C" block -- confirm in the CPL port header). The function */
/* is not defined in the visible part of this file. */
CPL_C_START
const char * GDALDefaultCSVFilename( const char *pszBasename );
CPL_C_END
/* ==================================================================== */
/* The CSVTable is a persistent set of info about an open CSV */
/* table. While it doesn't currently maintain a record index, */
/* or in-memory copy of the table, it could be changed to do so */
/* in the future. */
/* ==================================================================== */
/* Per-file state for one open CSV table. Instances are chained through */
/* psNext into the open-table list that CSVAccess() and CSVDeaccess() */
/* maintain. */
typedef struct ctb {
/* Open handle on the CSV file; CSVAccess() obtains it from VSIFOpen(). */
FILE *fp;
/* Next table in the open-table list, or NULL at the end of the list. */
struct ctb *psNext;
/* Private copy of the filename; it is also the key compared when a */
/* table is looked up in the list. */
char *pszFilename;
/* String list holding the header record parsed by CSVAccess(). */
char **papszFieldNames;
/* String list released with CSLDestroy() in CSVDeaccess(). */
/* NOTE(review): presumably the fields of the most recently returned */
/* record -- confirm against the record readers. */
char **papszRecFields;
/* NOTE(review): presumably the line number of the last record read; */
/* it is not referenced in the visible code -- confirm. */
int iLastLine;
/* Cache for whole file */
/* The four members below are zero/NULL after CSVAccess() and the three */
/* pointers are released with CPLFree() in CSVDeaccess(). */
/* NOTE(review): presumably populated by CSVIngest() -- confirm. */
int nLineCount;
char **papszLines;
int *panLineIndex;
char *pszRawData;
} CSVTable;
/* It would likely be better to share this list between threads, but
that will require some rework. */
/************************************************************************/
/* CSVAccess() */
/* */
/* This function will fetch a handle to the requested table. */
/* If not found in the ``open table list'' the table will be */
/* opened and added to the list. Eventually this function may */
/* become public with an abstracted return type so that */
/* applications can set options about the table. For now this */
/* isn't done. */
/************************************************************************/
static CSVTable *CSVAccess( const char * pszFilename )
{
    /* ---------------------------------------------------------------- */
    /*      Locate the head-of-list slot kept in thread local storage,  */
    /*      creating an empty (NULL) slot the first time through.       */
    /* ---------------------------------------------------------------- */
    CSVTable **ppsListHead = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );

    if( ppsListHead == NULL )
    {
        ppsListHead = (CSVTable **) CPLCalloc(1,sizeof(CSVTable*));
        CPLSetTLS( CTLS_CSVTABLEPTR, ppsListHead, TRUE );
    }

    /* ---------------------------------------------------------------- */
    /*      Walk the list; a table whose filename matches under EQUAL() */
    /*      is returned as is.  (Moving a hit to the front of the list  */
    /*      would speed up frequently used tables, but is not done.)    */
    /* ---------------------------------------------------------------- */
    CSVTable *psEntry = *ppsListHead;

    while( psEntry != NULL )
    {
        if( EQUAL(psEntry->pszFilename,pszFilename) )
            return psEntry;

        psEntry = psEntry->psNext;
    }

    /* ---------------------------------------------------------------- */
    /*      Not open yet: try to open the file.  Failure is reported    */
    /*      to the caller as NULL and the list is left unchanged.       */
    /* ---------------------------------------------------------------- */
    FILE *fpCSV = VSIFOpen( pszFilename, "rb" );

    if( fpCSV == NULL )
        return NULL;

    /* ---------------------------------------------------------------- */
    /*      Build a zero-initialised record for the table and push it   */
    /*      onto the front of the list.                                 */
    /* ---------------------------------------------------------------- */
    CSVTable *psNew = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);

    psNew->fp = fpCSV;
    psNew->pszFilename = CPLStrdup( pszFilename );
    psNew->psNext = *ppsListHead;
    *ppsListHead = psNew;

    /* ---------------------------------------------------------------- */
    /*      The first record of the file is the header; keep its        */
    /*      parsed fields as the field names.                           */
    /* ---------------------------------------------------------------- */
    psNew->papszFieldNames = CSVReadParseLine( fpCSV );

    return psNew;
}
/************************************************************************/
/* CSVDeaccess() */
/************************************************************************/
/*
 * Close one open CSV table and release everything attached to it.
 *
 * pszFilename: name of the table to close, matched against the stored
 *              filenames with EQUAL().  Passing NULL closes every table
 *              in the calling thread's list.
 *
 * Nothing is returned.  If the thread has no table list the call is a
 * no-op; if the named table is not in the list a debug message is
 * emitted and nothing is changed.
 */
void CSVDeaccess( const char * pszFilename )
{
    /* ---------------------------------------------------------------- */
    /*      Fetch the thread's list head.  Unlike CSVAccess() nothing   */
    /*      is allocated here: without a list there is nothing to free. */
    /* ---------------------------------------------------------------- */
    CSVTable **ppsCSVTableList = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );

    if( ppsCSVTableList == NULL )
        return;

    /* ---------------------------------------------------------------- */
    /*      A NULL means deaccess all tables.  Each recursive call      */
    /*      unlinks a table from the list, so the loop terminates.      */
    /* ---------------------------------------------------------------- */
    if( pszFilename == NULL )
    {
        while( *ppsCSVTableList != NULL )
            CSVDeaccess( (*ppsCSVTableList)->pszFilename );

        return;
    }

    /* ---------------------------------------------------------------- */
    /*      Find this table, remembering its predecessor so that it     */
    /*      can be unlinked.                                            */
    /* ---------------------------------------------------------------- */
    CSVTable *psLast = NULL;
    CSVTable *psTable = *ppsCSVTableList;

    while( psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename) )
    {
        psLast = psTable;
        psTable = psTable->psNext;
    }

    if( psTable == NULL )
    {
        /* The message previously named a non-existent "CPLDeaccess". */
        CPLDebug( "CPL_CSV", "CSVDeaccess( %s ) - no match.", pszFilename );
        return;
    }

    /* ---------------------------------------------------------------- */
    /*      Remove the link from the list.                              */
    /* ---------------------------------------------------------------- */
    if( psLast != NULL )
        psLast->psNext = psTable->psNext;
    else
        *ppsCSVTableList = psTable->psNext;

    /* ---------------------------------------------------------------- */
    /*      Free the table.  pszFilename may alias psTable->pszFilename */
    /*      (see the NULL case above), so it must not be used past      */
    /*      this point.                                                 */
    /* ---------------------------------------------------------------- */
    if( psTable->fp != NULL )
        VSIFClose( psTable->fp );

    CSLDestroy( psTable->papszFieldNames );
    CSLDestroy( psTable->papszRecFields );
    CPLFree( psTable->pszFilename );
    CPLFree( psTable->panLineIndex );
    CPLFree( psTable->pszRawData );
    CPLFree( psTable->papszLines );
    CPLFree( psTable );

    /* NOTE(review): presumably releases the working buffer held by */
    /* CPLReadLine() -- confirm against its documentation. */
    CPLReadLine( NULL );
}
/************************************************************************/
/* CSVSplitLine() */
/* */
/* Tokenize a CSV line into fields in the form of a string */
/* list. This is used instead of the CPLTokenizeString() */
/* because it provides correct CSV escaping and quoting */
/* semantics. */
/************************************************************************/
static char **CSVSplitLine( const char *pszString )
{
char **papszRetList = NULL;
char *pszToken;
int nTokenMax, nTokenLen;
pszToken = (char *) CPLCalloc(10,1);
nTokenMax = 10;
while( pszString != NULL && *pszString != '\0' )
{
int bInString = FALSE;
nTokenLen = 0;
/* Try to find the next delimeter, marking end of token */
for( ; *pszString != '\0'; pszString++ )
{
/* End if this is a delimeter skip it and break. */
if( !bInString && *pszString == ',' )
{
pszString++;
break;
}
if( *pszString == '"' )
{
if( !bInString || pszString[1] != '"' )
{
bInString = !bInString;
continue;
}
else /* doubled quotes in string resolve to one quote */
{
pszString++;
}
}
if( nTokenLen >= nTokenMax-2 )
{
nTokenMax = nTokenMax * 2 + 10;
pszToken = (char *) CPLRealloc( pszToken, nTokenMax );
}
pszToken[nTokenLen] = *pszString;
nTokenLen++;
}
pszToken[nTokenLen] = '\0';
papszRetList = CSLAddString( papszRetList, pszToken );
/* If the last token is an empty token, then we have to catch
* it now, otherwise we won't reenter the loop and it will be lost.
*/
if ( *pszString == '\0' && *(pszString-1) == ',' )
{
papszRetList = CSLAddString( papszRetList, "" );
}
}
if( papszRetList == NULL )
papszRetList = (char **) CPLCalloc(sizeof(char *),1);
CPLFree( pszToken );
return papszRetList;
}
/************************************************************************/
/* CSVFindNextLine() */
/* */
/* Find the start of the next line, while at the same time zero */
/* terminating this line. Take into account that there may be */
/* newline indicators within quoted strings, and that quotes */
/* can be escaped with a backslash. */
/************************************************************************/
/*
 * Terminate the current line in place and locate the following one.
 *
 * pszThisLine: writable, NUL-terminated buffer positioned at the start
 *              of a line.
 *
 * Returns a pointer to the first character after the run of line-end
 * characters that closes this line, or NULL when nothing follows.
 * Every character of that run is overwritten with NUL.  Line-end
 * characters found while an odd number of double quotes has been seen
 * do not end the line; a double quote immediately preceded by a
 * backslash is not counted.
 */
static char *CSVFindNextLine( char *pszThisLine )
{
    const char chLF = 10;
    const char chCR = 13;
    char *pszCur = pszThisLine;
    int bInsideQuotes = 0;

    /* Scan forward to the first line end that is outside quotes. */
    while( *pszCur != '\0' )
    {
        const char chCur = *pszCur;

        if( chCur == '"'
            && (pszCur == pszThisLine || pszCur[-1] != '\\') )
        {
            bInsideQuotes = !bInsideQuotes;
        }

        if( !bInsideQuotes && (chCur == chLF || chCur == chCR) )
            break;

        pszCur++;
    }

    /* Blank out the whole run of line-end characters. */
    while( *pszCur == chLF || *pszCur == chCR )
        *pszCur++ = '\0';

    return ( *pszCur != '\0' ) ? pszCur : NULL;
}
/************************************************************************/
/* CSVIngest() */
/* */
/* Load entire file into memory and setup index if possible. */
/************************************************************************/
static void CSVIngest( const char *pszFilename )
{
CSVTable *psTable = CSVAccess( pszFilename );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -