📄 cpl_csv.cpp
字号:
/******************************************************************************
 * $Id: cpl_csv.cpp,v 1.8 2003/05/21 03:04:14 warmerda Exp $
 *
 * Project:  CPL - Common Portability Library
 * Purpose:  CSV (comma separated value) file access.
 * Author:   Frank Warmerdam, warmerda@home.com
 *
 ******************************************************************************
 * Copyright (c) 1999, Frank Warmerdam
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 ******************************************************************************
 *
 * $Log: cpl_csv.cpp,v $
 * Revision 1.8  2003/05/21 03:04:14  warmerda
 * fixed bFinderInitialized
 *
 * Revision 1.7  2003/05/20 19:13:31  warmerda
 * reorganize default file search rules, use GDAL_DATA and CPLGetConfigOptions
 *
 * Revision 1.6  2002/11/30 16:56:31  warmerda
 * fixed up to support quoted newlines properly
 *
 * Revision 1.5  2002/11/27 19:09:40  warmerda
 * implement in-memory caching of whole CSV file
 *
 * Revision 1.4  2002/09/04 06:16:32  warmerda
 * added CPLReadLine(NULL) to cleanup
 *
 * Revision 1.3  2001/07/18 04:00:49  warmerda
 * added CPL_CVSID
 *
 * Revision 1.2  2001/01/19 21:16:41  warmerda
 * expanded tabs
 *
 * Revision 1.1  2000/10/06 15:20:45  warmerda
 * New
 *
 * Revision 1.2  2000/08/29 21:08:08  warmerda
 * fallback to use CPLFindFile()
 *
 * Revision 1.1  2000/04/05 21:55:59  warmerda
 * New
 *
 */

#include "cpl_csv.h"
#include "cpl_conv.h"

CPL_CVSID("$Id: cpl_csv.cpp,v 1.8 2003/05/21 03:04:14 warmerda Exp $");

/* ==================================================================== */
/*      The CSVTable is a persistent set of info about an open CSV      */
/*      table.  Besides the open file handle and the parsed header      */
/*      record, it carries fields for caching the whole file in         */
/*      memory (line count, line pointers, line index, raw data).       */
/*      Those cache fields are not filled in by the functions visible   */
/*      in this part of the file; they are only released here, in       */
/*      CSVDeaccess().                                                  */
/* ==================================================================== */
typedef struct ctb {
    FILE        *fp;                /* handle returned by VSIFOpen()        */

    struct ctb  *psNext;            /* next table in psCSVTableList         */

    char        *pszFilename;       /* private copy of the file name; also  */
                                    /* the lookup key in the table list     */

    char        **papszFieldNames;  /* header record, as parsed by          */
                                    /* CSVReadParseLine() in CSVAccess()    */

    char        **papszRecFields;   /* string list owned by the table;      */
                                    /* presumably the most recently read    */
                                    /* record -- confirm against readers    */

    int         iLastLine;          /* NOTE(review): not used in the        */
                                    /* visible code; presumably the index   */
                                    /* of the last line fetched             */

    /* Cache for whole file */
    int         nLineCount;
    char        **papszLines;       /* only the array itself is freed in    */
                                    /* CSVDeaccess(), not the entries       */
    int         *panLineIndex;
    char        *pszRawData;
} CSVTable;

/* Head of the singly linked list of currently open tables. */
static CSVTable *psCSVTableList = NULL;

/************************************************************************/
/*                             CSVAccess()                              */
/*                                                                      */
/*      This function will fetch a handle to the requested table.       */
/*      If not found in the ``open table list'' the table will be       */
/*      opened and added to the list.  Eventually this function may     */
/*      become public with an abstracted return type so that            */
/*      applications can set options about the table.  For now this     */
/*      isn't done.                                                     */
/*                                                                      */
/*      pszFilename is the name of the CSV file; it is compared         */
/*      against already open tables with EQUAL().                       */
/*                                                                      */
/*      Returns the table handle (owned by the open table list, to      */
/*      be released through CSVDeaccess()), or NULL if the file         */
/*      could not be opened.                                            */
/************************************************************************/

static CSVTable *CSVAccess( const char * pszFilename )

{
    CSVTable    *psTable;
    FILE        *fp;

/* -------------------------------------------------------------------- */
/*      Is the table already in the list.                               */
/* -------------------------------------------------------------------- */
    for( psTable = psCSVTableList;
         psTable != NULL;
         psTable = psTable->psNext )
    {
        if( EQUAL(psTable->pszFilename,pszFilename) )
        {
            /*
             * Eventually we should consider promoting to the front of
             * the list to accelerate frequently accessed tables.
             */

            return( psTable );
        }
    }

/* -------------------------------------------------------------------- */
/*      If not, try to open it.                                         */
/* -------------------------------------------------------------------- */
    fp = VSIFOpen( pszFilename, "rb" );
    if( fp == NULL )
        return NULL;

/* -------------------------------------------------------------------- */
/*      Create an information structure about this table, and add to    */
/*      the front of the list.  CPLCalloc() leaves every field not      */
/*      assigned below zeroed.                                          */
/* -------------------------------------------------------------------- */
    psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);

    psTable->fp = fp;
    psTable->pszFilename = CPLStrdup( pszFilename );
    psTable->psNext = psCSVTableList;

    psCSVTableList = psTable;

/* -------------------------------------------------------------------- */
/*      Read the table header record containing the field names.        */
/* -------------------------------------------------------------------- */
    psTable->papszFieldNames = CSVReadParseLine( fp );

    return( psTable );
}

/************************************************************************/
/*                            CSVDeaccess()                             */
/*                                                                      */
/*      Close a table and remove it from the open table list,           */
/*      releasing everything the CSVTable owns.                         */
/*                                                                      */
/*      pszFilename names the table to release.  Passing NULL           */
/*      releases every open table.  A name that matches no open         */
/*      table only produces a debug message.                            */
/************************************************************************/

void CSVDeaccess( const char * pszFilename )

{
    CSVTable    *psLast, *psTable;

/* -------------------------------------------------------------------- */
/*      A NULL means deaccess all tables.  Each recursive call          */
/*      unlinks the current head, so the loop terminates.               */
/* -------------------------------------------------------------------- */
    if( pszFilename == NULL )
    {
        while( psCSVTableList != NULL )
            CSVDeaccess( psCSVTableList->pszFilename );

        return;
    }

/* -------------------------------------------------------------------- */
/*      Find this table, remembering its predecessor for unlinking.     */
/* -------------------------------------------------------------------- */
    psLast = NULL;
    for( psTable = psCSVTableList;
         psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename);
         psTable = psTable->psNext )
    {
        psLast = psTable;
    }

    if( psTable == NULL )
    {
        CPLDebug( "CPL_CSV", "CSVDeaccess( %s ) - no match.", pszFilename );
        return;
    }

/* -------------------------------------------------------------------- */
/*      Remove the link from the list.                                  */
/* -------------------------------------------------------------------- */
    if( psLast != NULL )
        psLast->psNext = psTable->psNext;
    else
        psCSVTableList = psTable->psNext;

/* -------------------------------------------------------------------- */
/*      Free the table.  papszLines is released with CPLFree() only:    */
/*      the array is freed but not the strings it points at             */
/*      (presumably they point into pszRawData -- confirm against       */
/*      the code that fills the cache).                                 */
/* -------------------------------------------------------------------- */
    if( psTable->fp != NULL )
        VSIFClose( psTable->fp );

    CSLDestroy( psTable->papszFieldNames );
    CSLDestroy( psTable->papszRecFields );
    CPLFree( psTable->pszFilename );
    CPLFree( psTable->panLineIndex );
    CPLFree( psTable->pszRawData );
    CPLFree( psTable->papszLines );

    CPLFree( psTable );

    /* Cleanup call added in revision 1.4; presumably releases the      */
    /* internal CPLReadLine() working buffer -- see cpl_conv.           */
    CPLReadLine( NULL );
}

/************************************************************************/
/*                            CSVSplitLine()                            */
/*                                                                      */
/*      Tokenize a CSV line into fields in the form of a string         */
/*      list.  This is used instead of the CPLTokenizeString()          */
/*      because it provides correct CSV escaping and quoting            */
/*      semantics.                                                      */
/*                                                                      */
/*      Fields are separated by commas found outside double quotes.     */
/*      The enclosing quotes are dropped, and a doubled quote inside    */
/*      a quoted field yields one literal quote.                        */
/*                                                                      */
/*      Returns a newly allocated string list owned by the caller.      */
/*      For a NULL or empty pszString the result is an empty list       */
/*      (a single NULL entry), never a NULL pointer.                    */
/************************************************************************/

static char **CSVSplitLine( const char *pszString )

{
    char        **papszRetList = NULL;
    char        *pszToken;
    int         nTokenMax, nTokenLen;

    pszToken = (char *) CPLCalloc(10,1);
    nTokenMax = 10;

    while( pszString != NULL && *pszString != '\0' )
    {
        int     bInString = FALSE;

        nTokenLen = 0;

        /* Try to find the next delimiter, marking end of token */
        for( ; *pszString != '\0'; pszString++ )
        {

            /* End if this is a delimiter skip it and break. */
            if( !bInString && *pszString == ',' )
            {
                pszString++;
                break;
            }

            if( *pszString == '"' )
            {
                if( !bInString || pszString[1] != '"' )
                {
                    /* opening or closing quote: not part of the token */
                    bInString = !bInString;
                    continue;
                }
                else  /* doubled quotes in string resolve to one quote */
                {
                    pszString++;
                }
            }

            /* Grow the token buffer before it fills, keeping room for  */
            /* this character and the terminating NUL.                  */
            if( nTokenLen >= nTokenMax-2 )
            {
                nTokenMax = nTokenMax * 2 + 10;
                pszToken = (char *) CPLRealloc( pszToken, nTokenMax );
            }

            pszToken[nTokenLen] = *pszString;
            nTokenLen++;
        }

        pszToken[nTokenLen] = '\0';
        papszRetList = CSLAddString( papszRetList, pszToken );

        /* If the last token is an empty token, then we have to catch
         * it now, otherwise we won't reenter the loop and it will be lost.
         * (pszString-1 is always valid here: the scan above consumed at
         * least one character.)
         */
        if ( *pszString == '\0' && *(pszString-1) == ',' )
        {
            papszRetList = CSLAddString( papszRetList, "" );
        }
    }

    if( papszRetList == NULL )
        papszRetList = (char **) CPLCalloc(sizeof(char *),1);

    CPLFree( pszToken );

    return papszRetList;
}

/************************************************************************/
/*                          CSVFindNextLine()                           */
/*                                                                      */
/*      Find the start of the next line, while at the same time zero    */
/*      terminating this line.  Take into account that there may be     */
/*      newline indicators within quoted strings, and that quotes       */
/*      can be escaped with a backslash.                                */
/*                                                                      */
/*      pszThisLine is modified in place: every consecutive CR / LF     */
/*      character ending the line is overwritten with NUL, so a         */
/*      CR+LF pair and any directly following blank lines are           */
/*      consumed together.                                              */
/*                                                                      */
/*      Returns a pointer to the first character after the line         */
/*      break, or NULL if the buffer ends there.                        */
/************************************************************************/

static char *CSVFindNextLine( char *pszThisLine )

{
    int  nQuoteCount = 0, i;

    for( i = 0; pszThisLine[i] != '\0'; i++ )
    {
        /* Count quotes that are not preceded by a backslash. */
        if( pszThisLine[i] == '\"'
            && (i == 0 || pszThisLine[i-1] != '\\') )
            nQuoteCount++;

        /* A line break only ends the record when the quotes seen so    */
        /* far are balanced, i.e. we are outside a quoted string.       */
        if( (pszThisLine[i] == '\n' || pszThisLine[i] == '\r')
            && (nQuoteCount % 2) == 0 )
            break;
    }

    while( pszThisLine[i] == '\n' || pszThisLine[i] == '\r' )
        pszThisLine[i++] = '\0';

    if( pszThisLine[i] == '\0' )
        return NULL;
    else
        return pszThisLine + i;
}

/************************************************************************/
/*                             CSVIngest()                              */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -