⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tokenizer.h

📁 GNU Common C++ is a very portable and highly optimized class framework for writing C++ applications
💻 H
字号:
// Copyright (C) 1999-2005 Open Source Telecom Corporation.//// This program is free software; you can redistribute it and/or modify// it under the terms of the GNU General Public License as published by// the Free Software Foundation; either version 2 of the License, or// (at your option) any later version.// // This program is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the// GNU General Public License for more details.//// You should have received a copy of the GNU General Public License// along with this program; if not, write to the Free Software// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.// // As a special exception, you may use this file as part of a free software// library without restriction.  Specifically, if other files instantiate// templates or use macros or inline functions from this file, or you compile// this file and link it with other files to produce an executable, this// file does not by itself cause the resulting executable to be covered by// the GNU General Public License.  This exception does not however    // invalidate any other reasons why the executable file might be covered by// the GNU General Public License.    //// This exception applies only to the code released under the name GNU// Common C++.  If you copy code from other releases into a copy of GNU// Common C++, as the General Public License permits, the exception does// not apply to the code that you add in this way.  To avoid misleading// anyone as to the status of such modified files, you must delete// this exception notice from them.//// If you write modifications of your own for GNU Common C++, it is your choice// whether to permit this exception to apply to your modifications.// If you do not wish that, delete this exception notice.///** * @file tokenizer.h * @short string tokenizer. **/#ifndef	CCXX_TOKENIZER_H_#define	CCXX_TOKENIZER_H_#ifndef CCXX_MISSING_H_#include <cc++/missing.h>#endif#ifndef CCXX_THREAD_H_#include <cc++/thread.h>#endif#ifdef	CCXX_NAMESPACESnamespace ost {#endif/** * Splits delimited string into tokens. * * The StringTokenizer takes a pointer to a string and a pointer * to a string containing a number of possible delimiters. * The StringTokenizer provides an input forward iterator which allows * to iterate through all tokens. An iterator behaves like a logical * pointer to the tokens, i.e. to shift to the next token, you've  * to increment the iterator, you get the token by dereferencing the * iterator. * * Memory consumption: * This class operates on the original string and only allocates memory * for the individual tokens actually requested, so this class  * allocates at maximum the space required for the longest token in the  * given string. * Since for each iteration, memory is reclaimed for the last token, * you MAY NOT store pointers to them; if you need them afterwards, * copy them. You may not modify the original string while you operate * on it with the StringTokenizer; the behaviour is undefined in that * case. * * The iterator has one special method 'nextDelimiter()' which returns * a character containing the next delimiter following this * tokenization process or '\\0', if there are no following delimiters. In * case of skipAllDelim, it returns the FIRST delimiter. * * With the method 'setDelimiters(const char*)' you may change the * set of delimiters. It affects all running iterators. * * Example: * <code><pre> *  StringTokenizer st("mary had a little lamb;its fleece was..", " ;"); *  StringTokenizer::iterator i; *  for (i = st.begin() ; i != st.end() ; ++i) { *        cout << "Token: '" << *i << "'\t"; *        cout << " next Delim: '" << i.nextDelimiter() << "'" << endl; *  } *  </pre></code> * * @author Henner Zeller <H.Zeller@acm.org> * @license LGPL */class __EXPORT StringTokenizer {public:	/**	 * a delimiter string containing all usual whitespace delimiters.	 * These are space, tab, newline, carriage return,	 * formfeed and vertical tab. (see isspace() manpage).	 */	static const char * const SPACE;	/**	 * Exception thrown, if someone tried to read beyond the	 * end of the tokens.	 * Will not happen if you use it the 'clean' way with comparison	 * against end(), but if you skip some tokens, because you 'know'	 * they are there. Simplifies error handling a lot, since you can	 * just read your tokens the way you expect it, and if there is some	 * error in the input this Exception will be thrown.	 */	// maybe move more global ?	class NoSuchElementException { };	/**	 * The input forward iterator for tokens.	 * @author Henner Zeller	 */	class __EXPORT iterator {		friend class StringTokenizer;  // access our private constructors	private:		const StringTokenizer *myTok; // my StringTokenizer		const char *start;      // start of current token		const char *tokEnd;     // end of current token (->nxDelimiter)		const char *endp;       // one before next token		char *token;            // allocated token, if requested		// for initialization of the itEnd iterator		iterator(const StringTokenizer &tok, const char *end) 			: myTok(&tok),tokEnd(0),endp(end),token(0) {}		iterator(const StringTokenizer &tok)			: myTok(&tok),tokEnd(0),endp(myTok->str-1),token(0) {			++(*this); // init first token.		}	public:		iterator() : myTok(0),start(0),tokEnd(0),endp(0),token(0) {}		// see also: comment in implementation of operator++		virtual ~iterator() { if (token) *token='\0'; delete [] token; }				/**		 * copy constructor.		 */		// everything, but not responsible for the allocated token.		iterator(const iterator& i) :			myTok(i.myTok),start(i.start),tokEnd(i.tokEnd),			endp(i.endp),token(0) {}		/**		 * assignment operator.		 */		// everything, but not responsible for the allocated token.		iterator &operator = (const iterator &i) {			myTok = i.myTok; 			start = i.start; endp = i.endp; tokEnd = i.tokEnd;			if ( token )				delete [] token;			token = 0;			return *this;		}		/**		 * shifts this iterator to the next token in the string.		 */		iterator &operator ++ () THROWS (NoSuchElementException);		/**		 * returns the immutable string this iterator		 * points to or '0' if no token is available (i.e.		 * i == end()).		 * Do not store pointers to this token, since it is		 * invalidated for each iteration. If you need the token,		 * copy it (e.g. with strdup());		 */		const char*  operator *  () THROWS (NoSuchElementException);				/**		 * returns the next delimiter after the current token or		 * '\\0', if there are no following delimiters.		 * It returns the very next delimiter (even if 		 * skipAllDelim=true).		 */		inline char nextDelimiter() const {			return (tokEnd) ? *tokEnd : '\0';		}				/**		 * compares to other iterator. Usually used to		 * compare against the end() iterator.		 */		// only compare the end-position. speed.		inline bool operator == (const iterator &other) const { 			return (endp == other.endp);		}		/**		 * compares to other iterator. Usually used to		 * compare against the end() iterator.		 */		// only compare the end position. speed.		inline bool operator != (const iterator &other) const { 			return (endp != other.endp);		}	};private:	friend class StringTokenizer::iterator;	const char *str;	const char *delim;	bool skipAll, trim;	iterator itEnd;public:	/**	 * creates a new StringTokenizer for a string	 * and a given set of delimiters.	 *	 * @param  str          String to be split up. This string will	 *                      not be modified by this StringTokenizer,	 *                      but you may as well not modfiy this string	 *                      while tokenizing is in process, which may	 *                      lead to undefined behaviour.	 *	 * @param  delim        String containing the characters	 *                      which should be regarded as delimiters.	 *	 * @param  skipAllDelim OPTIONAL. 	 *                      true, if subsequent	 *                      delimiters should be skipped at once	 *                      or false, if empty tokens should	 *                      be returned for two delimiters with	 *                      no other text inbetween. The first	 *                      behaviour may be desirable for whitespace	 *                      skipping, the second for input with	 *                      delimited entry e.g. /etc/passwd like files	 *                      or CSV input.	 *                      NOTE, that 'true' here resembles the 	 *                      ANSI-C strtok(char *s,char *d) behaviour.	 *                      DEFAULT = false	 *	 * @param trim          OPTIONAL. 	 *                      true, if the tokens returned	 *                      should be trimmed, so that they don't have	 *                      any whitespaces at the beginning or end.	 *                      Whitespaces are any of the characters 	 *                      defined in StringTokenizer::SPACE.	 *                      If delim itself is StringTokenizer::SPACE,	 *                      this will result in a behaviour with 	 *                      skipAllDelim = true.	 *                      DEFAULT = false	 */	StringTokenizer (const char *str,			 const char *delim,			 bool skipAllDelim = false,			 bool trim = false);		/**	 * create a new StringTokenizer which splits the input	 * string at whitespaces. The tokens are stripped from	 * whitespaces. This means, if you change the set of	 * delimiters in either the 'begin(const char *delim)' method	 * or in 'setDelimiters()', you then get whitespace	 * trimmed tokens, delimited by the new set.	 * Behaves like StringTokenizer(s, StringTokenizer::SPACE,false,true);	 */	StringTokenizer (const char *s);	/**	 * returns the begin iterator	 */	iterator begin() const { 		return iterator(*this);	}		/**	 * changes the set of delimiters used in subsequent	 * iterations.	 */	void setDelimiters (const char *d) {		delim = d;	}		/**	 * returns a begin iterator with an alternate set of 	 * delimiters.	 */	iterator begin(const char *d) { 		delim = d;		return iterator(*this);	}	/**	 * the iterator marking the end.	 */	const iterator& end() const { return itEnd; }};#ifdef	CCXX_NAMESPACES}#endif#endif/** EMACS ** * Local variables: * mode: c++ * c-basic-offset: 8 * End: */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -