00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef INCLUDED_SIMPLETOKENIZER_H
00012 #include "SimpleTokenizer.h"
00013 #endif
00014
00015
00016 SimpleTokenizer::~SimpleTokenizer()
00017 {
00018 }
00019 SimpleTokenizer::SimpleTokenizer(String delim, String term) : delimiters(delim), terminators(term)
00020 {
00021 }
00022
00023 SimpleTokenizer::SimpleTokenizer(const SimpleTokenizer &)
00024 {
00025 }
00026
00027 SimpleTokenizer & SimpleTokenizer::operator = (const SimpleTokenizer &rhs)
00028 {
00029 if (this == &rhs) return *this;
00030
00031
00032 return *this;
00033 }
00034
00035 AIDA_STD::vector<AIDA_STD::string> SimpleTokenizer::tokenize(String input) const
00036 {
00037 AIDA_STD::vector<AIDA_STD::string> tokens;
00038
00039 int endOfSearch = input.size();
00040 if (terminators.size() != 0) {
00041 String::size_type pos = input.find_first_of(terminators);
00042 if (pos != String::npos) endOfSearch = pos;
00043 }
00044
00045 String newInput = input.substr(0, endOfSearch);
00046
00047 String::size_type startPos = 0;
00048 String::size_type newPos = 0;
00049 while (startPos != String::npos) {
00050 newPos = newInput.find_first_of(delimiters, startPos);
00051 if (newPos != String::npos) {
00052 tokens.push_back( newInput.substr( startPos, newPos-startPos ) );
00053 newPos++;
00054 } else {
00055 tokens.push_back( newInput.substr( startPos, endOfSearch-startPos ) );
00056 }
00057 startPos = newPos;
00058 }
00059
00060
00061
00062
00063
00064 AIDA_STD::vector<AIDA_STD::string> newTokens;
00065 AIDA_STD::vector<AIDA_STD::string>::iterator i;
00066 i = tokens.begin();
00067 while(i != tokens.end()) {
00068 if (! ( (i->find_first_not_of(" ") == String::npos) || ((*i).size() == 0) ) ) {
00069 newTokens.push_back(*i);
00070 }
00071 i++;
00072 }
00073
00074 #ifdef DEBUG_PRINT
00075 i = newTokens.begin();
00076 cerr << "found " << newTokens.size() << " newTokens " << endl;
00077 while(i != newTokens.end()) {
00078 cerr << "token = '" << *i << "' size:" << (*i).size() << endl;
00079 i++;
00080 }
00081 #endif
00082 return newTokens;
00083 }