Parse (split) a string in C++ using string delimiter (standard C++)
Parse (split) a string in C++ using string delimiter (standard C++)
Question
I am parsing a string in C++ using the following:
using namespace std;
string parsed,input="text to be parsed";
stringstream input_stringstream(input);
if (getline(input_stringstream,parsed,' '))
{
// do some processing.
}
Parsing with a single char delimiter is fine. But what if I want to use a string as delimiter.
Example: I want to split:
scott>=tiger
with >=
as delimiter so that I can get scott and tiger.
Accepted Answer
You can use the std::string::find()
function to find the position of your string delimiter, then use std::string::substr()
to get a token.
Example:
std::string s = "scott>=tiger";
std::string delimiter = ">=";
std::string token = s.substr(0, s.find(delimiter)); // token is "scott"
The
find(const string& str, size_t pos = 0)
function returns the position of the first occurrence ofstr
in the string, ornpos
if the string is not found.The
substr(size_t pos = 0, size_t n = npos)
function returns a substring of the object, starting at positionpos
and of lengthnpos
.
If you have multiple delimiters, after you have extracted one token, you can remove it (delimiter included) to proceed with subsequent extractions (if you want to preserve the original string, just use s = s.substr(pos + delimiter.length());
):
s.erase(0, s.find(delimiter) + delimiter.length());
This way you can easily loop to get each token.
Complete Example
std::string s = "scott>=tiger>=mushroom";
std::string delimiter = ">=";
size_t pos = 0;
std::string token;
while ((pos = s.find(delimiter)) != std::string::npos) {
token = s.substr(0, pos);
std::cout << token << std::endl;
s.erase(0, pos + delimiter.length());
}
std::cout << s << std::endl;
Output:
scott
tiger
mushroom
Read more… Read less…
This method uses std::string::find
without mutating the original string by remembering the beginning and end of the previous substring token.
#include <iostream>
#include <string>
int main()
{
std::string s = "scott>=tiger";
std::string delim = ">=";
auto start = 0U;
auto end = s.find(delim);
while (end != std::string::npos)
{
std::cout << s.substr(start, end - start) << std::endl;
start = end + delim.length();
end = s.find(delim, start);
}
std::cout << s.substr(start, end);
}
You can use next function to split string:
vector<string> split(const string& str, const string& delim)
{
vector<string> tokens;
size_t prev = 0, pos = 0;
do
{
pos = str.find(delim, prev);
if (pos == string::npos) pos = str.length();
string token = str.substr(prev, pos-prev);
if (!token.empty()) tokens.push_back(token);
prev = pos + delim.length();
}
while (pos < str.length() && prev < str.length());
return tokens;
}
For string delimiter
Split string based on a string delimiter. Such as splitting string "adsf-+qwret-+nvfkbdsj-+orthdfjgh-+dfjrleih"
based on string delimiter "-+"
, output will be {"adsf", "qwret", "nvfkbdsj", "orthdfjgh", "dfjrleih"}
#include <iostream>
#include <sstream>
#include <vector>
using namespace std;
// for string delimiter
vector<string> split (string s, string delimiter) {
size_t pos_start = 0, pos_end, delim_len = delimiter.length();
string token;
vector<string> res;
while ((pos_end = s.find (delimiter, pos_start)) != string::npos) {
token = s.substr (pos_start, pos_end - pos_start);
pos_start = pos_end + delim_len;
res.push_back (token);
}
res.push_back (s.substr (pos_start));
return res;
}
int main() {
string str = "adsf-+qwret-+nvfkbdsj-+orthdfjgh-+dfjrleih";
string delimiter = "-+";
vector<string> v = split (str, delimiter);
for (auto i : v) cout << i << endl;
return 0;
}
Output
adsf qwret nvfkbdsj orthdfjgh dfjrleih
For single character delimiter
Split string based on a character delimiter. Such as splitting string "adsf+qwer+poui+fdgh"
with delimiter "+"
will output {"adsf", "qwer", "poui", "fdg"h}
#include <iostream>
#include <sstream>
#include <vector>
using namespace std;
vector<string> split (const string &s, char delim) {
vector<string> result;
stringstream ss (s);
string item;
while (getline (ss, item, delim)) {
result.push_back (item);
}
return result;
}
int main() {
string str = "adsf+qwer+poui+fdgh";
vector<string> v = split (str, '+');
for (auto i : v) cout << i << endl;
return 0;
}
Output
adsf qwer poui fdgh
This code splits lines from text, and add everyone into a vector.
vector<string> split(char *phrase, string delimiter){
vector<string> list;
string s = string(phrase);
size_t pos = 0;
string token;
while ((pos = s.find(delimiter)) != string::npos) {
token = s.substr(0, pos);
list.push_back(token);
s.erase(0, pos + delimiter.length());
}
list.push_back(s);
return list;
}
Called by:
vector<string> listFilesMax = split(buffer, "\n");
strtok allows you to pass in multiple chars as delimiters. I bet if you passed in ">=" your example string would be split correctly (even though the > and = are counted as individual delimiters).
EDIT if you don't want to use c_str()
to convert from string to char*, you can use substr and find_first_of to tokenize.
string token, mystring("scott>=tiger");
while(token != mystring){
token = mystring.substr(0,mystring.find_first_of(">="));
mystring = mystring.substr(mystring.find_first_of(">=") + 1);
printf("%s ",token.c_str());
}