I want to split a UTF-8 string into tokens. However, it looks like split_regex() fails and just returns the original string. I also tried u32regex_replace(), and it did perform the replacement successfully.
src.cpp:
// CutWord.cpp : This file contains the 'main' function. Program execution begins and ends there.
//
#include <iostream>
#include<vector>
#include<string>
#include <boost/algorithm/string.hpp>
#include<boost/algorithm/string/regex.hpp>
#include <boost/regex/icu.hpp>
#include<regex>
using namespace std;
int main()
{
string str="数学。数学。数学。";
//Split
vector<string> vs;
//regexU3 stores expression.
boost::u32regex regexU32= boost::make_u32regex("。");
boost::split_regex(vs, str,regexU32);
cout<<vs.size()<<endl; //only return 1.
//Check if u32regex_replace works.
string nsub=".";
string nstr=u32regex_replace(str, regexU32, nsub);
cout<<nstr<<endl;
};
I want to split a UTF-8 string into tokens. However, it looks like split_regex() fails and just returns the original string. I also tried u32regex_replace(), and it did perform the replacement successfully.
src.cpp:
CMakeLists.txt: