#include <OpenGUI_String.h>
This class provides a complete 1 to 1 map of most std::string functions (at least to my knowledge). Implicit conversions allow this string class to work with all common C++ string formats, with specialty functions defined where implicit conversion would cause potential problems or is otherwise unavailable.
Some additional functionality is present to assist in working with characters using the 32-bit UTF-32 encoding. (Which is guaranteed to fit any Unicode character into a single code point.) Note: Reverse iterators do not have this functionality due to the ambiguity that surrounds working with UTF-16 in reverse. (Such as, where should an iterator point to represent the beginning of a surrogate pair?)
Public Types | |
| typedef size_t | size_type |
| size type used to indicate string size and character positions within the string | |
| typedef INT32 | unicode_char |
| a single 32-bit Unicode character | |
| typedef UINT16 | code_point |
| a single UTF-16 code point | |
| typedef code_point | value_type |
| value type typedef for use in iterators | |
| typedef std::basic_string< unicode_char > | utf32string |
| string type used for returning UTF-32 formatted data | |
| typedef _fwd_iterator | iterator |
| iterator | |
| typedef _rev_iterator | reverse_iterator |
| reverse iterator | |
| typedef _const_fwd_iterator | const_iterator |
| const iterator | |
| typedef _const_rev_iterator | const_reverse_iterator |
| const reverse iterator | |
Public Member Functions | |
Constructors/Destructor | |
| UTFString () | |
| default constructor, creates an empty string | |
| UTFString (const UTFString &copy) | |
| copy constructor | |
| UTFString (size_type length, const code_point &ch) | |
| length copies of ch | |
| UTFString (const code_point *str) | |
| duplicate of nul-terminated sequence str | |
| UTFString (const code_point *str, size_type length) | |
| duplicate of str, length code points long | |
| UTFString (const UTFString &str, size_type index, size_type length) | |
| substring of str starting at index and length code points long | |
| UTFString (const wchar_t *w_str) | |
| duplicate of nul-terminated wchar_t array | |
| UTFString (const wchar_t *w_str, size_type length) | |
| duplicate of w_str, length characters long | |
| UTFString (const std::wstring &wstr) | |
| duplicate of wstr | |
| UTFString (const char *c_str) | |
| duplicate of nul-terminated C-string c_str (UTF-8 encoding) | |
| UTFString (const char *c_str, size_type length) | |
| duplicate of c_str, length characters long (UTF-8 encoding) | |
| UTFString (const std::string &str) | |
| duplicate of str (UTF-8 encoding) | |
| UTFString (size_type length, const unicode_char &ch) | |
| length copies of ch | |
| ~UTFString () | |
| destructor | |
Utility functions | |
| size_type | size () const |
| Returns the number of code points in the current string. | |
| size_type | length () const |
| Returns the number of code points in the current string. | |
| size_type | length_Characters () const |
| Returns the number of Unicode characters in the string. | |
| size_type | max_size () const |
| returns the maximum number of UTF-16 code points that the string can hold | |
| void | reserve (size_type size) |
| sets the capacity of the string to at least size code points | |
| void | resize (size_type num, const code_point &val=0) |
| changes the size of the string to num, filling in any new area with val | |
| void | swap (UTFString &from) |
| exchanges the elements of the current string with those of from | |
| bool | empty () const |
| returns true if the string has no elements, false otherwise | |
| const code_point * | c_str () const |
| returns a pointer to the first character in the current string | |
| const code_point * | data () const |
| returns a pointer to the first character in the current string | |
| size_type | capacity () const |
| returns the number of elements that the string can hold before it will need to allocate more space | |
| void | clear () |
| deletes all of the elements in the string | |
| UTFString | substr (size_type index, size_type num=npos) const |
| returns a substring of the current string, starting at index, and num characters long. | |
| void | push_back (unicode_char val) |
| appends val to the end of the string | |
| void | push_back (wchar_t val) |
| appends val to the end of the string | |
| void | push_back (code_point val) |
| appends val to the end of the string | |
| void | push_back (char val) |
| appends val to the end of the string | |
| bool | inString (unicode_char ch) const |
| returns true if the given Unicode character ch is in this string | |
Stream variations | |
| const std::string & | asUTF8 () const |
| returns the current string in UTF-8 form within a std::string | |
| const char * | asUTF8_c_str () const |
| returns the current string in UTF-8 form as a nul-terminated char array | |
| const utf32string & | asUTF32 () const |
| returns the current string in UTF-32 form within a utf32string | |
| const unicode_char * | asUTF32_c_str () const |
| returns the current string in UTF-32 form as a nul-terminated unicode_char array | |
| const std::wstring & | asWStr () const |
| returns the current string in the native form of std::wstring | |
| const wchar_t * | asWStr_c_str () const |
| returns the current string in the native form of a nul-terminated wchar_t array | |
Single Character Access | |
| code_point & | at (size_type loc) |
| returns a reference to the element in the string at index loc | |
| const code_point & | at (size_type loc) const |
| returns a reference to the element in the string at index loc | |
| unicode_char | getChar (size_type loc) const |
| returns the data point loc evaluated as a UTF-32 value | |
| int | setChar (size_type loc, unicode_char ch) |
| sets the value of the character at loc to the Unicode value ch (UTF-32) | |
iterator acquisition | |
| iterator | begin () |
| returns an iterator to the first element of the string | |
| const_iterator | begin () const |
| returns an iterator to the first element of the string | |
| iterator | end () |
| returns an iterator just past the end of the string | |
| const_iterator | end () const |
| returns an iterator just past the end of the string | |
| reverse_iterator | rbegin () |
| returns a reverse iterator to the last element of the string | |
| const_reverse_iterator | rbegin () const |
| returns a reverse iterator to the last element of the string | |
| reverse_iterator | rend () |
| returns a reverse iterator just past the beginning of the string | |
| const_reverse_iterator | rend () const |
| returns a reverse iterator just past the beginning of the string | |
assign | |
| UTFString & | assign (iterator start, iterator end) |
| gives the current string the values from start to end | |
| UTFString & | assign (const UTFString &str) |
| assign str to the current string | |
| UTFString & | assign (const code_point *str) |
| assign the nul-terminated str to the current string | |
| UTFString & | assign (const code_point *str, size_type num) |
| assign the first num characters of str to the current string | |
| UTFString & | assign (const UTFString &str, size_type index, size_type len) |
| assign len entries from str to the current string, starting at index | |
| UTFString & | assign (size_type num, const code_point &ch) |
| assign num copies of ch to the current string | |
| UTFString & | assign (const std::wstring &wstr) |
| assign wstr to the current string (wstr is treated as a UTF-16 stream) | |
| UTFString & | assign (const wchar_t *w_str) |
| assign w_str to the current string | |
| UTFString & | assign (const wchar_t *w_str, size_type num) |
| assign the first num characters of w_str to the current string | |
| UTFString & | assign (const std::string &str) |
| assign str to the current string (str is treated as a UTF-8 stream) | |
| UTFString & | assign (const char *c_str) |
| assign c_str to the current string (c_str is treated as a UTF-8 stream) | |
| UTFString & | assign (const char *c_str, size_type num) |
| assign the first num characters of c_str to the current string (c_str is treated as a UTF-8 stream) | |
| UTFString & | assign (size_type num, const unicode_char &ch) |
| assign num copies of ch to the current string | |
append | |
| UTFString & | append (const UTFString &str) |
| appends str on to the end of the current string | |
| UTFString & | append (const code_point *str) |
| appends str on to the end of the current string | |
| UTFString & | append (const UTFString &str, size_type index, size_type len) |
| appends a substring of str starting at index that is len characters long on to the end of the current string | |
| UTFString & | append (const code_point *str, size_type num) |
| appends num characters of str on to the end of the current string | |
| UTFString & | append (size_type num, code_point ch) |
| appends num repetitions of ch on to the end of the current string | |
| UTFString & | append (iterator start, iterator end) |
| appends the sequence denoted by start and end on to the end of the current string | |
| UTFString & | append (const wchar_t *w_str, size_type num) |
| appends num characters of w_str on to the end of the current string | |
| UTFString & | append (size_type num, wchar_t ch) |
| appends num repetitions of ch on to the end of the current string | |
| UTFString & | append (const char *c_str, size_type num) |
| appends num characters of c_str on to the end of the current string (UTF-8 encoding) | |
| UTFString & | append (size_type num, char ch) |
| appends num repetitions of ch on to the end of the current string (Unicode values less than 128) | |
| UTFString & | append (size_type num, unicode_char ch) |
| appends num repetitions of ch on to the end of the current string (Full Unicode spectrum) | |
insert | |
| iterator | insert (iterator i, const code_point &ch) |
| inserts ch before the code point denoted by i | |
| UTFString & | insert (size_type index, const UTFString &str) |
| inserts str into the current string, at location index | |
| UTFString & | insert (size_type index, const code_point *str) |
| inserts str into the current string, at location index | |
| UTFString & | insert (size_type index1, const UTFString &str, size_type index2, size_type num) |
| inserts a substring of str (starting at index2 and num code points long) into the current string, at location index1 | |
| void | insert (iterator i, iterator start, iterator end) |
| inserts the code points denoted by start and end into the current string, before the code point specified by i | |
| UTFString & | insert (size_type index, const code_point *str, size_type num) |
| inserts num code points of str into the current string, at location index | |
| UTFString & | insert (size_type index, const wchar_t *w_str, size_type num) |
| inserts num code points of w_str into the current string, at location index | |
| UTFString & | insert (size_type index, const char *c_str, size_type num) |
| inserts num code points of c_str into the current string, at location index | |
| UTFString & | insert (size_type index, size_type num, code_point ch) |
| inserts num copies of ch into the current string, at location index | |
| UTFString & | insert (size_type index, size_type num, wchar_t ch) |
| inserts num copies of ch into the current string, at location index | |
| UTFString & | insert (size_type index, size_type num, char ch) |
| inserts num copies of ch into the current string, at location index | |
| UTFString & | insert (size_type index, size_type num, unicode_char ch) |
| inserts num copies of ch into the current string, at location index | |
| void | insert (iterator i, size_type num, const code_point &ch) |
| inserts num copies of ch into the current string, before the code point denoted by i | |
| void | insert (iterator i, size_type num, const wchar_t &ch) |
| inserts num copies of ch into the current string, before the code point denoted by i | |
| void | insert (iterator i, size_type num, const char &ch) |
| inserts num copies of ch into the current string, before the code point denoted by i | |
| void | insert (iterator i, size_type num, const unicode_char &ch) |
| inserts num copies of ch into the current string, before the code point denoted by i | |
erase | |
| iterator | erase (iterator loc) |
| removes the code point pointed to by loc, returning an iterator to the next character | |
| iterator | erase (iterator start, iterator end) |
| removes the code points between start and end (including the one at start but not the one at end), returning an iterator to the code point after the last code point removed | |
| UTFString & | erase (size_type index=0, size_type num=npos) |
| removes num code points from the current string, starting at index | |
replace | |
| UTFString & | replace (size_type index1, size_type num1, const UTFString &str) |
| replaces up to num1 code points of the current string (starting at index1) with str | |
| UTFString & | replace (size_type index1, size_type num1, const UTFString &str, size_type num2) |
| replaces up to num1 code points of the current string (starting at index1) with up to num2 code points from str | |
| UTFString & | replace (size_type index1, size_type num1, const UTFString &str, size_type index2, size_type num2) |
| replaces up to num1 code points of the current string (starting at index1) with up to num2 code points from str beginning at index2 | |
| UTFString & | replace (iterator start, iterator end, const UTFString &str, size_type num=npos) |
| replaces code points in the current string from start to end with num code points from str | |
| UTFString & | replace (size_type index, size_type num1, size_type num2, code_point ch) |
| replaces up to num1 code points in the current string (beginning at index) with num2 copies of ch | |
| UTFString & | replace (iterator start, iterator end, size_type num, code_point ch) |
| replaces the code points in the current string from start to end with num copies of ch | |
compare | |
| int | compare (const UTFString &str) const |
| compare str to the current string | |
| int | compare (const code_point *str) const |
| compare str to the current string | |
| int | compare (size_type index, size_type length, const UTFString &str) const |
| compare str to a substring of the current string, starting at index for length characters | |
| int | compare (size_type index, size_type length, const UTFString &str, size_type index2, size_type length2) const |
| compare a substring of str to a substring of the current string, where index2 and length2 refer to str and index and length refer to the current string | |
| int | compare (size_type index, size_type length, const code_point *str, size_type length2) const |
| compare a substring of str to a substring of the current string, where the substring of str begins at zero and is length2 characters long, and the substring of the current string begins at index and is length characters long | |
| int | compare (size_type index, size_type length, const wchar_t *w_str, size_type length2) const |
| compare a substring of w_str to a substring of the current string, where the substring of w_str begins at zero and is length2 elements long, and the substring of the current string begins at index and is length characters long | |
| int | compare (size_type index, size_type length, const char *c_str, size_type length2) const |
| compare a substring of c_str to a substring of the current string, where the substring of c_str begins at zero and is length2 UTF-8 code points long, and the substring of the current string begins at index and is length characters long | |
find & rfind | |
| size_type | find (const UTFString &str, size_type index=0) const |
returns the index of the first occurrence of str within the current string, starting at index; returns UTFString::npos if nothing is found | |
| size_type | find (const code_point *cp_str, size_type index, size_type length) const |
| returns the index of the first occurrence of cp_str within the current string and within length code points, starting at index; returns UTFString::npos if nothing is found | |
| size_type | find (const char *c_str, size_type index, size_type length) const |
| returns the index of the first occurrence of c_str within the current string and within length code points, starting at index; returns UTFString::npos if nothing is found | |
| size_type | find (const wchar_t *w_str, size_type index, size_type length) const |
| returns the index of the first occurrence of w_str within the current string and within length code points, starting at index; returns UTFString::npos if nothing is found | |
| size_type | find (char ch, size_type index=0) const |
| returns the index of the first occurrence of ch within the current string, starting at index; returns UTFString::npos if nothing is found | |
| size_type | find (code_point ch, size_type index=0) const |
| returns the index of the first occurrence of ch within the current string, starting at index; returns UTFString::npos if nothing is found | |
| size_type | find (wchar_t ch, size_type index=0) const |
| returns the index of the first occurrence of ch within the current string, starting at index; returns UTFString::npos if nothing is found | |
| size_type | find (unicode_char ch, size_type index=0) const |
| returns the index of the first occurrence of ch within the current string, starting at index; returns UTFString::npos if nothing is found | |
| size_type | rfind (const UTFString &str, size_type index=0) const |
returns the location of the first occurrence of str in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
| size_type | rfind (const code_point *cp_str, size_type index, size_type num) const |
| returns the location of the first occurrence of cp_str in the current string, doing a reverse search from index, searching at most num characters; returns UTFString::npos if nothing is found | |
| size_type | rfind (const char *c_str, size_type index, size_type num) const |
| returns the location of the first occurrence of c_str in the current string, doing a reverse search from index, searching at most num characters; returns UTFString::npos if nothing is found | |
| size_type | rfind (const wchar_t *w_str, size_type index, size_type num) const |
| returns the location of the first occurrence of w_str in the current string, doing a reverse search from index, searching at most num characters; returns UTFString::npos if nothing is found | |
| size_type | rfind (char ch, size_type index=0) const |
returns the location of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
| size_type | rfind (code_point ch, size_type index) const |
returns the location of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
| size_type | rfind (wchar_t ch, size_type index=0) const |
returns the location of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
| size_type | rfind (unicode_char ch, size_type index=0) const |
returns the location of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
find_first/last_(not)_of | |
| size_type | find_first_of (const UTFString &str, size_type index=0, size_type num=npos) const |
Returns the index of the first character within the current string that matches any character in str, beginning the search at index and searching at most num characters; returns UTFString::npos if nothing is found. | |
| size_type | find_first_of (code_point ch, size_type index=0) const |
returns the index of the first occurrence of ch in the current string, starting the search at index; returns UTFString::npos if nothing is found | |
| size_type | find_first_of (char ch, size_type index=0) const |
returns the index of the first occurrence of ch in the current string, starting the search at index; returns UTFString::npos if nothing is found | |
| size_type | find_first_of (wchar_t ch, size_type index=0) const |
returns the index of the first occurrence of ch in the current string, starting the search at index; returns UTFString::npos if nothing is found | |
| size_type | find_first_of (unicode_char ch, size_type index=0) const |
returns the index of the first occurrence of ch in the current string, starting the search at index; returns UTFString::npos if nothing is found | |
| size_type | find_first_not_of (const UTFString &str, size_type index=0, size_type num=npos) const |
returns the index of the first character within the current string that does not match any character in str, beginning the search at index and searching at most num characters; returns UTFString::npos if nothing is found | |
| size_type | find_first_not_of (code_point ch, size_type index=0) const |
returns the index of the first character within the current string that does not match ch, starting the search at index; returns UTFString::npos if nothing is found | |
| size_type | find_first_not_of (char ch, size_type index=0) const |
returns the index of the first character within the current string that does not match ch, starting the search at index; returns UTFString::npos if nothing is found | |
| size_type | find_first_not_of (wchar_t ch, size_type index=0) const |
returns the index of the first character within the current string that does not match ch, starting the search at index; returns UTFString::npos if nothing is found | |
| size_type | find_first_not_of (unicode_char ch, size_type index=0) const |
returns the index of the first character within the current string that does not match ch, starting the search at index; returns UTFString::npos if nothing is found | |
| size_type | find_last_of (const UTFString &str, size_type index=npos, size_type num=npos) const |
returns the index of the first character within the current string that matches any character in str, doing a reverse search from index and searching at most num characters; returns UTFString::npos if nothing is found | |
| size_type | find_last_of (code_point ch, size_type index=npos) const |
returns the index of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
| size_type | find_last_of (char ch, size_type index=npos) const |
returns the index of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
| size_type | find_last_of (wchar_t ch, size_type index=npos) const |
returns the index of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
| size_type | find_last_of (unicode_char ch, size_type index=npos) const |
returns the index of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
| size_type | find_last_not_of (const UTFString &str, size_type index=npos, size_type num=npos) const |
returns the index of the last character within the current string that does not match any character in str, doing a reverse search from index; returns UTFString::npos if nothing is found | |
| size_type | find_last_not_of (code_point ch, size_type index=npos) const |
returns the index of the last occurrence of a character that does not match ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
| size_type | find_last_not_of (char ch, size_type index=npos) const |
returns the index of the last occurrence of a character that does not match ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
| size_type | find_last_not_of (wchar_t ch, size_type index=npos) const |
returns the index of the last occurrence of a character that does not match ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
| size_type | find_last_not_of (unicode_char ch, size_type index=npos) const |
returns the index of the last occurrence of a character that does not match ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found | |
Operators | |
| bool | operator< (const UTFString &right) const |
| less than operator | |
| bool | operator<= (const UTFString &right) const |
| less than or equal operator | |
| bool | operator> (const UTFString &right) const |
| greater than operator | |
| bool | operator>= (const UTFString &right) const |
| greater than or equal operator | |
| bool | operator== (const UTFString &right) const |
| equality operator | |
| bool | operator!= (const UTFString &right) const |
| inequality operator | |
| UTFString & | operator= (const UTFString &s) |
| assignment operator, implicitly casts all compatible types | |
| UTFString & | operator= (code_point ch) |
| assignment operator | |
| UTFString & | operator= (char ch) |
| assignment operator | |
| UTFString & | operator= (wchar_t ch) |
| assignment operator | |
| UTFString & | operator= (unicode_char ch) |
| assignment operator | |
| code_point & | operator[] (size_type index) |
| code point dereference operator | |
| const code_point & | operator[] (size_type index) const |
| code point dereference operator | |
Implicit Cast Operators | |
| operator std::string () const | |
| implicit cast to std::string | |
| operator std::wstring () const | |
| implicit cast to std::wstring | |
Static Public Member Functions | |
UTF-16 character encoding/decoding | |
| static bool | _utf16_independent_char (code_point cp) |
| returns true if cp does not match the signature for the lead or follow code point of a surrogate pair in a UTF-16 sequence | |
| static bool | _utf16_surrogate_lead (code_point cp) |
| returns true if cp matches the signature of a surrogate pair lead character | |
| static bool | _utf16_surrogate_follow (code_point cp) |
| returns true if cp matches the signature of a surrogate pair following character | |
| static size_t | _utf16_char_length (code_point cp) |
| estimates the number of UTF-16 code points in the sequence starting with cp | |
| static size_t | _utf16_char_length (unicode_char uc) |
| returns the number of UTF-16 code points needed to represent the given UTF-32 character uc | |
| static size_t | _utf16_to_utf32 (const code_point in_cp[2], unicode_char &out_uc) |
| converts the given UTF-16 character buffer in_cp to a single UTF-32 Unicode character out_uc, returns the number of code points used to create the output character (2 for surrogate pairs, otherwise 1) | |
| static size_t | _utf32_to_utf16 (const unicode_char &in_uc, code_point out_cp[2]) |
| writes the given UTF-32 in_uc to the buffer location out_cp using UTF-16 encoding, returns the number of code points used to encode the input (always 1 or 2) | |
UTF-8 character encoding/decoding | |
| static bool | _utf8_start_char (unsigned char cp) |
| returns true if cp is the beginning of a UTF-8 sequence | |
| static size_t | _utf8_char_length (unsigned char cp) |
| estimates the number of UTF-8 code points in the sequence starting with cp | |
| static size_t | _utf8_char_length (unicode_char uc) |
| returns the number of UTF-8 code points needed to represent the given UTF-32 character uc | |
| static size_t | _utf8_to_utf32 (const unsigned char in_cp[6], unicode_char &out_uc) |
| converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of bytes used to create the output character (maximum of 6) | |
| static size_t | _utf32_to_utf8 (const unicode_char &in_uc, unsigned char out_cp[6]) |
| writes the given UTF-32 in_uc to the buffer location out_cp using UTF-8 encoding, returns the number of bytes used to encode the input | |
| static size_type | _verifyUTF8 (const unsigned char *c_str) |
| verifies a UTF-8 stream, returning the total number of Unicode characters found | |
| static size_type | _verifyUTF8 (const std::string &str) |
| verifies a UTF-8 stream, returning the total number of Unicode characters found | |
Static Public Attributes | |
| static const size_type | npos = ~0 |
| the usual constant representing: not found, no limit, etc | |
Related Functions | |
| (Note that these are not member functions.) | |
| UTFString | operator+ (const UTFString &s1, const UTFString &s2) |
| string addition operator | |
| UTFString | operator+ (const UTFString &s1, UTFString::code_point c) |
| string addition operator | |
| UTFString | operator+ (const UTFString &s1, UTFString::unicode_char c) |
| string addition operator | |
| UTFString | operator+ (const UTFString &s1, char c) |
| string addition operator | |
| UTFString | operator+ (const UTFString &s1, wchar_t c) |
| string addition operator | |
| UTFString | operator+ (UTFString::code_point c, const UTFString &s2) |
| string addition operator | |
| UTFString | operator+ (UTFString::unicode_char c, const UTFString &s2) |
| string addition operator | |
| UTFString | operator+ (char c, const UTFString &s2) |
| string addition operator | |
| UTFString | operator+ (wchar_t c, const UTFString &s2) |
| string addition operator | |
| std::ostream & | operator<< (std::ostream &os, const UTFString &s) |
| std::ostream write operator | |
| std::wostream & | operator<< (std::wostream &os, const UTFString &s) |
| std::wostream write operator | |
Classes | |
| class | _base_iterator |
| base iterator class for UTFString More... | |
| class | _const_fwd_iterator |
| const forward iterator for UTFString More... | |
| class | _const_rev_iterator |
| const reverse iterator for UTFString More... | |
| class | _fwd_iterator |
| forward iterator for UTFString More... | |
| class | _rev_iterator |
| reverse iterator for UTFString More... | |
| class | invalid_data |
| This exception is used when invalid data streams are encountered. More... | |
|
|
a single UTF-16 code point
|
|
|
const iterator
|
|
|
const reverse iterator
|
|
|
iterator
|
|
|
reverse iterator
|
|
|
size type used to indicate string size and character positions within the string
|
|
|
a single 32-bit Unicode character
|
|
|
string type used for returning UTF-32 formatted data
|
|
|
value type typedef for use in iterators
|
|
|
default constructor, creates an empty string
|
|
|
copy constructor
|
|
||||||||||||
|
length copies of ch
|
|
|
duplicate of nul-terminated sequence str
|
|
||||||||||||
|
duplicate of str, length code points long
|
|
||||||||||||||||
|
substring of str starting at index and length code points long
|
|
|
duplicate of nul-terminated wchar_t array w_str
|
|
||||||||||||
|
duplicate of w_str, length characters long
|
|
|
duplicate of wstr
|
|
|
duplicate of nul-terminated C-string c_str (UTF-8 encoding)
|
|
||||||||||||
|
duplicate of c_str, length characters long (UTF-8 encoding)
|
|
|
duplicate of str (UTF-8 encoding)
|
|
||||||||||||
|
length copies of ch
|
|
|
destructor
|
|
|
returns the number of UTF-16 code points needed to represent the given UTF-32 character cp
|
|
|
estimates the number of UTF-16 code points in the sequence starting with cp
|
|
|
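returns true if cp does not match the signature for the lead or follow code point of a surrogate pair in a UTF-16 sequence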
|
|
|
returns true if cp matches the signature of a surrogate pair following character
|
|
|
returns true if cp matches the signature of a surrogate pair lead character
|
|
||||||||||||
|
converts the given UTF-16 character buffer in_cp to a single UTF-32 Unicode character out_uc, returns the number of code points used to create the output character (2 for surrogate pairs, otherwise 1). This function does its best to prevent error conditions, verifying complete surrogate pairs before applying the algorithm. In the event that half of a pair is found it will happily generate a value in the 0xD800 - 0xDFFF range, which is normally an invalid Unicode value but we preserve them for use as sentinel values. |
|
||||||||||||
|
writes the given UTF-32 uc_in to the buffer location out_cp using UTF-16 encoding, returns the number of code points used to encode the input (always 1 or 2)
This function, like its counterpart, will happily create invalid UTF-16 surrogate pairs. These invalid entries will be created for any value of uc_in that falls in the range U+D800 - U+DFFF.
|
|
||||||||||||
|
writes the given UTF-32 uc_in to the buffer location out_cp using UTF-8 encoding, returns the number of bytes used to encode the input
|
|
|
returns the number of UTF-8 code points needed to represent the given UTF-32 character cp
|
|
|
estimates the number of UTF-8 code points in the sequence starting with cp
|
|
|
returns true if cp is the beginning of a UTF-8 sequence
|
|
||||||||||||
|
converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of bytes used to create the output character (maximum of 6)
|
|
|
verifies a UTF-8 stream, returning the total number of Unicode characters found
|
|
|
verifies a UTF-8 stream, returning the total number of Unicode characters found
|
|
||||||||||||
|
appends num repetitions of ch on to the end of the current string (Full Unicode spectrum)
|
|
||||||||||||
|
appends num repetitions of ch on to the end of the current string (Unicode values less than 128)
|
|
||||||||||||
|
appends num characters of str on to the end of the current string (UTF-8 encoding)
|
|
||||||||||||
|
appends num repetitions of ch on to the end of the current string
|
|
||||||||||||
|
appends num characters of str on to the end of the current string
|
|
||||||||||||
|
appends the sequence denoted by start and end on to the end of the current string
|
|
||||||||||||
|
appends num repetitions of ch on to the end of the current string
|
|
||||||||||||
|
appends num characters of str on to the end of the current string
|
|
||||||||||||||||
|
appends a substring of str starting at index that is len characters long on to the end of the current string
|
|
|
appends str on to the end of the current string
|
|
|
appends str on to the end of the current string
|
|
||||||||||||
|
assign num copies of ch to the current string
|
|
||||||||||||
|
assign the first num characters of c_str to the current string (c_str is treated as a UTF-8 stream)
|
|
|
assign c_str to the current string (c_str is treated as a UTF-8 stream)
|
|
|
assign str to the current string (str is treated as a UTF-8 stream)
|
|
||||||||||||
|
assign the first num characters of w_str to the current string
|
|
|
assign w_str to the current string
|
|
|
assign wstr to the current string (wstr is treated as a UTF-16 stream)
|
|
||||||||||||
|
assign num copies of ch to the current string
|
|
||||||||||||||||
|
assign len entries from str to the current string, starting at index
|
|
||||||||||||
|
assign the first num characters of str to the current string
|
|
|
assign the nul-terminated str to the current string
|
|
|
assign str to the current string
|
|
||||||||||||
|
gives the current string the values from start to end
|
|
|
returns the current string in UTF-32 form within a utf32string
|
|
|
returns the current string in UTF-32 form as a nul-terminated unicode_char array
|
|
|
returns the current string in UTF-8 form within a std::string
|
|
|
returns the current string in UTF-8 form as a nul-terminated char array
|
|
|
returns the current string in the native form of std::wstring
|
|
|
returns the current string in the native form of a nul-terminated wchar_t array
|
|
|
returns a reference to the element in the string at index
|
|
|
returns a reference to the element in the string at index
|
|
|
returns an iterator to the first element of the string
|
|
|
returns an iterator to the first element of the string
|
|
|
returns a pointer to the first character in the current string
|
|
|
returns the number of elements that the string can hold before it will need to allocate more space
|
|
|
deletes all of the elements in the string
|
|
||||||||||||||||||||
|
compare a substring of str to a substring of the current string, where the substring of str begins at zero and is length2 UTF-8 code points long, and the substring of the current string begins at index and is length characters long
|
|
||||||||||||||||||||
|
compare a substring of str to a substring of the current string, where the substring of str begins at zero and is length2 elements long, and the substring of the current string begins at index and is length characters long
|
|
||||||||||||||||||||
|
compare a substring of str to a substring of the current string, where the substring of str begins at zero and is length2 characters long, and the substring of the current string begins at index and is length characters long
|
|
||||||||||||||||||||||||
|
compare a substring of str to a substring of the current string, where index2 and length2 refer to str and index and length refer to the current string
|
|
||||||||||||||||
|
compare str to a substring of the current string, starting at index for length characters
|
|
|
compare str to the current string
|
|
|
compare str to the current string
|
|
|
returns a pointer to the first character in the current string
|
|
|
returns true if the string has no elements, false otherwise
|
|
|
returns an iterator just past the end of the string
|
|
|
returns an iterator just past the end of the string
|
|
||||||||||||
|
removes num code points from the current string, starting at index
|
|
||||||||||||
|
removes the code points between start and end (including the one at start but not the one at end), returning an iterator to the code point after the last code point removed
|
|
|
removes the code point pointed to by loc, returning an iterator to the next character
|
|
||||||||||||
|
returns the index of the first occurrence of ch within the current string, starting at index; returns UTFString::npos if nothing is found. ch can fully represent any Unicode character |
|
||||||||||||
|
returns the index of the first occurrence of ch within the current string, starting at index; returns UTFString::npos if nothing is found. ch is only capable of representing Unicode values up to U+FFFF (65535) |
|
||||||||||||
|
returns the index of the first occurrence of ch within the current string, starting at index; returns UTFString::npos if nothing is found. ch is only capable of representing Unicode values up to U+FFFF (65535) |
|
||||||||||||
|
returns the index of the first occurrence of ch within the current string, starting at index; returns UTFString::npos if nothing is found. ch is only capable of representing Unicode values up to U+007F (127) |
|
||||||||||||||||
|
returns the index of the first occurrence of str within the current string and within length code points, starting at index; returns UTFString::npos if nothing is found. cp_str is a UTF-16 encoded string |
|
||||||||||||||||
|
returns the index of the first occurrence of str within the current string and within length code points, starting at index; returns UTFString::npos if nothing is found. cp_str is a UTF-8 encoded string |
|
||||||||||||||||
|
returns the index of the first occurrence of str within the current string and within length code points, starting at index; returns UTFString::npos if nothing is found. cp_str is a UTF-16 encoded string |
|
||||||||||||
|
returns the index of the first occurrence of str within the current string, starting at index; returns UTFString::npos if nothing is found. str is a UTF-16 encoded string, but through implicit casting can also be a UTF-8 encoded string (const char* or std::string) |
|
||||||||||||
|
returns the index of the first character within the current string that does not match ch, starting the search at index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the first character within the current string that does not match ch, starting the search at index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the first character within the current string that does not match ch, starting the search at index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the first character within the current string that does not match ch, starting the search at index; returns UTFString::npos if nothing is found
|
|
||||||||||||||||
|
returns the index of the first character within the current string that does not match any character in str, beginning the search at index and searching at most num characters; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the first occurrence of ch in the current string, starting the search at index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the first occurrence of ch in the current string, starting the search at index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the first occurrence of ch in the current string, starting the search at index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the first occurrence of ch in the current string, starting the search at index; returns UTFString::npos if nothing is found
|
|
||||||||||||||||
|
Returns the index of the first character within the current string that matches any character in str, beginning the search at index and searching at most num characters; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the last occurrence of a character that does not match ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the last occurrence of a character that does not match ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the last occurrence of a character that does not match ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the last occurrence of a character that does not match ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||||||
|
returns the index of the last character within the current string that does not match any character in str, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the index of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||||||
|
returns the index of the first character within the current string that matches any character in str, doing a reverse search from index and searching at most num characters; returns UTFString::npos if nothing is found
|
|
|
returns the data point loc evaluated as a UTF-32 value. This function will only properly decode surrogate pairs when loc points to the index of a lead code point that is followed by a trailing code point. Evaluating the trailing code point itself, or pointing to a code point that is a sentinel value (part of a broken pair) will return the value of just that code point (not a valid Unicode value, but useful as a sentinel value). |
|
||||||||||||||||
|
inserts num copies of ch into the current string, before the code point denoted by i
|
|
||||||||||||||||
|
inserts num copies of ch into the current string, before the code point denoted by i
|
|
||||||||||||||||
|
inserts num copies of ch into the current string, before the code point denoted by i
|
|
||||||||||||||||
|
inserts num copies of ch into the current string, before the code point denoted by i
|
|
||||||||||||||||
|
inserts num copies of ch into the current string, at location index
|
|
||||||||||||||||
|
inserts num copies of ch into the current string, at location index
|
|
||||||||||||||||
|
inserts num copies of ch into the current string, at location index
|
|
||||||||||||||||
|
inserts num copies of ch into the current string, at location index
|
|
||||||||||||||||
|
inserts num code points of str into the current string, at location index
|
|
||||||||||||||||
|
inserts num code points of str into the current string, at location index
|
|
||||||||||||||||
|
inserts num code points of str into the current string, at location index
|
|
||||||||||||||||
|
inserts the code points denoted by start and end into the current string, before the code point specified by i
|
|
||||||||||||||||||||
|
inserts a substring of str (starting at index2 and num code points long) into the current string, at location index1
|
|
||||||||||||
|
inserts str into the current string, at location index
|
|
||||||||||||
|
inserts str into the current string, at location index
|
|
||||||||||||
|
inserts ch before the code point denoted by i
|
|
|
returns true if the given Unicode character ch is in this string
|
|
|
Returns the number of code points in the current string.
|
|
|
Returns the number of Unicode characters in the string. Executes in linear time. |
|
|
returns the maximum number of UTF-16 code points that the string can hold
|
|
|
implicit cast to std::string
|
|
|
implicit cast to std::wstring
|
|
|
inequality operator
|
|
|
less than operator
|
|
|
less than or equal operator
|
|
|
assignment operator
|
|
|
assignment operator
|
|
|
assignment operator
|
|
|
assignment operator
|
|
|
assignment operator, implicitly casts all compatible types
|
|
|
equality operator
|
|
|
greater than operator
|
|
|
greater than or equal operator
|
|
|
code point dereference operator
|
|
|
code point dereference operator
|
|
|
appends val to the end of the string. Limited to characters under the 127 value barrier. |
|
|
appends val to the end of the string. This can be used to push surrogate pair code points; you'll just need to push them one after the other. |
|
|
appends val to the end of the string
|
|
|
appends val to the end of the string
|
|
|
returns a reverse iterator to the last element of the string
|
|
|
returns a reverse iterator to the last element of the string
|
|
|
returns a reverse iterator just past the beginning of the string
|
|
|
returns a reverse iterator just past the beginning of the string
|
|
||||||||||||||||||||
|
replaces the code points in the current string from start to end with num copies of ch
|
|
||||||||||||||||||||
|
replaces up to num1 code points in the current string (beginning at index) with num2 copies of ch
|
|
||||||||||||||||||||
|
replaces code points in the current string from start to end with num code points from str
|
|
||||||||||||||||||||||||
|
replaces up to num1 code points of the current string (starting at index1) with up to num2 code points from str beginning at index2
|
|
||||||||||||||||||||
|
replaces up to num1 code points of the current string (starting at index1) with up to num2 code points from str
|
|
||||||||||||||||
|
replaces up to num1 code points of the current string (starting at index1) with str
|
|
|
sets the capacity of the string to at least size code points
|
|
||||||||||||
|
changes the size of the string to size, filling in any new area with val
|
|
||||||||||||
|
returns the location of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the location of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the location of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the location of the first occurrence of ch in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||||||
|
returns the location of the first occurrence of str in the current string, doing a reverse search from index, searching at most num characters; returns UTFString::npos if nothing is found
|
|
||||||||||||||||
|
returns the location of the first occurrence of str in the current string, doing a reverse search from index, searching at most num characters; returns UTFString::npos if nothing is found
|
|
||||||||||||||||
|
returns the location of the first occurrence of str in the current string, doing a reverse search from index, searching at most num characters; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
returns the location of the first occurrence of str in the current string, doing a reverse search from index; returns UTFString::npos if nothing is found
|
|
||||||||||||
|
sets the value of the character at loc to the Unicode value ch (UTF-32). Sentinel values (values between U+D800-U+DFFF) are accepted, but you should be aware that you can also unwittingly create a valid surrogate pair if you don't pay attention to what you are doing.
|
|
|
Returns the number of code points in the current string.
|
|
||||||||||||
|
returns a substring of the current string, starting at index, and num characters long.
If num is omitted, it will default to UTFString::npos, and substr() will simply return the remainder of the string starting at index. |
|
|
exchanges the elements of the current string with those of from
|
|
||||||||||||
|
string addition operator
|
|
||||||||||||
|
string addition operator
|
|
||||||||||||
|
string addition operator
|
|
||||||||||||
|
string addition operator
|
|
||||||||||||
|
string addition operator
|
|
||||||||||||
|
string addition operator
|
|
||||||||||||
|
string addition operator
|
|
||||||||||||
|
string addition operator
|
|
||||||||||||
|
string addition operator
|
|
||||||||||||
|
std::wostream write operator
|
|
||||||||||||
|
std::ostream write operator
|
|
|
the usual constant representing: not found, no limit, etc
|