900 lines
21 KiB
C++
900 lines
21 KiB
C++
|
/**
 * UTF8 string library.
 *
 * Allows native UTF-8 sequences to be used through a string class. It has many
 * overloaded operators that provide features such as concatenation, type
 * conversion and much more.
 *
 * Distributed under GPL v3
 *
 * Author:
 * Grigory Gorelov (gorelov@grigory.info)
 * See more information on grigory.info
 */
|
||
|
|
||
|
#include "String.h"

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

#include <iostream>
#include <new>
#include <ostream>
#include <sstream>
#include <string>

#include "Exception.h"
|
||
|
|
||
|
/**
 * Formats a floating point number and stores the text in this string.
 *
 * The value is printed through std::ostringstream with precision 15 and then
 * split on "." into an integer part and a fraction part.
 *
 * @param d                  Value to format.
 * @param ThousandSeparator  Inserted before every group of three integer
 *                           digits, counted from the right.
 * @param FractionSeparator  Emitted before the fraction digits (only when at
 *                           least one fraction digit is produced).
 * @param IntegerPartLength  Number of integer digits to emit. Missing leading
 *                           digits are filled with "0", surplus leading digits
 *                           are dropped. Zero or a negative value means "as
 *                           many digits as the number has".
 * @param FractionPartLength Number of fraction digits to emit, padded with "0"
 *                           on the right. Zero or a negative value means "as
 *                           many digits as were printed".
 *
 * NOTE(review): the stream may print exponent notation or "inf"/"nan" for some
 * values; such output is not treated specially here.
 */
void UTF8::String::ConvertFromDouble(const long double d, const UTF8::String &ThousandSeparator, const UTF8::String &FractionSeparator, const int IntegerPartLength, const int FractionPartLength) {
    std::ostringstream os;
    os.precision(15);
    os << d;

    const UTF8::String Number(os.str());

    // Extracting integer and fraction
    const std::vector <UTF8::String> Extracted = Number.Explode(".");

    // The sign is split off first so it is never counted as a digit; otherwise
    // a thousand separator could end up right after the minus ("-,123,456").
    UTF8::String Sign;
    UTF8::String Digits;
    if (!Extracted.empty()) {
        Digits = Extracted[0];
    }
    if (Digits[0] == "-") {
        Sign = "-";
        Digits = Digits.Substring(1);
    }

    const unsigned int DigitCount = Digits.Length();

    // Non-positive lengths select the natural length. Negative values used to
    // be converted to huge unsigned loop counts.
    unsigned int IntegerLength = DigitCount;
    if (IntegerPartLength > 0) {
        IntegerLength = static_cast<unsigned int>(IntegerPartLength);
    }

    unsigned int FractionLength = 0;
    if (FractionPartLength > 0) {
        FractionLength = static_cast<unsigned int>(FractionPartLength);
    } else if (Extracted.size() > 1) {
        FractionLength = Extracted[1].Length();
    }

    // Parsing integer: walk the digits from the right, prepending as we go
    UTF8::String Integer;
    for (unsigned int i = 0; i < IntegerLength; i++) {
        if ((i > 0) && (i % 3 == 0)) {
            Integer = ThousandSeparator + Integer;
        }

        if (DigitCount < i + 1) {
            Integer = "0" + Integer;
        } else {
            Integer = Digits[DigitCount - 1 - i] + Integer;
        }
    }

    // Parsing fraction
    UTF8::String Fraction;
    if (FractionLength) {
        Fraction = FractionSeparator;
        for (unsigned int i = 0; i < FractionLength; i++) {
            if ((Extracted.size() > 1) && (Extracted[1].Length() > i)) {
                Fraction += Extracted[1][i];
            } else {
                Fraction += "0";
            }
        }
    }

    *this = Sign + Integer + Fraction;
}
|
||
|
|
||
|
/**
 * Tells whether Str occurs inside this string.
 *
 * @param Str Substring to look for.
 * @return true when GetSubstringPosition() reports anything other than -1.
 */
bool UTF8::String::HasThisString(const UTF8::String &Str) const {
    const long Position = GetSubstringPosition(Str);

    return !(Position == -1);
}
|
||
|
|
||
|
/**
 * Checks whether this single-character string equals one of the characters
 * contained in Characters.
 *
 * @param Characters Set of candidate characters, given as one string.
 * @return true if any character of Characters equals this string.
 * @throws Exception (StringIsNotACharacter) when Length() is not exactly 1.
 */
bool UTF8::String::CharacterIsOneOfThese(const UTF8::String &Characters) const {
    if (Length() != 1) {
        throw Exception("[CharacterIsOneOfThese] String is more then one character length: \"" + ToString() + "\"", UTF8::Exception::StringIsNotACharacter);
    }

    unsigned int Index = 0;
    while (Index < Characters.Length()) {
        if (Characters[Index] == *this) {
            return true;
        }
        ++Index;
    }

    return false;
}
|
||
|
|
||
|
/**
 * Reads a whole file into a new string.
 *
 * The content is appended through AppendString(), so it stops at the first
 * NUL byte and is validated by that function.
 *
 * @param Path Path of the file to read.
 * @return String holding the file content.
 * @throws Exception (FileNotFound) when the file cannot be opened.
 * @throws Exception when the file size cannot be determined.
 */
UTF8::String UTF8::String::FromFile(const UTF8::String &Path) {
    // ToString() yields an empty std::string for an empty path, whereas the raw
    // data pointer of an empty string is NULL.
    const std::string FileName = Path.ToString();

    std::ifstream File;
    File.open(FileName.c_str());

    if (!File.is_open()) {
        throw Exception("Cannot open file \"" + FileName + "\"", UTF8::Exception::FileNotFound);
    }

    File.seekg(0, std::ios::end);
    const std::streamoff Size = File.tellg();
    File.seekg(0, std::ios::beg);

    // tellg() reports -1 on failure; this used to become a huge allocation size.
    if (Size < 0) {
        throw Exception("Cannot determine size of file \"" + FileName + "\"");
    }

    // std::string owns the buffer, so it is released even if AppendString()
    // throws. It is zero-filled, which keeps the data terminated when fewer
    // bytes than Size are actually read.
    std::string Buffer(static_cast<std::string::size_type>(Size), '\0');
    if (Size > 0) {
        File.read(&Buffer[0], static_cast<std::streamsize>(Size));
    }

    File.close();

    UTF8::String s;
    s.AppendString(Buffer.c_str());

    return s;
}
|
||
|
|
||
|
/**
 * Searches for SubString, scanning in the given direction.
 *
 * @param SubString     Text to look for.
 * @param StartPosition Character position at which scanning starts. For a
 *                      right-to-left search a position past the last possible
 *                      match is clamped to that last position.
 * @param Direction     SearchDirectionFromLeftToRight or
 *                      SearchDirectionFromRightToLeft; any other value finds
 *                      nothing.
 * @return Character position of the first match found, or -1.
 *
 * All position arithmetic is done in signed long. The earlier unsigned
 * arithmetic underflowed when SubString was longer than this string, and the
 * right-to-left scan never stopped at position 0.
 */
long UTF8::String::Search(const UTF8::String &SubString, unsigned int StartPosition, int Direction) const {
    const long SubstringLength = SubString.Length();
    const long LastStart = static_cast<long>(Length()) - SubstringLength;

    // Substring does not fit at all
    if (LastStart < 0) {
        return -1;
    }

    long n = StartPosition;

    if (Direction == SearchDirectionFromLeftToRight) {
        for (; n <= LastStart; n++) {
            if (this->Substring(static_cast<unsigned int>(n), static_cast<unsigned int>(SubstringLength)) == SubString) {
                return n;
            }
        }
    } else if (Direction == SearchDirectionFromRightToLeft) {
        if (n > LastStart) {
            n = LastStart;
        }

        for (; n >= 0; n--) {
            if (this->Substring(static_cast<unsigned int>(n), static_cast<unsigned int>(SubstringLength)) == SubString) {
                return n;
            }
        }
    }

    return -1;
}
|
||
|
|
||
|
/**
 * Writes the string to an output stream as its std::string representation.
 *
 * @param os Destination stream.
 * @param s  String to write.
 * @return The same stream, to allow chaining.
 */
std::ostream & operator<<(std::ostream &os, const UTF8::String &s) {
    return os << s.ToString();
}
|
||
|
|
||
|
/**
 * Equality with the C string on the left-hand side; forwards to the
 * comparison with the operands swapped.
 */
bool operator==(const char *str, const UTF8::String &StringObj) {
    const bool Same = (StringObj == str);

    return Same;
}
|
||
|
|
||
|
/**
 * Equality with the std::string on the left-hand side; forwards to the
 * comparison with the operands swapped.
 */
bool operator==(const std::string &str, const UTF8::String &StringObj) {
    const bool Same = (StringObj == str);

    return Same;
}
|
||
|
|
||
|
/**
 * Inequality with the C string on the left-hand side; forwards to the
 * comparison with the operands swapped.
 */
bool operator!=(const char *str, const UTF8::String &StringObj) {
    const bool Differs = (StringObj != str);

    return Differs;
}
|
||
|
|
||
|
/**
 * Inequality with the std::string on the left-hand side; forwards to the
 * comparison with the operands swapped.
 */
bool operator!=(const std::string &str, const UTF8::String &StringObj) {
    const bool Differs = (StringObj != str);

    return Differs;
}
|
||
|
|
||
|
/**
 * Returns a copy of this string wrapped in « and » quotation marks.
 */
UTF8::String UTF8::String::Quote() const {
    UTF8::String Quoted("«");
    Quoted += *this;
    Quoted += "»";

    return Quoted;
}
|
||
|
|
||
|
UTF8::String UTF8::String::Trim() const {
|
||
|
UTF8::String result = *this;
|
||
|
long i = 0;
|
||
|
|
||
|
while ((result[i] == " ") || (result[i] == "\n") || (result[i] == "\r") || (result[i] == "\t")) {
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
if (i == result.Length()) {
|
||
|
return UTF8::String();
|
||
|
}
|
||
|
|
||
|
|
||
|
long j = result.Length();
|
||
|
while ((result[j - 1] == " ") || (result[j - 1] == "\n") || (result[j - 1] == "\r") || (result[j - 1] == "\t")) {
|
||
|
j--;
|
||
|
}
|
||
|
|
||
|
|
||
|
result = result.Substring(i, j - i);
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
/**
 * Returns a copy of this string in which every occurrence of Search is
 * replaced with Replace. Text inserted by a replacement is not searched again.
 *
 * @param Search  Text to look for. An empty value leaves the string unchanged.
 * @param Replace Text to insert instead.
 * @return The resulting string.
 */
UTF8::String UTF8::String::Replace(const UTF8::String &Search, const UTF8::String &Replace) const {
    UTF8::String result = *this;

    const unsigned int SearchLength = Search.Length();
    const unsigned int ReplaceLength = Replace.Length();

    // An empty pattern never changes anything. Combined with an empty
    // replacement it used to loop forever.
    if (SearchLength == 0) {
        return result;
    }

    // Long to cover unsigned int and -1
    long pos = 0;

    // The first condition stops as soon as the pattern can no longer fit after
    // pos, so Search() is never asked to scan a string shorter than the pattern.
    while ((static_cast<unsigned long>(pos) + SearchLength <= result.Length())
            && ((pos = result.Search(Search, pos)) != -1)) {

        result = result.SubstringReplace(pos, SearchLength, Replace);

        // Next time we search after replacement
        pos += ReplaceLength;
    }

    return result;
}
|
||
|
|
||
|
/**
 * Returns a copy of this string in which Count characters starting at Start
 * are replaced with Replace.
 *
 * @param Start   Position of the first character to replace.
 * @param Count   Number of characters to replace.
 * @param Replace Text inserted in their place.
 * @return The modified copy, or an unchanged copy when Start is not inside
 *         the string.
 */
UTF8::String UTF8::String::SubstringReplace(unsigned int Start, unsigned int Count, const UTF8::String &Replace) const {
    if (Start >= Length()) {
        return *this;
    }

    // Substring(0, 0) would return the whole string, so the head is only
    // extracted when there is one.
    UTF8::String Head;
    if (Start) {
        Head = Substring(0, Start);
    }

    return Head + Replace + Substring(Start + Count);
}
|
||
|
|
||
|
/**
 * Joins Strings into one string, putting Separator between the elements.
 *
 * NOTE(review): the separator is only written once the result is non-empty,
 * so empty elements at the beginning of the list do not produce a separator.
 *
 * @param Strings   Elements to join.
 * @param Separator Text placed between elements.
 * @return The joined string; empty for an empty list.
 */
UTF8::String UTF8::String::Implode(const std::vector <UTF8::String> &Strings, const UTF8::String &Separator) {
    UTF8::String Joined;

    for (std::vector <UTF8::String>::const_iterator Item = Strings.begin(); Item != Strings.end(); ++Item) {
        if (Joined.Length()) {
            Joined += Separator;
        }

        Joined += *Item;
    }

    return Joined;
}
|
||
|
|
||
|
std::vector <UTF8::String> UTF8::String::Explode(const String &Separator) const {
|
||
|
std::vector <UTF8::String> v;
|
||
|
|
||
|
unsigned int prev = 0;
|
||
|
|
||
|
int i = 0;
|
||
|
|
||
|
while (i < Length() - Separator.Length() + 1) {
|
||
|
if (Substring(i, Separator.Length()) == Separator) {
|
||
|
if (i - prev > 0) {
|
||
|
v.push_back(Substring(prev, i - prev));
|
||
|
}
|
||
|
i += Separator.Length();
|
||
|
prev = i;
|
||
|
} else {
|
||
|
i++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (prev < Length()) {
|
||
|
|
||
|
v.push_back(Substring(prev, Length() - prev));
|
||
|
}
|
||
|
|
||
|
return v;
|
||
|
}
|
||
|
|
||
|
/**
 * Concatenation with a C string on the left-hand side.
 *
 * @return New string made of CharPtr followed by StringObj.
 */
UTF8::String operator+(const char *CharPtr, const UTF8::String &StringObj) {
    UTF8::String Joined(CharPtr);

    Joined += StringObj;

    return Joined;
}
|
||
|
|
||
|
/**
 * Concatenation with a std::string on the left-hand side.
 *
 * @return New string made of str followed by StringObj.
 */
UTF8::String operator+(const std::string & str, const UTF8::String &StringObj) {
    UTF8::String Joined(str);

    Joined += StringObj;

    return Joined;
}
|
||
|
|
||
|
/**
 * Concatenation with a number on the left-hand side. The number is first
 * turned into a string through the UTF8::String constructor.
 *
 * @return New string made of the converted number followed by StringObj.
 */
UTF8::String operator+(const long l, const UTF8::String &StringObj) {
    UTF8::String Joined(l);

    Joined += StringObj;

    return Joined;
}
|
||
|
|
||
|
/**
 * Returns a new string made of this string followed by s.
 */
UTF8::String UTF8::String::operator+(const UTF8::String &s) const {
    UTF8::String Joined(*this);

    Joined.AppendString(s.Data);

    return Joined;
}
|
||
|
|
||
|
/**
 * Appends s to this string.
 *
 * @return Reference to this string.
 */
UTF8::String & UTF8::String::operator+=(const UTF8::String &s) {
    this->AppendString(s.Data);

    return *this;
}
|
||
|
|
||
|
void UTF8::String::AppendString(const char *str) {
|
||
|
// The functions that can fill buffer directly:
|
||
|
//
|
||
|
// SetString AppendString
|
||
|
//
|
||
|
// Make shure all preparations are done there
|
||
|
|
||
|
if (str && strlen(str)) {
|
||
|
if (DataArrayLength) {
|
||
|
CheckIfStringIsCorrect(str);
|
||
|
|
||
|
unsigned int StrLength = strlen(str);
|
||
|
|
||
|
Data = (char *) realloc(Data, DataArrayLength + StrLength + 1);
|
||
|
|
||
|
if (Data != NULL) {
|
||
|
|
||
|
memcpy(Data + DataArrayLength, str, StrLength);
|
||
|
DataArrayLength += StrLength;
|
||
|
Data[DataArrayLength] = 0;
|
||
|
|
||
|
CalculateStringLength();
|
||
|
} else {
|
||
|
throw Exception("[AppendString] Cannot realloc any more memory");
|
||
|
}
|
||
|
} else {
|
||
|
|
||
|
SetString(str);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void UTF8::String::SetString(const char *str) {
|
||
|
// The functions that can fill buffer directly:
|
||
|
//
|
||
|
// SetString AppendString
|
||
|
//
|
||
|
// Make shure all preparations are done there
|
||
|
|
||
|
if (str && strlen(str)) {
|
||
|
CheckIfStringIsCorrect(str);
|
||
|
|
||
|
Empty();
|
||
|
|
||
|
DataArrayLength = strlen(str);
|
||
|
Data = new char[DataArrayLength + 1];
|
||
|
Data[DataArrayLength] = 0;
|
||
|
|
||
|
memcpy(Data, str, DataArrayLength);
|
||
|
|
||
|
CalculateStringLength();
|
||
|
} else {
|
||
|
|
||
|
Empty();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
 * Replaces the content of this string with the decimal representation of n.
 *
 * @param n Number to convert. The whole int64_t range is supported.
 */
void UTF8::String::ConvertFromInt64(int64_t n) {
    const bool Negative = n < 0;

    // Work on the unsigned magnitude: negating INT64_MIN as a signed value
    // overflows, while unsigned negation is well defined.
    uint64_t Magnitude = static_cast<uint64_t>(n);
    if (Negative) {
        Magnitude = 0 - Magnitude;
    }

    // At most 20 digits, an optional sign and the terminator
    char Buffer[24];
    unsigned int Position = sizeof (Buffer) - 1;
    Buffer[Position] = 0;

    // Digits are produced from the least significant one, filling the buffer
    // backwards. do/while also emits the single "0" for zero.
    do {
        Buffer[--Position] = static_cast<char>('0' + Magnitude % 10);
        Magnitude /= 10;
    } while (Magnitude);

    if (Negative) {
        Buffer[--Position] = '-';
    }

    SetString(Buffer + Position);
}
|
||
|
|
||
|
/**
 * Constructs a string holding a formatted floating point number.
 * The arguments are passed unchanged to ConvertFromDouble().
 */
UTF8::String::String(const long double d, const UTF8::String &ThousandSeparator, const UTF8::String &DecimalSeparator, const int IntegerPartCount, const int FractionPartCount) {
    this->InitString();
    this->ConvertFromDouble(d, ThousandSeparator, DecimalSeparator, IntegerPartCount, FractionPartCount);
}
|
||
|
|
||
|
/**
 * Puts the object into the empty state: no buffer, zero byte length and zero
 * character length. It does not release any buffer.
 */
void UTF8::String::InitString() {
    StringLength = 0;
    DataArrayLength = 0;
    Data = NULL;
}
|
||
|
|
||
|
/**
 * Constructs an empty string.
 */
UTF8::String::String() {
    this->InitString();
}
|
||
|
|
||
|
/**
 * Constructs a string from the NUL-terminated content of a std::string.
 *
 * @throws Exception when the bytes fail CheckIfStringIsCorrect().
 */
UTF8::String::String(const std::string & s) {
    InitString();

    const char *Raw = s.c_str();

    CheckIfStringIsCorrect(Raw);
    AppendString(Raw);
    CalculateStringLength();
}
|
||
|
|
||
|
int UTF8::String::GetSymbolIndexInDataArray(unsigned int Position) const {
|
||
|
if (Position >= StringLength) {
|
||
|
throw Exception((UTF8::String("[GetSymbolIndexInDataArray] trying to get position beyond the end of string. StringLength: ") + StringLength + " Position: " + Position + " String: [" + Data + "]").ToString());
|
||
|
}
|
||
|
|
||
|
unsigned int n = 0;
|
||
|
for (unsigned int i = 0; i < Position; i++) {
|
||
|
|
||
|
n += GetSequenceLength(Data + n);
|
||
|
}
|
||
|
|
||
|
return n;
|
||
|
|
||
|
}
|
||
|
|
||
|
/**
 * Finds the first occurrence of SubString at or after Start.
 *
 * @param SubString Text to look for.
 * @param Start     Character position at which scanning begins.
 * @return Character position of the match, or -1 when there is none
 *         (including when SubString cannot fit at or after Start).
 */
long UTF8::String::GetSubstringPosition(const UTF8::String &SubString, unsigned int Start) const {
    if (SubString.Length() > StringLength) {
        return -1;
    }

    // Safe after the check above
    const unsigned int LastStart = StringLength - SubString.StringLength;

    // A Start past the last candidate used to make the unsigned scan count
    // underflow into billions of iterations.
    if (Start > LastStart) {
        return -1;
    }

    for (unsigned int Position = Start; Position <= LastStart; Position++) {
        if (this->Substring(Position, SubString.StringLength) == SubString) {
            return Position;
        }
    }

    return -1;
}
|
||
|
|
||
|
/**
 * Extracts Count characters starting at character position Start.
 *
 * @param Start First character to copy. A position at or past the end gives
 *              an empty string.
 * @param Count Number of characters to copy. Zero, or a value reaching past
 *              the end, means "everything up to the end".
 * @return The extracted string.
 */
UTF8::String UTF8::String::Substring(unsigned int Start, unsigned int Count) const {
    if (Start >= StringLength) {
        return UTF8::String();
    }

    // Compare against the remaining length rather than computing
    // Start + Count, which can wrap for very large Count values.
    const unsigned int Available = StringLength - Start;
    if ((Count == 0) || (Count > Available)) {
        Count = Available;
    }

    const unsigned int StartIndex = GetSymbolIndexInDataArray(Start);

    // Byte length of the Count characters
    unsigned int CopyAmount = 0;
    for (unsigned int i = 0; i < Count; i++) {
        CopyAmount += GetSequenceLength(Data + StartIndex + CopyAmount);
    }

    // std::string owns the temporary bytes, so nothing is leaked if the
    // constructor throws. The earlier manual buffer was also released with
    // delete instead of delete[].
    return UTF8::String(std::string(Data + StartIndex, CopyAmount));
}
|
||
|
|
||
|
/**
 * Constructs a string from a NUL-terminated UTF-8 byte sequence.
 * The bytes are handed to SetString().
 */
UTF8::String::String(const char * str) {
    this->InitString();
    this->SetString(str);
}
|
||
|
|
||
|
/**
 * Constructs a string from a zero-terminated sequence of 32-bit code units.
 * The conversion is done by ConvertFromUTF32().
 */
UTF8::String::String(const uint32_t * str) {
    this->InitString();
    this->ConvertFromUTF32(str);
}
|
||
|
|
||
|
void UTF8::String::ConvertFromUTF32(const uint32_t *s) {
|
||
|
if (s) {
|
||
|
unsigned int WideStringLength = 0;
|
||
|
do {
|
||
|
WideStringLength++;
|
||
|
if (WideStringLength == 4294967295UL) {
|
||
|
throw Exception("[ConvertFromUTF32] Cannot find termination symbol in incoming string.");
|
||
|
}
|
||
|
} while (s[WideStringLength]);
|
||
|
|
||
|
char *tmp = new char[WideStringLength * 4 + 1];
|
||
|
memset(tmp, 0, WideStringLength * 4 + 1);
|
||
|
unsigned int pos = 0;
|
||
|
|
||
|
for (int i = 0; i < WideStringLength; i++) {
|
||
|
uint32_t wc = s[i];
|
||
|
|
||
|
if (wc < 0x80) {
|
||
|
tmp[pos++] = wc;
|
||
|
} else if (wc < 0x800) {
|
||
|
tmp[pos++] = (wc >> 6) | 0b11000000;
|
||
|
tmp[pos++] = (wc & 0b111111) | 0b10000000;
|
||
|
} else if (wc < 0x10000) {
|
||
|
tmp[pos++] = (wc >> 12) | 0b11100000;
|
||
|
tmp[pos++] = ((wc >> 6) & 0b111111) | 0b10000000;
|
||
|
tmp[pos++] = (wc & 0b111111) | 0b10000000;
|
||
|
} else {
|
||
|
|
||
|
tmp[pos++] = (wc >> 18) | 0b11110000;
|
||
|
tmp[pos++] = ((wc >> 12) & 0b111111) | 0b10000000;
|
||
|
tmp[pos++] = ((wc >> 6) & 0b111111) | 0b10000000;
|
||
|
tmp[pos++] = (wc & 0b111111) | 0b10000000;
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
SetString(tmp);
|
||
|
|
||
|
delete tmp;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void UTF8::String::CalculateStringLength() {
|
||
|
// We are not writing anything to memory so limits are not needed
|
||
|
if (Data) {
|
||
|
unsigned int n = 0, count = 0;
|
||
|
do {
|
||
|
// We do not need to check line end here, it is checked when string is changed
|
||
|
n += GetSequenceLength(Data + n);
|
||
|
count++;
|
||
|
} while (Data[n]);
|
||
|
|
||
|
StringLength = count;
|
||
|
} else {
|
||
|
|
||
|
StringLength = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void UTF8::String::CheckIfStringIsCorrect(const char *str) const {
|
||
|
if (str) {
|
||
|
// We are not writing anything to memory so limits are not needed
|
||
|
unsigned int n = 0, i;
|
||
|
unsigned int SequenceLength;
|
||
|
while (str[n]) {
|
||
|
SequenceLength = GetSequenceLength(str + n);
|
||
|
for (i = 1; i < SequenceLength; i++) {
|
||
|
if ((((unsigned char) str[n + i]) >> 6) != 0b10) {
|
||
|
std::string s(str);
|
||
|
throw Exception("[CheckIfStringIsCorrect] Incorrect byte in UTF8 sequence: \"" + s + "\"");
|
||
|
}
|
||
|
}
|
||
|
n += SequenceLength;
|
||
|
if (n >= 0xFFFFFFFF - 4) {
|
||
|
|
||
|
std::string s(str);
|
||
|
throw Exception("[CheckIfStringIsCorrect] termination char was not found in string: \"" + s + "\"");
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
 * Greater-than comparison, expressed through operator== and operator<:
 * true when this string is neither equal to s nor lower than s.
 */
bool UTF8::String::operator>(const UTF8::String &s) const {
    return !(*this == s) && !(*this < s);
}
|
||
|
|
||
|
/**
 * Less-than comparison, done character by character.
 *
 * For each character pair the shorter UTF-8 sequence counts as lower; for
 * equally long sequences the first differing byte decides. If all compared
 * characters are equal, the string with fewer characters is lower.
 */
bool UTF8::String::operator<(const UTF8::String &s) const {
    // Number of characters present in both strings
    unsigned int Shared = s.StringLength;
    if (StringLength < s.StringLength) {
        Shared = StringLength;
    }

    unsigned int Mine = 0;
    unsigned int Theirs = 0;

    for (unsigned int Symbol = 0; Symbol < Shared; ++Symbol) {
        const unsigned int MyLength = GetSequenceLength(Data + Mine);
        const unsigned int TheirLength = GetSequenceLength(s.Data + Theirs);

        if (MyLength != TheirLength) {
            return MyLength < TheirLength;
        }

        for (unsigned int Byte = 0; Byte < MyLength; ++Byte) {
            const char Left = Data[Mine + Byte];
            const char Right = s.Data[Theirs + Byte];

            if (Left != Right) {
                return Left < Right;
            }
        }

        Mine += MyLength;
        Theirs += TheirLength;
    }

    // If this string is substring of s (from left side) then it is lower
    return StringLength < s.StringLength;
}
|
||
|
|
||
|
/**
 * Returns the character at position n as a string of its own.
 *
 * @param n Zero-based character position.
 * @return The character, or an empty string when n is at or past the end.
 */
UTF8::String UTF8::String::operator[](unsigned int const n) const {
    // n is unsigned, so the former additional "n < 0" test could never fire
    // and has been dropped.
    if (n >= StringLength) {
        return UTF8::String();
    }

    // Byte offset of the requested character
    unsigned int pos = 0;
    for (unsigned int i = 0; i < n; i++) {
        pos += GetSequenceLength(Data + pos);
    }

    // A sequence is at most 4 bytes; the zero-filled array supplies the
    // terminator.
    char t[5] = {0};
    memcpy(t, Data + pos, GetSequenceLength(Data + pos));

    return UTF8::String(t);
}
|
||
|
|
||
|
unsigned int UTF8::String::GetSequenceLength(const char * StartByte) const {
|
||
|
if (StartByte && strlen(StartByte)) {
|
||
|
unsigned char Byte = StartByte[0];
|
||
|
if (Byte < 128) {
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
// Here we need back order due to mask operation
|
||
|
if ((Byte >> 5) == 0b110) {
|
||
|
return 2;
|
||
|
}
|
||
|
|
||
|
if ((Byte >> 4) == 0b1110) {
|
||
|
return 3;
|
||
|
}
|
||
|
|
||
|
if ((Byte >> 3) == 0b11110) {
|
||
|
|
||
|
return 4;
|
||
|
}
|
||
|
|
||
|
throw Exception(std::string("[GetSequenceLength] Invalid UTF8 start byte. My own string is: [") + Data + "] Argument is: [" + StartByte + "]");
|
||
|
} else {
|
||
|
|
||
|
throw Exception(std::string("[GetSequenceLength] Invalid UTF8 start byte (it is empty). My own string is: [") + Data + "] Argument is: [" + StartByte + "]");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
 * Copy assignment.
 *
 * @param Original String to copy from.
 * @return Reference to this string.
 */
UTF8::String & UTF8::String::operator=(const String &Original) {
    // Check if objects are not same. Pointers are compared directly: casting
    // them to unsigned long truncates the address where long is narrower
    // than a pointer.
    if (&Original != this) {
        Empty();
        SetString(Original.Data);
    }

    return *this;
}
|
||
|
|
||
|
/**
 * Assigns a NUL-terminated UTF-8 byte sequence.
 * The current content is dropped first, then the bytes go to SetString().
 *
 * @return Reference to this string.
 */
UTF8::String & UTF8::String::operator=(const char *str) {
    this->Empty();
    this->SetString(str);

    return *this;
}
|
||
|
|
||
|
/**
 * Assigns a zero-terminated sequence of 32-bit code units.
 * The current content is dropped first, then ConvertFromUTF32() is applied.
 *
 * @return Reference to this string.
 */
UTF8::String & UTF8::String::operator=(const uint32_t *str) {
    this->Empty();
    this->ConvertFromUTF32(str);

    return *this;
}
|
||
|
|
||
|
/**
 * Assigns the text form of a floating point number.
 * The current content is dropped first, then ConvertFromDouble() is applied
 * with only the number supplied.
 *
 * @return Reference to this string.
 */
UTF8::String & UTF8::String::operator=(long double d) {
    this->Empty();
    this->ConvertFromDouble(d);

    return *this;
}
|
||
|
|
||
|
void UTF8::String::Empty() {
|
||
|
if (DataArrayLength) {
|
||
|
|
||
|
delete Data;
|
||
|
InitString();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
std::string UTF8::String::ToString() const {
|
||
|
if (DataArrayLength) {
|
||
|
return std::string(Data);
|
||
|
} else {
|
||
|
|
||
|
return std::string();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
double UTF8::String::ToDouble() const {
|
||
|
int64_t mul = 1;
|
||
|
char c;
|
||
|
int int_part = 0;
|
||
|
double prec_part = 0;
|
||
|
|
||
|
for (int i = DataArrayLength - 1; i >= 0; i--) {
|
||
|
c = Data[i];
|
||
|
if ((c >= '0') && (c <= '9')) {
|
||
|
int_part += (c - '0') * mul;
|
||
|
mul *= 10;
|
||
|
} else {
|
||
|
if (c == '.') {
|
||
|
prec_part = (double) int_part / (double) mul;
|
||
|
int_part = 0;
|
||
|
mul = 1;
|
||
|
} else {
|
||
|
if ((c == '-') && (i == 0)) {
|
||
|
int_part = -int_part;
|
||
|
prec_part = -prec_part;
|
||
|
} else {
|
||
|
|
||
|
UTF8::String err = "Cannot convert \"" + * this+"\" to double.";
|
||
|
throw UTF8::Exception(err.ToConstCharPtr(), UTF8::Exception::StringToDoubleConversionError);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return int_part + prec_part;
|
||
|
}
|
||
|
|
||
|
int64_t UTF8::String::ToLong() const {
|
||
|
int64_t mul = 1;
|
||
|
char c;
|
||
|
int64_t number = 0;
|
||
|
|
||
|
for (int i = DataArrayLength - 1; i >= 0; i--) {
|
||
|
c = Data[i];
|
||
|
if ((c >= '0') && (c <= '9')) {
|
||
|
number += (c - '0') * mul;
|
||
|
mul *= 10;
|
||
|
} else {
|
||
|
if (c == '.') {
|
||
|
number = 0;
|
||
|
mul = 1;
|
||
|
} else {
|
||
|
if ((c == '-') && (i == 0)) {
|
||
|
number = -number;
|
||
|
} else {
|
||
|
|
||
|
UTF8::String err = "Cannot convert \"" + * this+"\" to number.";
|
||
|
throw UTF8::Exception(err.ToConstCharPtr(), UTF8::Exception::StringToIntConversionError);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return number;
|
||
|
|
||
|
}
|
||
|
|
||
|
/**
 * Returns a new string made of this string followed by the C string s.
 */
UTF8::String UTF8::String::operator+(const char *s) const {
    UTF8::String Joined(*this);

    Joined.AppendString(s);

    return Joined;
}
|
||
|
|
||
|
/**
 * Byte-wise equality: both strings must have the same byte length and the
 * same bytes.
 */
bool UTF8::String::operator==(const UTF8::String &s) const {
    if (DataArrayLength != s.DataArrayLength) {
        return false;
    }

    unsigned int Index = 0;
    while (Index < DataArrayLength) {
        if (Data[Index] != s.Data[Index]) {
            return false;
        }
        ++Index;
    }

    return true;
}
|
||
|
|
||
|
/**
 * Inequality; the negation of operator==.
 */
bool UTF8::String::operator!=(const UTF8::String &s) const {
    const bool Equal = (*this == s);

    return !Equal;
}
|
||
|
|
||
|
/**
 * Byte-wise equality with a C string. A NULL or empty C string equals an
 * empty string.
 */
bool UTF8::String::operator==(const char *str) const {
    if (!str || !str[0]) {
        return StringLength == 0;
    }

    if (DataArrayLength != strlen(str)) {
        return false;
    }

    unsigned int Index = 0;
    while (Index < DataArrayLength) {
        if (Data[Index] != str[Index]) {
            return false;
        }
        ++Index;
    }

    return true;
}
|
||
|
|
||
|
/**
 * Inequality with a C string; the negation of operator==.
 */
bool UTF8::String::operator!=(const char *str) const {
    const bool Equal = (*this == str);

    return !Equal;
}
|
||
|
|
||
|
const char * UTF8::String::ToConstCharPtr() const {
|
||
|
|
||
|
return Data;
|
||
|
}
|
||
|
|
||
|
unsigned int UTF8::String::Length() const {
|
||
|
|
||
|
return StringLength;
|
||
|
}
|
||
|
|
||
|
unsigned int UTF8::String::DataLength() const {
|
||
|
|
||
|
return DataArrayLength;
|
||
|
}
|
||
|
|
||
|
/**
 * Destructor; releases the buffer through Empty().
 */
UTF8::String::~String() {
    this->Empty();
}
|
||
|
|
||
|
/**
 * Copy constructor; copies the bytes of orig through SetString().
 */
UTF8::String::String(const String& orig) {
    this->InitString();
    this->SetString(orig.Data);
}
|
||
|
|