This section of the archives stores flipcode's complete Developer Toolbox collection, featuring a variety of mini-articles and source code contributions from our readers.

 

  Unicode String Class
  Submitted by



Having recently discovered the immense benefits of using Unicode, I decided to make a string class based on the Unicode standard. Why not use std::string, you say? Well, I did, but I needed a bit more functionality: easy conversion to ASCII, to be more specific.

There are also a few other functions, such as basic search and substring functions, as well as string->double, string->long and double->string conversions.

This code is Windows specific, but only in a few functions such as WideCharToMultiByte, MultiByteToWideChar, _setmbcp and _getmbcp. If anyone knows how to do these conversions in a platform-independent way, I would love to know.

The code may lack some comments, but frankly, I don't think it needs a lot of comments.

This code is free for anyone to use as they please, and you are free to email me about anything.

Cheers
John Juul Jensen

Currently browsing [uastring.zip] (3,538 bytes) - [UAString.cpp] - (11,413 bytes)

#include "wchar.h"
#include "windows.h"
#include <mbctype.h>

#include "UAString.h"

//#include "PointsAndVectors.h" //#include "Color.h"

/************************************************************************/ /* Constructors / Destructor */ /************************************************************************/

// Puts every data member into the "empty string, default ANSI codepage" state.
// Intended to be the first statement of a constructor body: CONSTRUCTOR_ALL;
#define CONSTRUCTOR_ALL			\
	codepage = CP_ACP;			\
	uString = NULL;				\
	uStringLength = 0;			\
	uStringBufferSize = 0;		\
	aString = NULL;				\
	aStringLength = 0

// Default constructor: an empty string (no buffers allocated) using the
// default ANSI codepage.
CUAString::CUAString()
	: codepage( CP_ACP)
	, uStringLength( 0)
	, uStringBufferSize( 0)
	, uString( NULL)
	, aStringLength( 0)
	, aString( NULL)
{
}

// Copy constructor: starts from the empty state, then delegates the actual
// copy to the CUAString assignment operator.
CUAString::CUAString( const CUAString& _in)
	: codepage( CP_ACP)
	, uStringLength( 0)
	, uStringBufferSize( 0)
	, uString( NULL)
	, aStringLength( 0)
	, aString( NULL)
{
	*this = _in;
}

// Constructs from a null-terminated wide string: starts from the empty state,
// then delegates to the wchar_t* assignment operator.
CUAString::CUAString( const wchar_t* _in)
	: codepage( CP_ACP)
	, uStringLength( 0)
	, uStringBufferSize( 0)
	, uString( NULL)
	, aStringLength( 0)
	, aString( NULL)
{
	*this = _in;
}

// Constructs from a null-terminated narrow string: starts from the empty
// state, then delegates to the char* assignment operator (which converts the
// text to wide characters).
CUAString::CUAString( const char* _in)
	: codepage( CP_ACP)
	, uStringLength( 0)
	, uStringBufferSize( 0)
	, uString( NULL)
	, aStringLength( 0)
	, aString( NULL)
{
	*this = _in;
}

// Constructs from a number: starts from the empty state, then delegates to
// the double assignment operator, which stores the number's text form.
CUAString::CUAString( const double _in)
	: codepage( CP_ACP)
	, uStringLength( 0)
	, uStringBufferSize( 0)
	, uString( NULL)
	, aStringLength( 0)
	, aString( NULL)
{
	*this = _in;
}

// Destructor: releases both the wide and the narrow buffer via Clear().
CUAString::~CUAString()
{
	Clear();
}

/************************************************************************/ /* */ /* Operators */ /* */ /************************************************************************/

/************************************************************************/
/* = Operators                                                          */
/************************************************************************/

// Copy assignment: takes over the text and the codepage of _in.
//
// - Self-assignment is a no-op. Without the guard the buffer would be
//   released by AllocateString() and then read by AppendString().
// - An empty source clears this string instead of leaving the old text.
// - The buffer is sized for the text plus the terminating null, so
//   AppendString() does not have to reallocate straight away.
//
// Returns a reference to this string.
const CUAString& CUAString::operator=( const CUAString& _in)
{
	if ( this == &_in)
	{
		return *this;
	}

	if ( _in.uStringLength)
	{
		AllocateString( _in.uStringLength + 1);
		AppendString( _in.uString, _in.uStringLength);
	}
	else
	{
		DeAllocateString();
	}

	// Copy the data member; Codepage() is a (non-const) member function.
	codepage = _in.codepage;
	return *this;
}

// Assigns a null-terminated wide string.
//
// A NULL pointer leaves the string unchanged; an empty input clears it.
// The new text is copied into a fresh buffer BEFORE the old buffers are
// released, so _in may point into this string's own storage
// (e.g. s = s.AsUnicode()).
//
// Returns a reference to this string.
const CUAString& CUAString::operator=( const wchar_t* _in)
{
	if ( !_in)
	{
		return *this;
	}

	const unsigned int inLength = static_cast<unsigned int>( wcslen( _in));
	if ( inLength == 0)
	{
		DeAllocateString();
		return *this;
	}

	wchar_t* copy = new wchar_t[ inLength + 1];
	memcpy( copy, _in, inLength * sizeof(wchar_t));
	copy[ inLength] = 0;

	// Safe to drop the old storage now that the input has been copied.
	DeAllocateString();

	uString = copy;
	uStringLength = inLength;
	uStringBufferSize = inLength + 1;
	return *this;
}

// Assigns a null-terminated narrow string, converting it to wide characters
// with MultiByteToWideChar using this string's codepage (CP_ACP when none is
// set).
//
// A NULL pointer leaves the string unchanged. If the conversion yields no
// characters the string is cleared. The conversion is written into a fresh
// buffer BEFORE the old buffers are released, so _in may be the pointer
// previously returned by AsAscii() on this same object.
//
// Returns a reference to this string.
const CUAString& CUAString::operator=( const char* _in)
{
	if ( !_in)
	{
		return *this;
	}

	const int inLength = static_cast<int>( strlen( _in));
	int cp = codepage;
	if ( !codepage)
	{
		cp = CP_ACP;
	}

	// First call only measures the number of wide characters required.
	const int convertedInLength = MultiByteToWideChar( cp, 0, _in, inLength, NULL, 0);
	if ( !convertedInLength)
	{
		DeAllocateString();
		return *this;
	}

	const unsigned int bufferSize = convertedInLength + 1;
	wchar_t* converted = new wchar_t[ bufferSize];
	const int written = MultiByteToWideChar( cp, 0, _in, inLength, converted, bufferSize);
	converted[ written] = 0;

	// _in is no longer needed, so the old storage can go.
	DeAllocateString();

	uString = converted;
	uStringLength = written;
	uStringBufferSize = bufferSize;
	return *this;
}

// Assigns a number: discards the current text, then appends the textual
// form of _in produced by AppendDouble(). Returns a reference to this string.
const CUAString& CUAString::operator=( const double _in)
{
	DeAllocateString();
	AppendDouble( _in);

	return *this;
}

/************************************************************************/ /* += Operators */ /************************************************************************/

// Appends the text of _in to this string and returns a reference to it.
//
// Appending a string to itself (s += s) is handled explicitly: the buffer
// is grown first, so that AppendString() copies from the still-valid new
// buffer instead of from storage released by a reallocation.
const CUAString& CUAString::operator+=( const CUAString& _in)
{
	if ( this == &_in)
	{
		const unsigned int oldLength = uStringLength;
		// Room for two copies plus the terminator; AppendString() will then
		// not reallocate and the source pointer stays valid.
		ReAllocateString( 2 * oldLength + 1);
		AppendString( uString, oldLength);
	}
	else
	{
		AppendString( _in.uString, _in.uStringLength);
	}
	return *this;
}

// Appends a null-terminated wide string and returns a reference to this
// string.
//
// A NULL pointer appends nothing (wcslen must not be called on it). If _in
// points into this string's own buffer (e.g. s += s.AsUnicode()), the buffer
// is grown first and the source is re-derived from the new buffer, because a
// reallocation inside AppendString() would release the memory _in points at.
const CUAString& CUAString::operator+=( const wchar_t* _in)
{
	if ( !_in)
	{
		return *this;
	}

	const unsigned int inLength = static_cast<unsigned int>( wcslen( _in));

	const bool aliasesOwnBuffer =
		uString && ( _in >= uString) && ( _in <= uString + uStringLength);

	if ( aliasesOwnBuffer)
	{
		if ( inLength)
		{
			const unsigned int offset = static_cast<unsigned int>( _in - uString);
			ReAllocateString( uStringLength + inLength + 1);
			AppendString( uString + offset, inLength);
		}
	}
	else
	{
		AppendString( _in, inLength);
	}
	return *this;
}

// Appends a null-terminated narrow string. The text is first converted to a
// temporary CUAString, which is then appended with the CUAString overload.
// Returns a reference to this string.
const CUAString& CUAString::operator+=( const char* _in)
{
	CUAString converted( _in);
	return *this += converted;
}

// Appends the textual form of _in (see AppendDouble) and returns a reference
// to this string.
const CUAString& CUAString::operator+=( const double _in)
{
	AppendDouble( _in);

	return *this;
}

/************************************************************************/ /* + Operators */ /************************************************************************/ #define OPERATOR_PLUS( type1, type2) \ CUAString operator+( const type1 string1, const type2 string2) \ { \ CUAString ret = string1; \ ret += string2; \ return ret; \ } \

OPERATOR_PLUS( CUAString&, CUAString&); OPERATOR_PLUS( CUAString&, wchar_t*); OPERATOR_PLUS( wchar_t*, CUAString&); OPERATOR_PLUS( CUAString&, char*); OPERATOR_PLUS( char*, CUAString&); OPERATOR_PLUS( CUAString&, double); OPERATOR_PLUS( double, CUAString&);

/************************************************************************/ /* Comparison operators */ /************************************************************************/

// Returns true when both strings hold the same text (wcscmp equality).
// An empty string has no buffer (uString is NULL), so NULL is treated as ""
// rather than being handed to wcscmp.
bool CUAString::operator==( const CUAString& _in)
{
	const wchar_t* lhs = uString ? uString : L"";
	const wchar_t* rhs = _in.uString ? _in.uString : L"";
	return wcscmp( lhs, rhs) == 0;
}

// Returns true when this string's text equals the null-terminated wide
// string _in. A NULL buffer (empty string) and a NULL argument are both
// treated as "" rather than being handed to wcscmp.
bool CUAString::operator==( const wchar_t* _in)
{
	const wchar_t* lhs = uString ? uString : L"";
	const wchar_t* rhs = _in ? _in : L"";
	return wcscmp( lhs, rhs) == 0;
}

// Returns true when the two strings hold different text.
// An empty string has no buffer (uString is NULL), so NULL is treated as ""
// rather than being handed to wcscmp.
bool CUAString::operator!=( const CUAString& _in)
{
	const wchar_t* lhs = uString ? uString : L"";
	const wchar_t* rhs = _in.uString ? _in.uString : L"";
	return wcscmp( lhs, rhs) != 0;
}

// Returns true when this string's text differs from the null-terminated wide
// string _in. A NULL buffer (empty string) and a NULL argument are both
// treated as "" rather than being handed to wcscmp.
bool CUAString::operator!=( const wchar_t* _in)
{
	const wchar_t* lhs = uString ? uString : L"";
	const wchar_t* rhs = _in ? _in : L"";
	return wcscmp( lhs, rhs) != 0;
}

/************************************************************************/ /* Search functions */ /* */ /* If a find function returns a value >= Length, no occurrences */ /* were found. */ /************************************************************************/

// Returns the index of the first character at or after startPos that occurs
// in the null-terminated set `filter`. Returns the string length when
// startPos is out of range or no such character exists.
unsigned int CUAString::FindFirstOf( unsigned int startPos, wchar_t* filter)
{
	if ( startPos >= uStringLength)
	{
		return uStringLength;
	}

	wchar_t* hit = wcspbrk( uString + startPos, filter);
	if ( !hit)
	{
		return uStringLength;
	}
	return hit - uString;
}

// Returns the index of the first character at or after startPos that does
// NOT occur in the null-terminated set `filter`. wcsspn measures the run of
// matching characters, so the result is the string length when every
// remaining character matches; it is also the string length when startPos
// is out of range.
unsigned int CUAString::FindFirstNotOf( unsigned int startPos, wchar_t* filter)
{
	if ( startPos >= uStringLength)
	{
		return uStringLength;
	}

	const size_t matchingRun = wcsspn( uString + startPos, filter);
	return matchingRun + startPos;
}

// Returns a new string holding up to `length` characters starting at
// startPos. A negative length means "everything up to the end"; a length
// that runs past the end is clamped. An out-of-range startPos yields an
// empty string.
CUAString CUAString::SubString( unsigned int startPos, int length)
{
	CUAString piece;

	if ( startPos >= uStringLength)
	{
		return piece;
	}

	const unsigned int available = uStringLength - startPos;
	unsigned int count = available;
	if ( ( length >= 0) && ( static_cast<unsigned int>( length) < available))
	{
		count = length;
	}

	piece.AppendString( uString + startPos, count);
	return piece;
}

/************************************************************************/ /* Public Functions */ /************************************************************************/

// Returns true when the string holds no characters.
// (The previous implementation returned true for a NON-empty string, i.e.
// the result was inverted with respect to the function's name.)
bool CUAString::Empty()
{
	return uStringLength == 0;
}

void CUAString::MakeLower() { wcslwr( uString); }

void CUAString::MakeUpper() { wcsupr( uString); }

// Returns the codepage currently stored for narrow <-> wide conversions.
int CUAString::Codepage()
{
	const int current = codepage;
	return current;
}

bool CUAString::Codepage( int _codepage) { codepage = CP_ACP;

int oldCodepage = _getmbcp();

if ( 0 == _setmbcp( _codepage)) { codepage = _codepage; }

_setmbcp( oldCodepage);

if ( codepage) { return true; } else { return false; } }

/************************************************************************/ /* Conversion functions (AsFoo) */ /************************************************************************/

// Parses the first run of number characters ("+-1234567890.") found in the
// string with wcstod and returns the value, or 0 when the string contains
// no such character.
// When `extract` is true and a run was found, the string is replaced by
// whatever follows the run.
double CUAString::AsDouble( bool extract)
{
	const unsigned int first = FindFirstOf( 0, L"+-1234567890.");
	if ( first >= uStringLength)
	{
		return 0;
	}

	const unsigned int last = FindFirstNotOf( first + 1, L"+-1234567890.");
	const double value = wcstod( SubString( first, last - first).AsUnicode(), NULL);

	if ( extract)
	{
		*this = SubString( last);
	}
	return value;
}

// Parses the first run of integer characters ("+-1234567890") found in the
// string with wcstol (base 10) and returns the value, or 0 when the string
// contains no such character.
// When `extract` is true and a run was found, the string is replaced by
// whatever follows the run.
long CUAString::AsLong( bool extract)
{
	const unsigned int first = FindFirstOf( 0, L"+-1234567890");
	if ( first >= uStringLength)
	{
		return 0;
	}

	const unsigned int last = FindFirstNotOf( first + 1, L"+-1234567890");
	const long value = wcstol( SubString( first, last - first).AsUnicode(), NULL, 10);

	if ( extract)
	{
		*this = SubString( last);
	}
	return value;
}

// Returns the text between the first occurrence of startChar and the next
// occurrence of endChar after it (both delimiters excluded).
// When `extract` is true the string is afterwards assigned the text that
// follows the closing delimiter.
CUAString CUAString::AsToken( wchar_t startChar, wchar_t endChar, bool extract)
{
	// One-character, null-terminated search sets for the find functions.
	wchar_t openSet[2] = { startChar, 0 };
	wchar_t closeSet[2] = { endChar, 0 };

	const unsigned int tokenStart = FindFirstOf( 0, openSet) + 1;
	const unsigned int tokenEnd = FindFirstOf( tokenStart, closeSet);

	CUAString token;
	token = SubString( tokenStart, tokenEnd - tokenStart);

	if ( extract)
	{
		*this = SubString( tokenEnd + 1);
	}

	return token;
}

// Returns a pointer to the internal wide-character buffer. The pointer is
// NULL while no buffer is allocated (see DeAllocateString).
const wchar_t* CUAString::AsUnicode()
{
	const wchar_t* text = uString;
	return text;
}

// Returns the text converted to a narrow, null-terminated string via
// WideCharToMultiByte. _codepage selects the codepage for the conversion;
// 0 means "use the codepage stored in this string".
// The converted copy is cached in aString and owned by this object.
// Returns "" when the conversion produces no characters.
//
// NOTE(review): once a cached copy exists it is returned as-is, whatever
// _codepage is passed, so a second call with a different codepage still
// yields the first conversion.
const char* CUAString::AsAscii( int _codepage)
{
	if ( aString)
	{
		return aString;
	}

	int cp = codepage;
	if ( _codepage)
	{
		cp = _codepage;
	}

	// First call only measures the number of bytes required.
	const int convertedLength = WideCharToMultiByte( cp, 0, uString, uStringLength, NULL, 0, NULL, NULL);
	if ( !convertedLength)
	{
		return "";
	}

	aStringLength = convertedLength;
	aString = new char[aStringLength + 1];
	WideCharToMultiByte( cp, 0, uString, uStringLength, aString, aStringLength, NULL, NULL);
	aString[ aStringLength] = 0;

	return aString;
}

/************************************************************************/ /* Protected Functions */ /************************************************************************/

// Returns the object to its freshly constructed state: default ANSI
// codepage and no wide or narrow buffer.
void CUAString::Clear()
{
	codepage = CP_ACP;
	DeAllocateString();
}

// Releases the wide buffer and the cached narrow copy and resets all
// lengths and sizes to zero. The codepage is not touched.
void CUAString::DeAllocateString()
{
	// delete[] on a null pointer is a no-op, so no explicit checks are needed.
	delete[] uString;
	uString = NULL;
	uStringLength = 0;
	uStringBufferSize = 0;

	delete[] aString;
	aString = NULL;
	aStringLength = 0;
}

// Discards the current contents and allocates a new wide buffer of _size
// wchar_t elements holding an empty string. A _size of 0 simply leaves the
// string without any buffer.
void CUAString::AllocateString( unsigned int _size)
{
	DeAllocateString();

	if ( _size == 0)
	{
		return;
	}

	uString = new wchar_t[ _size];
	uString[0] = 0;
	uStringBufferSize = _size;
}

void CUAString::ReAllocateString( unsigned int _size) {

if ( _size > 0) { wchar_t* tempStr = new wchar_t[ _size]; int minLength = min( uStringLength + 1, _size); if ( uString) { memcpy( tempStr, uString, minLength * sizeof(wchar_t)); } tempStr[minLength - 1] = 0;

DeAllocateString();

uString = tempStr; uStringLength = minLength - 1; uStringBufferSize = _size; } else { DeAllocateString(); } }

void CUAString::AppendString( const wchar_t* _str, unsigned int _length) { if ( uStringBufferSize < uStringLength + _length + 1) { ReAllocateString( uStringLength + _length + 1); }

memcpy( uString + uStringLength, _str, _length * sizeof(wchar_t)); uStringLength = uStringLength + _length; uString[ uStringLength] = 0; }

// Appends the decimal text form of num, with at most 5 fractional digits and
// without trailing fractional zeros (e.g. 1.5 -> "1.5", 100 -> "100",
// 0.05 -> "0.05").
//
// _fcvt returns only the digits; `dec` is the position of the decimal point
// relative to the first digit (zero or negative: the point lies to the left
// of the first digit) and `sign` is non-zero for negative numbers.
//
// Fixes over the previous version:
// - trailing zeros are only stripped from the fractional part; zeros that
//   belong to the integer part are kept (100.0 used to become "1");
// - the decimal point is inserted whenever at least one fractional digit
//   remains (1.5 used to become "15" because the comparison used the index
//   of the last digit instead of the digit count).
void CUAString::AppendDouble( double num)
{
	int dec, sign;
	CUAString tempString;
	char* buffer = _fcvt( num, 5, &dec, &sign );

	// Never strip below the integer digits; always keep at least one digit.
	int digitCount = static_cast<int>( strlen( buffer));
	const int minDigits = ( dec > 0) ? dec : 1;
	while ( ( digitCount > minDigits) && ( buffer[digitCount - 1] == '0'))
	{
		digitCount--;
		buffer[digitCount] = 0;
	}

	if ( buffer[0] != '0')
	{
		if ( sign)
		{
			tempString += L"-";
		}

		if ( dec <= 0)
		{
			// Pure fraction: "0." followed by the zeros _fcvt left out.
			tempString += L"0.";
			while ( dec < 0)
			{
				tempString += L"0";
				dec++;
			}
		}
		else if ( digitCount > dec)
		{
			// Temporarily terminate after the integer digits to append them,
			// then continue with the fractional digits after the point.
			char temp = buffer[dec];
			buffer[dec] = 0;
			tempString += buffer;
			buffer[dec] = temp;
			buffer += dec;
			tempString += L".";
		}
	}

	tempString += buffer;

	*this += tempString;
}

Currently browsing [uastring.zip] (3,538 bytes) - [UAString.h] - (3,817 bytes)

/********************************************************************
	created:		2001/03/24
	file name:		D:\grafik\MPREstd\UAString.h
	OS specific:	Yes
	author:			John Juul Jensen

Copyright 2001 [COPYRIGHT NOTICE] contains: CUAString *********************************************************************/


#ifndef __UASTRING_H #define __UASTRING_H

#include "wchar.h"

/********************************************************************
	class name:	CUAString
	created:	2001/03/24
	author:		John Juul Jensen

	purpose:	The CUAString class is a string class based on the
				unicode standard. All text is kept in unicode
				internally and this is also the preferred external use.
				For accurate conversion between ascii and unicode, a
				codepage can be specified.
				The class implements the conversion function
				string->float. A few basic search functions, similar to
				the ones found in std::string, have also been
				implemented.

				All string lengths in this class are assumed to be
				excluding the null character.
*********************************************************************/
class CUAString
{
public:		// Constructors / Destructor
	CUAString();
	CUAString( const CUAString& _in);
	CUAString( const wchar_t* _in);
	CUAString( const char* _in);
	CUAString( const double _in);
	~CUAString();

public:		// Operators
	// Assignment from another string, a wide string, a narrow string or a number.
	const CUAString& operator=( const CUAString& _in);
	const CUAString& operator=( const wchar_t* _in);
	const CUAString& operator=( const char* _in);
	const CUAString& operator=( const double _in);

	// Append another string, a wide string, a narrow string or a number.
	const CUAString& operator+=( const CUAString& _in);
	const CUAString& operator+=( const wchar_t* _in);
	const CUAString& operator+=( const char* _in);
	const CUAString& operator+=( const double _in);

	// Concatenation; each overload returns a new string by value.
	friend CUAString operator+( const CUAString& string1, const CUAString& string2);
	friend CUAString operator+( const CUAString& string1, const wchar_t* string2);
	friend CUAString operator+( const wchar_t* string1, const CUAString& string2);
	friend CUAString operator+( const CUAString& string1, const char* string2);
	friend CUAString operator+( const char* string1, const CUAString& string2);
	friend CUAString operator+( const CUAString& string1, const double string2);
	friend CUAString operator+( const double string1, const CUAString& string2);

	// Comparison of the wide-character text.
	bool operator==( const CUAString& _in);
	bool operator==( const wchar_t* _in);
	bool operator!=( const CUAString& _in);
	bool operator!=( const wchar_t* _in);

public:		// Search functions
	// `filter` is a null-terminated set of characters to look for / to skip.
	unsigned int FindFirstOf( unsigned int startPos, wchar_t* filter);
	unsigned int FindFirstNotOf( unsigned int startPos, wchar_t* filter);
	// The default length of -1 means "up to the end of the string".
	CUAString SubString( unsigned int startPos, int length = -1);

public:		// Extract/Conversion routines
	// With extract = true the converted part is also removed from the string.
	double AsDouble( bool extract = false);
	long AsLong( bool extract = false);
	CUAString AsToken( wchar_t startChar = L'[', wchar_t endChar = L']', bool extract = false);
	const wchar_t* AsUnicode();
	// _codepage = 0 means "use the codepage stored in the string".
	const char* AsAscii( int _codepage = 0);

public:		// Misc functions
	int Codepage();
	bool Codepage( int _codepage);

	void MakeLower();
	void MakeUpper();

	bool Empty();

protected:	// functions
	// NOTE(review): no definition of BuildAscii() appears in the accompanying
	// UAString.cpp listing - confirm whether it is still needed.
	void BuildAscii();
	void Clear();

	void DeAllocateString();
	void AllocateString( unsigned int _size);
	void ReAllocateString( unsigned int _size);
	void AppendString( const wchar_t* _str, unsigned int _length);
	void AppendDouble( double num);

protected:	// variables
	unsigned int codepage;			// codepage used for ascii <-> unicode conversion

	unsigned int uStringLength;		// length excluding terminating null character
	unsigned int uStringBufferSize;	// size of buffer
	wchar_t* uString;				// unicode text

	unsigned int aStringLength;		// length excluding terminating null character
	char* aString;					// ascii copy of the text
};

#endif

The zip file viewer built into the Developer Toolbox made use of the zlib library, as well as the zlibdll source additions.

 

Copyright 1999-2008 (C) FLIPCODE.COM and/or the original content author(s). All rights reserved.
Please read our Terms, Conditions, and Privacy information.