DOSBox-X
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
include/iconvpp.hpp
00001 
00002 #ifndef DOSBOX_DOSBOX_H
00003 # error do not include directly
00004 #endif
00005 
/* Byte-order selection.
 *
 * Every branch below ends up defining the same three macros:
 *   ICONV_LITTLE_ENDIAN / ICONV_BIG_ENDIAN  - the two possible values
 *   ICONV_BYTE_ORDER                        - the value that applies to this build
 * _Iconv_CommonBase::big_endian() compares ICONV_BYTE_ORDER against ICONV_BIG_ENDIAN. */
#if defined(__MINGW32__) || defined(_WIN32) || defined(WINDOWS)
/* Windows builds: no byte-order macro is consulted, little endian is hard-coded. */
# include <windows.h>
# define ICONV_LITTLE_ENDIAN 1234
# define ICONV_BIG_ENDIAN 4321
# define ICONV_BYTE_ORDER ICONV_LITTLE_ENDIAN
#elif defined(__APPLE__)
/* Apple builds: the choice is made from __LITTLE_ENDIAN__ / __BIG_ENDIAN__
 * (presumably predefined by the compiler); the build fails if neither is defined. */
# include <libkern/OSByteOrder.h>
# define ICONV_LITTLE_ENDIAN 1234
# define ICONV_BIG_ENDIAN 4321
# if defined(__LITTLE_ENDIAN__)
#  define ICONV_BYTE_ORDER ICONV_LITTLE_ENDIAN
# elif defined(__BIG_ENDIAN__)
#  define ICONV_BYTE_ORDER ICONV_BIG_ENDIAN
# else
#  error Unable to determine byte order
# endif
#else
/* Everything else: forward the BYTE_ORDER / LITTLE_ENDIAN / BIG_ENDIAN macros,
 * which this branch expects <endian.h> to provide. */
# include <endian.h>
# define ICONV_BYTE_ORDER BYTE_ORDER
# define ICONV_LITTLE_ENDIAN LITTLE_ENDIAN
# define ICONV_BIG_ENDIAN BIG_ENDIAN
#endif
00028 
00029 #include <errno.h>
00030 #include <stdio.h>
00031 #include <string.h>
00032 #include <stdlib.h>
00033 
00034 #include <iostream>
00035 #include <exception>
00036 #include <stdexcept>
00037 
00038 /* common code to any templated version of _IconvBase */
00039 class _Iconv_CommonBase {
00040 public:
00041     static const char *errstring(int x);
00042     inline size_t get_src_last_read(void) const { /* in units of sizeof(srcT) */
00043         return src_adv;
00044     }
00045     inline size_t get_dest_last_written(void) const { /* in units of sizeof(dstT) */
00046         return dst_adv;
00047     }
00048 public:
00049     size_t                      dst_adv = 0;
00050     size_t                      src_adv = 0;
00051 public:
00052     static constexpr int        err_noinit = -EBADF;
00053     static constexpr int        err_noroom = -E2BIG;
00054     static constexpr int        err_notvalid = -EILSEQ;
00055     static constexpr int        err_incomplete = -EINVAL;
00056 protected:
00057     static constexpr bool big_endian(void) {
00058         return (ICONV_BYTE_ORDER == ICONV_BIG_ENDIAN);
00059     }
00060 };
00061 
00062 template <typename srcT,typename dstT> class _Iconv;
00063 
00064 /* base _Iconv implementation, common to all implementations */
00065 template <typename srcT,typename dstT> class _IconvBase : public _Iconv_CommonBase {
00066 public:
00067     /* NTS: The C++ standard defines std::string as std::basic_string<char>.
00068      *      These typedefs will match if srcT = char and dstT = char */
00069     typedef std::basic_string<srcT> src_string;
00070     typedef std::basic_string<dstT> dst_string;
00071 public:
00072     _IconvBase() { }
00073     virtual ~_IconvBase() { }
00074 public:
00075     void finish(void) {
00076         dst_ptr = NULL;
00077         dst_ptr_fence = NULL;
00078         src_ptr = NULL;
00079         src_ptr_fence = NULL;
00080     }
00081 
00082     void set_dest(dstT * const dst,dstT * const dst_fence) {
00083         if (dst == NULL || dst_fence == NULL || dst > dst_fence)
00084             throw std::invalid_argument("Iconv set_dest pointer out of range");
00085 
00086         dst_adv = 0;
00087         dst_ptr = dst;
00088         dst_ptr_fence = dst_fence;
00089     }
00090     void set_dest(dstT * const dst,const size_t len/*in units of sizeof(dstT)*/) {
00091         set_dest(dst,dst+len);
00092     }
00093     void set_dest(dstT * const dst) = delete; /* <- NO! Prevent C-string calls to std::string &dst function! */
00094 
00095     void set_src(const srcT * const src,const srcT * const src_fence) {
00096         if (src == NULL || src_fence == NULL || src > src_fence)
00097             throw std::invalid_argument("Iconv set_src pointer out of range");
00098 
00099         src_adv = 0;
00100         src_ptr = src;
00101         src_ptr_fence = src_fence;
00102     }
00103     void set_src(const srcT * const src,const size_t len) {
00104         set_src(src,src+len);
00105     }
00106     void set_src(const srcT * const src) { // C-string
00107         set_src(src,my_strlen(src));
00108     }
00109 public:
00110     virtual int _do_convert(void) {
00111         return err_noinit;
00112     }
00113     int string_convert(dst_string &dst,const src_string &src) {
00114         dst.resize(std::max(dst.size(),((src.length()+4u)*4u)+2u)); // maximum 4 bytes/char expansion UTF-8 or bigger if caller resized already
00115         set_dest(dst); /* will apply new size to dst/fence pointers */
00116 
00117         int err = string_convert_src(src);
00118 
00119         dst.resize(get_dest_last_written());
00120 
00121         finish();
00122         return err;
00123     }
00124     int string_convert(void) {
00125         if (dst_ptr == NULL || src_ptr == NULL)
00126             return err_notvalid;
00127         if (dst_ptr > dst_ptr_fence)
00128             return err_notvalid;
00129         if (src_ptr > src_ptr_fence)
00130             return err_notvalid;
00131 
00132         int ret = _do_convert();
00133 
00134         if (ret >= 0) {
00135             /* add NUL */
00136             if (dst_ptr >= dst_ptr_fence)
00137                 return err_noroom;
00138 
00139             *dst_ptr++ = 0;
00140         }
00141 
00142         return ret;
00143     }
00144     int string_convert_dest(dst_string &dst) {
00145         size_t srcl = (size_t)((uintptr_t)src_ptr_fence - (uintptr_t)src_ptr);
00146 
00147         dst.resize(std::max(dst.size(),((srcl+4u)*4u)+2u));
00148         set_dest(dst);
00149 
00150         int err = string_convert();
00151 
00152         finish();
00153         return err;
00154     }
00155     int string_convert_src(const src_string &src) {
00156         set_src(src);
00157 
00158         int err = string_convert();
00159 
00160         finish();
00161         return err;
00162     }
00163     dst_string string_convert(const src_string &src) {
00164         dst_string res;
00165 
00166         string_convert(res,src);
00167 
00168         return res;
00169     }
00170 public:
00171     inline bool eof(void) const {
00172         return src_ptr >= src_ptr_fence;
00173     }
00174     inline bool eof_dest(void) const {
00175         return dst_ptr >= dst_ptr_fence;
00176     }
00177     inline const srcT *get_srcp(void) const {
00178         return src_ptr;
00179     }
00180     inline const dstT *get_destp(void) const {
00181         return dst_ptr;
00182     }
00183 protected:
00184     static inline size_t my_strlen(const char *s) {
00185         return strlen(s);
00186     }
00187     static inline size_t my_strlen(const wchar_t *s) {
00188         return wcslen(s);
00189     }
00190     template <typename X> static inline size_t my_strlen(const X *s) {
00191         size_t c = 0;
00192 
00193         while ((*s++) != 0) c++;
00194 
00195         return c;
00196     }
00197 protected:
00198     void set_dest(dst_string &dst) { /* PRIVATE: External use can easily cause use-after-free bugs */
00199         set_dest(&dst[0],dst.size());
00200     }
00201     void set_src(const src_string &src) { /* PRIVATE: External use can easily cause use-after-free bugs */
00202         set_src(src.c_str(),src.length());
00203     }
00204 protected:
00205     dstT*                       dst_ptr = NULL;
00206     dstT*                       dst_ptr_fence = NULL;
00207     const srcT*                 src_ptr = NULL;
00208     const srcT*                 src_ptr_fence = NULL;
00209 
00210     friend _Iconv<srcT,dstT>;
00211 };
00212 
00213 #if defined(C_ICONV)
00214 # include <iconv.h>
00215 
00216 /* _Iconv implementation of _IconvBase using GNU libiconv or GLIBC iconv, for Linux and Mac OS X systems. */
00217 /* See also: "man iconv"
00218  * See also: [http://man7.org/linux/man-pages/man3/iconv.3.html] */
00219 template <typename srcT,typename dstT> class _Iconv : public _IconvBase<srcT,dstT> {
00220 protected:
00221     using pclass = _IconvBase<srcT,dstT>;
00222 public:
00223     explicit _Iconv(const iconv_t &ctx) : context(ctx) {/* takes ownership of ctx */
00224     }
00225     _Iconv(const _Iconv *p) = delete;
00226     _Iconv(const _Iconv &other) = delete; /* no copying */
00227     _Iconv(const _Iconv &&other) = delete; /* no moving */
00228     _Iconv() = delete;
00229     virtual ~_Iconv() {
00230         close();
00231     }
00232 public:
00233     virtual int _do_convert(void) {
00234         if (context != NULL) {
00235             dstT *i_dst = pclass::dst_ptr;
00236             const srcT *i_src = pclass::src_ptr;
00237             size_t src_left = (size_t)((uintptr_t)((char*)pclass::src_ptr_fence) - (uintptr_t)((char*)pclass::src_ptr));
00238             size_t dst_left = (size_t)((uintptr_t)((char*)pclass::dst_ptr_fence) - (uintptr_t)((char*)pclass::dst_ptr));
00239 
00240             iconv(context,NULL,NULL,NULL,NULL);
00241 
00242             /* Ref: [http://man7.org/linux/man-pages/man3/iconv.3.html] */
00243             int ret = iconv(context,(char**)(&(pclass::src_ptr)),&src_left,(char**)(&(pclass::dst_ptr)),&dst_left);
00244 
00245             pclass::src_adv = (size_t)(pclass::src_ptr - i_src);
00246             pclass::dst_adv = (size_t)(pclass::dst_ptr - i_dst);
00247 
00248             if (ret < 0) {
00249                 if (errno == E2BIG)
00250                     return pclass::err_noroom;
00251                 else if (errno == EILSEQ)
00252                     return pclass::err_notvalid;
00253                 else if (errno == EINVAL)
00254                     return pclass::err_incomplete;
00255 
00256                 return pclass::err_notvalid;
00257             }
00258 
00259             return ret;
00260         }
00261 
00262         return pclass::err_noinit;
00263     }
00264 public:
00265     static _Iconv<srcT,dstT> *create(const char *nw) { /* factory function, wide to char, or char to wide */
00266         if (sizeof(dstT) == sizeof(char) && sizeof(srcT) > sizeof(char)) {
00267             const char *wchar_encoding = _get_wchar_encoding<srcT>();
00268             if (wchar_encoding == NULL) return NULL;
00269 
00270             iconv_t ctx = iconv_open(/*TO*/nw,/*FROM*/wchar_encoding); /* from wchar to codepage nw */
00271             if (ctx != iconv_t(-1)) return new(std::nothrow) _Iconv<srcT,dstT>(ctx);
00272         }
00273         else if (sizeof(dstT) > sizeof(char) && sizeof(srcT) == sizeof(char)) {
00274             const char *wchar_encoding = _get_wchar_encoding<dstT>();
00275             if (wchar_encoding == NULL) return NULL;
00276 
00277             iconv_t ctx = iconv_open(/*TO*/wchar_encoding,/*FROM*/nw); /* from codepage new to wchar */
00278             if (ctx != iconv_t(-1)) return new(std::nothrow) _Iconv<srcT,dstT>(ctx);
00279         }
00280 
00281         return NULL;
00282     }
00283     static _Iconv<srcT,dstT> *create(const char *to,const char *from) { /* factory function */
00284         if (sizeof(dstT) == sizeof(char) && sizeof(srcT) == sizeof(char)) {
00285             iconv_t ctx = iconv_open(to,from);
00286             if (ctx != iconv_t(-1)) return new(std::nothrow) _Iconv<srcT,dstT>(ctx);
00287         }
00288 
00289         return NULL;
00290     }
00291 protected:
00292     void close(void) {
00293         if (context != NULL) {
00294             iconv_close(context);
00295             context = NULL;
00296         }
00297     }
00298     template <typename W> static const char *_get_wchar_encoding(void) {
00299         if (sizeof(W) == 4)
00300             return pclass::big_endian() ? "UTF-32BE" : "UTF-32LE";
00301         else if (sizeof(W) == 2)
00302             return pclass::big_endian() ? "UTF-16BE" : "UTF-16LE";
00303 
00304         return NULL;
00305     }
00306 protected:
00307     iconv_t                     context = NULL;
00308 };
00309 
/* Most of the time the Iconv form will be used, for Mac OS X and Linux platforms where UTF-8 is common.
 *
 * Conversion to/from wchar is intended for platforms like Microsoft Windows 98/ME/2000/XP/Vista/7/8/10/etc
 * where the Win32 API functions take WCHAR (UTF-16 or UCS-2), in which case, the code will continue to
 * use UTF-8 internally but convert to WCHAR when needed. For example, Win32 function CreateFileW().
 *
 * Note that because of the UTF-16 world of Windows, Microsoft C++ defines wchar_t as an unsigned 16-bit
 * integer.
 *
 * Linux and other OSes however define wchar_t as a 32-bit integer, but do not use wchar_t APIs, and often
 * instead use UTF-8 for Unicode, so the wchar_t versions will not see much use there. */
00321 typedef _Iconv<char,char>    Iconv;
00322 typedef _Iconv<char,wchar_t> IconvToW;
00323 typedef _Iconv<wchar_t,char> IconvFromW;
00324 
00325 #endif // C_ICONV
00326 
00327 #if defined(C_ICONV_WIN32)
00328 # include <windows.h>
00329 
00330 /* Alternative implementation (char to WCHAR, or WCHAR to char only) using Microsoft Win32 APIs instead of libiconv.
00331  * For use with embedded or low memory Windows installations or environments where the added load of libiconv would
00332  * be undesirable. */
00333 
00334 /* _IconvWin32 implementation of _IconvBase using Microsoft Win32 code page and WCHAR support functions for Windows 2000/XP/Vista/7/8/10/etc */
00335 template <typename srcT,typename dstT> class _IconvWin32 : public _IconvBase<srcT,dstT> {
00336 protected:
00337     using pclass = _IconvBase<srcT,dstT>;
00338 public:
00339     explicit _IconvWin32(const UINT _codepage) : codepage(_codepage) {
00340     }
00341     _IconvWin32(const _IconvWin32 *p) = delete;
00342     _IconvWin32(const _IconvWin32 &other) = delete; /* no copying */
00343     _IconvWin32(const _IconvWin32 &&other) = delete; /* no moving */
00344     _IconvWin32() = delete;
00345     virtual ~_IconvWin32() {
00346     }
00347 public:
00348     virtual int _do_convert(void) {
00349         if (codepage != 0u) {
00350             size_t src_left = (size_t)((uintptr_t)((char*)pclass::src_ptr_fence) - (uintptr_t)((char*)pclass::src_ptr));
00351             size_t dst_left = (size_t)((uintptr_t)((char*)pclass::dst_ptr_fence) - (uintptr_t)((char*)pclass::dst_ptr));
00352             int ret;
00353 
00354             if (sizeof(dstT) == sizeof(char) && sizeof(srcT) == sizeof(WCHAR)) {
00355                 /* Convert wide char to multibyte using the Win32 API.
00356                  * See also: [https://docs.microsoft.com/en-us/windows/desktop/api/stringapiset/nf-stringapiset-widechartomultibyte] */
00357                 ret = WideCharToMultiByte(codepage,0,(WCHAR*)pclass::src_ptr,src_left/sizeof(srcT),(char*)pclass::dst_ptr,dst_left,NULL,NULL);
00358                 pclass::src_adv = src_left;
00359                 pclass::src_ptr += pclass::src_adv;
00360                 pclass::dst_adv = ret;
00361                 pclass::dst_ptr += pclass::dst_adv;
00362             }
00363             else if (sizeof(dstT) == sizeof(WCHAR) && sizeof(srcT) == sizeof(char)) {
00364                 /* Convert multibyte to wide char using the Win32 API.
00365                  * See also: [https://docs.microsoft.com/en-us/windows/desktop/api/stringapiset/nf-stringapiset-multibytetowidechar] */
00366                 ret = MultiByteToWideChar(codepage,0,(char*)pclass::src_ptr,src_left,(WCHAR*)pclass::dst_ptr,dst_left/sizeof(dstT));
00367                 pclass::src_adv = src_left;
00368                 pclass::src_ptr += pclass::src_adv;
00369                 pclass::dst_adv = ret;
00370                 pclass::dst_ptr += pclass::dst_adv;
00371             }
00372             else {
00373                 pclass::src_adv = 0;
00374                 pclass::dst_adv = 0;
00375                 ret = 0;
00376             }
00377 
00378             if (ret == 0) {
00379                 DWORD err = GetLastError();
00380 
00381                 if (err == ERROR_INSUFFICIENT_BUFFER)
00382                     return pclass::err_noroom;
00383                 else if (err == ERROR_NO_UNICODE_TRANSLATION)
00384                     return pclass::err_notvalid;
00385 
00386                 return pclass::err_noinit;
00387             }
00388 
00389             return 0;
00390         }
00391 
00392         return pclass::err_noinit;
00393     }
00394 public:
00395     static _IconvWin32<srcT,dstT> *create(const UINT codepage) { /* factory function, WCHAR to char or char to WCHAR */
00396         CPINFO cpi;
00397 
00398         /* Test whether the code page exists */
00399         if (!GetCPInfo(codepage,&cpi))
00400                 return NULL;
00401 
00402         if ((sizeof(dstT) == sizeof(char) && sizeof(srcT) == sizeof(WCHAR)) ||
00403             (sizeof(dstT) == sizeof(WCHAR) && sizeof(srcT) == sizeof(char)))
00404             return new(std::nothrow) _IconvWin32<srcT,dstT>(codepage);
00405 
00406         return NULL;
00407     }
00408 protected:
00409     UINT                        codepage = 0;
00410 };
00411 
00412 typedef _IconvWin32<char,WCHAR> IconvWin32ToW;
00413 typedef _IconvWin32<WCHAR,char> IconvWin32FromW;
00414 
00415 #endif // C_ICONV_WIN32
00416