/* DOSBox-X */

#ifndef DOSBOX_DOSBOX_H
# error do not include directly
#endif

/* Byte order detection. The ICONV_* macros are used below to pick the
 * UTF-16/UTF-32 flavour that matches the in-memory layout of wide characters. */
#if defined(__MINGW32__) || defined(_WIN32) || defined(WINDOWS)
# include <windows.h>
# define ICONV_LITTLE_ENDIAN 1234
# define ICONV_BIG_ENDIAN 4321
# define ICONV_BYTE_ORDER ICONV_LITTLE_ENDIAN
#elif defined(__APPLE__)
# include <libkern/OSByteOrder.h>
# define ICONV_LITTLE_ENDIAN 1234
# define ICONV_BIG_ENDIAN 4321
# if defined(__LITTLE_ENDIAN__)
#  define ICONV_BYTE_ORDER ICONV_LITTLE_ENDIAN
# elif defined(__BIG_ENDIAN__)
#  define ICONV_BYTE_ORDER ICONV_BIG_ENDIAN
# else
#  error Unable to determine byte order
# endif
#else
# include <endian.h>
# define ICONV_BYTE_ORDER BYTE_ORDER
# define ICONV_LITTLE_ENDIAN LITTLE_ENDIAN
# define ICONV_BIG_ENDIAN BIG_ENDIAN
#endif

#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <wchar.h>

#include <algorithm>
#include <iostream>
#include <exception>
#include <new>
#include <stdexcept>
#include <string>

/* common code to any templated version of _IconvBase */
class _Iconv_CommonBase {
public:
    /* Declared here only; the definition is not part of this header. */
    static const char *errstring(int x);

    /* Number of source elements consumed by the last conversion. */
    inline size_t get_src_last_read(void) const { /* in units of sizeof(srcT) */
        return src_adv;
    }
    /* Number of destination elements produced by the last conversion
     * (the NUL appended by string_convert() is not counted). */
    inline size_t get_dest_last_written(void) const { /* in units of sizeof(dstT) */
        return dst_adv;
    }
public:
    size_t dst_adv = 0;
    size_t src_adv = 0;
public:
    /* Error codes are negated errno values so that any return value >= 0 means success. */
    static constexpr int err_noinit = -EBADF;
    static constexpr int err_noroom = -E2BIG;
    static constexpr int err_notvalid = -EILSEQ;
    static constexpr int err_incomplete = -EINVAL;
protected:
    static constexpr bool big_endian(void) {
        return (ICONV_BYTE_ORDER == ICONV_BIG_ENDIAN);
    }
};

template <typename srcT,typename dstT> class _Iconv;

/* base _Iconv implementation, common to all implementations.
 *
 * Holds a [ptr,fence) window over a caller-owned source buffer and a
 * caller-owned destination buffer. Derived classes implement _do_convert(),
 * which must advance src_ptr/dst_ptr and update src_adv/dst_adv. */
template <typename srcT,typename dstT> class _IconvBase : public _Iconv_CommonBase {
public:
    /* NTS: The C++ standard defines std::string as std::basic_string<char>.
     *      These typedefs will match if srcT = char and dstT = char */
    typedef std::basic_string<srcT> src_string;
    typedef std::basic_string<dstT> dst_string;
public:
    _IconvBase() { }
    virtual ~_IconvBase() { }
public:
    /* Forget both buffers. The buffers are not owned, so nothing is freed. */
    void finish(void) {
        dst_ptr = nullptr;
        dst_ptr_fence = nullptr;
        src_ptr = nullptr;
        src_ptr_fence = nullptr;
    }

    /* Select the destination window [dst,dst_fence).
     * Throws std::invalid_argument if either pointer is NULL or dst > dst_fence. */
    void set_dest(dstT * const dst,dstT * const dst_fence) {
        if (dst == nullptr || dst_fence == nullptr || dst > dst_fence)
            throw std::invalid_argument("Iconv set_dest pointer out of range");

        dst_adv = 0;
        dst_ptr = dst;
        dst_ptr_fence = dst_fence;
    }
    void set_dest(dstT * const dst,const size_t len/*in units of sizeof(dstT)*/) {
        set_dest(dst,dst+len);
    }
    void set_dest(dstT * const dst) = delete; /* <- NO! Prevent C-string calls to std::string &dst function! */

    /* Select the source window [src,src_fence).
     * Throws std::invalid_argument if either pointer is NULL or src > src_fence. */
    void set_src(const srcT * const src,const srcT * const src_fence) {
        if (src == nullptr || src_fence == nullptr || src > src_fence)
            throw std::invalid_argument("Iconv set_src pointer out of range");

        src_adv = 0;
        src_ptr = src;
        src_ptr_fence = src_fence;
    }
    void set_src(const srcT * const src,const size_t len/*in units of sizeof(srcT)*/) {
        set_src(src,src+len);
    }
    void set_src(const srcT * const src) { // C-string
        /* Reject NULL before measuring it: my_strlen() would dereference the pointer. */
        if (src == nullptr)
            throw std::invalid_argument("Iconv set_src pointer out of range");

        set_src(src,my_strlen(src));
    }
public:
    /* Conversion primitive. The base version has no backend and always fails. */
    virtual int _do_convert(void) {
        return err_noinit;
    }

    /* Convert src into dst. dst is grown to a worst-case size, converted into,
     * then trimmed to the number of elements written. Returns >= 0 or an err_* code. */
    int string_convert(dst_string &dst,const src_string &src) {
        /* maximum 4 bytes/char expansion UTF-8 or bigger if caller resized already.
         * (std::max) is parenthesized so a max() macro from <windows.h> cannot interfere. */
        dst.resize((std::max)(dst.size(),((src.length()+4u)*4u)+2u));
        set_dest(dst); /* will apply new size to dst/fence pointers */

        int err = string_convert_src(src);

        dst.resize(get_dest_last_written());

        finish();
        return err;
    }

    /* Convert using the windows previously given to set_src()/set_dest() and
     * append a terminating NUL on success. Returns >= 0 or an err_* code. */
    int string_convert(void) {
        if (dst_ptr == nullptr || src_ptr == nullptr)
            return err_notvalid;
        if (dst_ptr > dst_ptr_fence)
            return err_notvalid;
        if (src_ptr > src_ptr_fence)
            return err_notvalid;

        int ret = _do_convert();

        if (ret >= 0) {
            /* add NUL */
            if (dst_ptr >= dst_ptr_fence)
                return err_noroom;

            *dst_ptr++ = 0;
        }

        return ret;
    }

    /* Convert the source previously given to set_src() into dst.
     * Like string_convert(dst,src), dst is trimmed to the converted length. */
    int string_convert_dest(dst_string &dst) {
        /* byte count: for wide sources this over-estimates, which only makes the buffer larger */
        size_t srcl = (size_t)((uintptr_t)src_ptr_fence - (uintptr_t)src_ptr);

        dst.resize((std::max)(dst.size(),((srcl+4u)*4u)+2u));
        set_dest(dst);

        int err = string_convert();

        /* Without this the caller would receive the whole over-allocated buffer. */
        dst.resize(get_dest_last_written());

        finish();
        return err;
    }

    /* Convert src into the destination previously given to set_dest(). */
    int string_convert_src(const src_string &src) {
        set_src(src);

        int err = string_convert();

        finish();
        return err;
    }

    /* Convenience form: returns the converted string, discarding the error code. */
    dst_string string_convert(const src_string &src) {
        dst_string res;

        string_convert(res,src);

        return res;
    }
public:
    inline bool eof(void) const {
        return src_ptr >= src_ptr_fence;
    }
    inline bool eof_dest(void) const {
        return dst_ptr >= dst_ptr_fence;
    }
    inline const srcT *get_srcp(void) const {
        return src_ptr;
    }
    inline const dstT *get_destp(void) const {
        return dst_ptr;
    }
protected:
    static inline size_t my_strlen(const char *s) {
        return strlen(s);
    }
    static inline size_t my_strlen(const wchar_t *s) {
        return wcslen(s);
    }
    /* Fallback for element types that are neither char nor wchar_t. */
    template <typename X> static inline size_t my_strlen(const X *s) {
        size_t c = 0;

        while ((*s++) != 0) c++;

        return c;
    }
protected:
    void set_dest(dst_string &dst) { /* PRIVATE: External use can easily cause use-after-free bugs */
        set_dest(&dst[0],dst.size());
    }
    void set_src(const src_string &src) { /* PRIVATE: External use can easily cause use-after-free bugs */
        set_src(src.c_str(),src.length());
    }
protected:
    dstT*           dst_ptr = nullptr;
    dstT*           dst_ptr_fence = nullptr;
    const srcT*     src_ptr = nullptr;
    const srcT*     src_ptr_fence = nullptr;

    friend _Iconv<srcT,dstT>;
};

#if defined(C_ICONV)
# include <iconv.h>

/* _Iconv implementation of _IconvBase using GNU libiconv or GLIBC iconv, for Linux and Mac OS X systems. */
/* See also: "man iconv"
 * See also: [http://man7.org/linux/man-pages/man3/iconv.3.html] */
template <typename srcT,typename dstT> class _Iconv : public _IconvBase<srcT,dstT> {
protected:
    using pclass = _IconvBase<srcT,dstT>;
public:
    explicit _Iconv(const iconv_t &ctx) : context(ctx) {/* takes ownership of ctx */
    }
    _Iconv(const _Iconv *p) = delete;
    _Iconv(const _Iconv &other) = delete; /* no copying */
    _Iconv(const _Iconv &&other) = delete; /* no moving */
    _Iconv() = delete;
    virtual ~_Iconv() {
        close();
    }
public:
    /* Run iconv() over the current source/destination windows.
     * Returns the iconv() result (clamped to INT_MAX) on success or an err_* code. */
    int _do_convert(void) override {
        if (context == NULL)
            return pclass::err_noinit;

        dstT * const i_dst = pclass::dst_ptr;
        const srcT * const i_src = pclass::src_ptr;
        /* iconv() counts in bytes regardless of the element type */
        size_t src_left = (size_t)((uintptr_t)((char*)pclass::src_ptr_fence) - (uintptr_t)((char*)pclass::src_ptr));
        size_t dst_left = (size_t)((uintptr_t)((char*)pclass::dst_ptr_fence) - (uintptr_t)((char*)pclass::dst_ptr));

        /* reset the conversion state before starting */
        iconv(context,NULL,NULL,NULL,NULL);

        /* iconv() wants char** cursors. Use real char* locals and copy them back
         * rather than casting the address of the typed member pointers. */
        char *in_cur = (char*)pclass::src_ptr;
        char *out_cur = (char*)pclass::dst_ptr;

        /* Ref: [http://man7.org/linux/man-pages/man3/iconv.3.html] */
        const size_t ret = iconv(context,&in_cur,&src_left,&out_cur,&dst_left);
        const int conv_errno = errno; /* capture before anything else can overwrite it */

        pclass::src_ptr = (const srcT*)in_cur;
        pclass::dst_ptr = (dstT*)out_cur;
        pclass::src_adv = (size_t)(pclass::src_ptr - i_src);
        pclass::dst_adv = (size_t)(pclass::dst_ptr - i_dst);

        /* iconv() returns size_t and signals failure with (size_t)-1 */
        if (ret == (size_t)(-1)) {
            if (conv_errno == E2BIG)
                return pclass::err_noroom;
            else if (conv_errno == EILSEQ)
                return pclass::err_notvalid;
            else if (conv_errno == EINVAL)
                return pclass::err_incomplete;

            return pclass::err_notvalid;
        }

        /* keep the success value non-negative even if the count does not fit in an int */
        return (ret > (size_t)INT_MAX) ? INT_MAX : (int)ret;
    }
public:
    /* factory function, wide to char, or char to wide.
     * nw names the narrow (char) encoding. Returns NULL on any failure. */
    static _Iconv<srcT,dstT> *create(const char *nw) {
        if (nw == NULL)
            return NULL;

        if (sizeof(dstT) == sizeof(char) && sizeof(srcT) > sizeof(char)) {
            const char *wchar_encoding = _get_wchar_encoding<srcT>();
            if (wchar_encoding == NULL) return NULL;

            return _adopt(iconv_open(/*TO*/nw,/*FROM*/wchar_encoding)); /* from wchar to codepage nw */
        }
        else if (sizeof(dstT) > sizeof(char) && sizeof(srcT) == sizeof(char)) {
            const char *wchar_encoding = _get_wchar_encoding<dstT>();
            if (wchar_encoding == NULL) return NULL;

            return _adopt(iconv_open(/*TO*/wchar_encoding,/*FROM*/nw)); /* from codepage nw to wchar */
        }

        return NULL;
    }
    /* factory function, char to char. Returns NULL on any failure. */
    static _Iconv<srcT,dstT> *create(const char *to,const char *from) {
        if (to == NULL || from == NULL)
            return NULL;

        if (sizeof(dstT) == sizeof(char) && sizeof(srcT) == sizeof(char))
            return _adopt(iconv_open(to,from));

        return NULL;
    }
protected:
    /* Wrap a descriptor returned by iconv_open(). If the object cannot be
     * allocated the descriptor is closed here instead of being leaked. */
    static _Iconv<srcT,dstT> *_adopt(iconv_t ctx) {
        if (ctx == iconv_t(-1))
            return NULL;

        _Iconv<srcT,dstT> *obj = new(std::nothrow) _Iconv<srcT,dstT>(ctx);
        if (obj == NULL)
            iconv_close(ctx);

        return obj;
    }
    void close(void) {
        if (context != NULL) {
            iconv_close(context);
            context = NULL;
        }
    }
    /* Encoding name matching the size and host byte order of W, or NULL if W is neither 2 nor 4 bytes. */
    template <typename W> static const char *_get_wchar_encoding(void) {
        if (sizeof(W) == 4)
            return pclass::big_endian() ? "UTF-32BE" : "UTF-32LE";
        else if (sizeof(W) == 2)
            return pclass::big_endian() ? "UTF-16BE" : "UTF-16LE";

        return NULL;
    }
protected:
    iconv_t context = NULL;
};

/* Most of the time the Iconv form will be used, for Mac OS X and Linux platforms where UTF-8 is common.
 *
 * Conversion to/from wchar is intended for platforms like Microsoft Windows 98/ME/2000/XP/Vista/7/8/10/etc
 * where the Win32 API functions take WCHAR (UTF-16 or UCS-16), in which case, the code will continue to
 * use UTF-8 internally but convert to WCHAR when needed. For example, Win32 function CreateFileW().
 *
 * Note that because of the UTF-16 world of Windows, Microsoft C++ defines wchar_t as an unsigned 16-bit
 * integer.
 *
 * Linux and other OSes however define wchar_t as a 32-bit integer, but do not use wchar_t APIs, and often
 * instead use UTF-8 for unicode, so the wchar_t versions will not see much use there. */
typedef _Iconv<char,char>    Iconv;
typedef _Iconv<char,wchar_t> IconvToW;
typedef _Iconv<wchar_t,char> IconvFromW;

#endif // C_ICONV

#if defined(C_ICONV_WIN32)
# include <windows.h>

/* Alternative implementation (char to WCHAR, or WCHAR to char only) using Microsoft Win32 APIs instead of libiconv.
 * For use with embedded or low memory Windows installations or environments where the added load of libiconv would
 * be undesirable. */

/* _IconvWin32 implementation of _IconvBase using Microsoft Win32 code page and WCHAR support functions for Windows 2000/XP/Vista/7/8/10/etc */
template <typename srcT,typename dstT> class _IconvWin32 : public _IconvBase<srcT,dstT> {
protected:
    using pclass = _IconvBase<srcT,dstT>;
public:
    explicit _IconvWin32(const UINT _codepage) : codepage(_codepage) {
    }
    _IconvWin32(const _IconvWin32 *p) = delete;
    _IconvWin32(const _IconvWin32 &other) = delete; /* no copying */
    _IconvWin32(const _IconvWin32 &&other) = delete; /* no moving */
    _IconvWin32() = delete;
    virtual ~_IconvWin32() {
    }
public:
    /* Convert the whole source window in one Win32 call.
     * Returns 0 on success or an err_* code. The cursors and the
     * src_adv/dst_adv counters only move on success. */
    int _do_convert(void) override {
        if (codepage == 0u)
            return pclass::err_noinit;

        const bool wide_to_mb = (sizeof(dstT) == sizeof(char) && sizeof(srcT) == sizeof(WCHAR));
        const bool mb_to_wide = (sizeof(dstT) == sizeof(WCHAR) && sizeof(srcT) == sizeof(char));

        pclass::src_adv = 0;
        pclass::dst_adv = 0;

        /* unsupported element sizes: there is no API call whose error could be inspected */
        if (!wide_to_mb && !mb_to_wide)
            return pclass::err_noinit;

        /* The Win32 calls count in elements (WCHARs or chars), not bytes. */
        const size_t src_units = (size_t)(pclass::src_ptr_fence - pclass::src_ptr);
        const size_t dst_units = (size_t)(pclass::dst_ptr_fence - pclass::dst_ptr);

        /* Both APIs fail when the input length is 0; an empty string converts to an empty string. */
        if (src_units == 0)
            return 0;

        /* An output size of 0 asks the API for the required size without writing
         * anything, so it must not be mistaken for a successful conversion. */
        if (dst_units == 0)
            return pclass::err_noroom;

        /* The length parameters are int. A wrapped value could turn negative,
         * which the API interprets as "NUL terminated". */
        if (src_units > (size_t)INT_MAX)
            return pclass::err_noroom;

        /* A smaller-than-real output size is always safe. */
        const int dst_count = (dst_units > (size_t)INT_MAX) ? INT_MAX : (int)dst_units;
        int ret;

        if (wide_to_mb) {
            /* Convert wide char to multibyte using the Win32 API.
             * See also: [https://docs.microsoft.com/en-us/windows/desktop/api/stringapiset/nf-stringapiset-widechartomultibyte] */
            ret = WideCharToMultiByte(codepage,0,(const WCHAR*)pclass::src_ptr,(int)src_units,(char*)pclass::dst_ptr,dst_count,NULL,NULL);
        }
        else {
            /* Convert multibyte to wide char using the Win32 API.
             * See also: [https://docs.microsoft.com/en-us/windows/desktop/api/stringapiset/nf-stringapiset-multibytetowidechar] */
            ret = MultiByteToWideChar(codepage,0,(const char*)pclass::src_ptr,(int)src_units,(WCHAR*)pclass::dst_ptr,dst_count);
        }

        if (ret <= 0) {
            const DWORD err = GetLastError();

            if (err == ERROR_INSUFFICIENT_BUFFER)
                return pclass::err_noroom;
            else if (err == ERROR_NO_UNICODE_TRANSLATION)
                return pclass::err_notvalid;

            return pclass::err_noinit;
        }

        /* Advance in elements so that a WCHAR source pointer lands exactly on its fence. */
        pclass::src_adv = src_units;
        pclass::src_ptr += src_units;
        pclass::dst_adv = (size_t)ret;
        pclass::dst_ptr += ret;

        return 0;
    }
public:
    /* factory function, WCHAR to char or char to WCHAR.
     * Returns NULL if the code page is unknown, the element sizes are unsupported, or allocation fails. */
    static _IconvWin32<srcT,dstT> *create(const UINT codepage) {
        CPINFO cpi;

        /* Test whether the code page exists */
        if (!GetCPInfo(codepage,&cpi))
            return NULL;

        if ((sizeof(dstT) == sizeof(char) && sizeof(srcT) == sizeof(WCHAR)) ||
            (sizeof(dstT) == sizeof(WCHAR) && sizeof(srcT) == sizeof(char)))
            return new(std::nothrow) _IconvWin32<srcT,dstT>(codepage);

        return NULL;
    }
protected:
    UINT codepage = 0;
};

typedef _IconvWin32<char,WCHAR> IconvWin32ToW;
typedef _IconvWin32<WCHAR,char> IconvWin32FromW;

#endif // C_ICONV_WIN32
