#include <stdlib.h>
#include <limits.h>

/*
 * Use the FSS-UTF transformation proposed by posix.
 *	We define 7 byte types:
 *	T0	0xxxxxxx	7 free bits
 *	Tx	10xxxxxx	6 free bits
 *	T1	110xxxxx	5 free bits
 *	T2	1110xxxx	4 free bits
 *	T3	11110xxx	3 free bits
 *	T4	111110xx	2 free bits
 *	T5	1111110x	1 free bit
 *
 *	Encoding is as follows.
 *	From hex	Thru hex	Sequence		Bits
 *	00000000	0000007F	T0			7
 *	00000080	000007FF	T1 Tx			11
 *	00000800	0000FFFF	T2 Tx Tx		16
 *	00010000	001FFFFF	T3 Tx Tx Tx		21
 *	00200000	03FFFFFF	T4 Tx Tx Tx Tx		26
 *	04000000	7FFFFFFF	T5 Tx Tx Tx Tx Tx	31
 *
 * Only the rows that fit in MB_LEN_MAX bytes are used: the longest
 * sequence produced or accepted is UTFLEN bytes.
 */

int mbtowc(wchar_t *pwc, const char *s, size_t n);

enum {
	TX	= 0x80,		/* continuation byte marker, 10xxxxxx */
	TXMSK	= 0x3F,		/* payload bits of a continuation byte */
	NTXBITS	= 6,		/* number of payload bits in a continuation byte */
	T1	= 0xC0,		/* lead byte marker of a 2-byte sequence */
	T2	= 0xE0,		/* lead byte marker of a 3-byte sequence */
	UTFMAX	= 6,		/* longest sequence in the table above */
	/* longest sequence this build may use */
	UTFLEN	= MB_LEN_MAX < UTFMAX ? MB_LEN_MAX : UTFMAX,
};

/*
 * seqlim[k-1] is the first value that does NOT fit in a k-byte sequence,
 * i.e. the "Thru hex" column of the table plus one.  It drives both the
 * length selection in wctomb and the overlong check in mbtowc.
 */
static const unsigned long seqlim[UTFMAX] = {
	0x80UL,
	0x800UL,
	0x10000UL,
	0x200000UL,
	0x4000000UL,
	0x80000000UL,
};

/*
 * Length in bytes of the multibyte character at s, looking at no more
 * than n bytes.  Same return values as mbtowc.
 */
int
mblen(const char *s, size_t n)
{
	return mbtowc(NULL, s, n);
}

/*
 * Decode one multibyte character from s, examining at most n bytes.
 * The value is stored through pwc when pwc is not NULL.
 *
 * Returns 0 if s is NULL (the encoding is stateless) or if the character
 * is NUL, the number of bytes consumed for any other valid character,
 * and -1 for an invalid, truncated or overlong sequence.
 */
int
mbtowc(wchar_t *pwc, const char *s, size_t n)
{
	unsigned long c0, c, l;
	unsigned m, b;
	int len, i;

	if(s == NULL)
		return 0;
	if(n < 1)
		return -1;

	c0 = (unsigned char)s[0];
	if(c0 < TX){
		/* T0: plain 7-bit character */
		if(pwc != NULL)
			*pwc = (wchar_t)c0;
		return c0 != 0;
	}

	/*
	 * Classify the lead byte: a len-byte sequence starts with len
	 * one bits followed by a zero.  m selects those len+1 bits and
	 * b is their expected value.  A continuation byte (10xxxxxx)
	 * or a lead longer than UTFLEN matches nothing and is rejected
	 * before any further byte is read.
	 */
	m = T2;
	b = T1;
	for(len = 2; len <= UTFLEN; len++){
		if((c0 & m) == b)
			break;
		b = m;
		m = (m >> 1) | 0x80;
	}
	if(len > UTFLEN || n < (size_t)len)
		return -1;

	/*
	 * Read the continuation bytes in order and stop at the first
	 * bad one, so a NUL-terminated string is never read past its
	 * terminator even when n is larger than the string.
	 */
	l = c0 & (m ^ 0xFF);
	for(i = 1; i < len; i++){
		c = (unsigned char)s[i];
		if((c & 0xC0) != TX)
			return -1;
		l = (l << NTXBITS) | (c & TXMSK);
	}

	/* the value must need this many bytes, else the form is overlong */
	if(l < seqlim[len - 2])
		return -1;

	if(pwc != NULL)
		*pwc = (wchar_t)l;
	return len;
}

/*
 * Encode wchar into s, which must have room for MB_LEN_MAX bytes.
 *
 * Returns 0 if s is NULL (the encoding is stateless), the number of
 * bytes written otherwise, and -1 if wchar is negative or too large to
 * be encoded in UTFLEN bytes.
 */
int
wctomb(char *s, wchar_t wchar)
{
	unsigned long c;
	unsigned m;
	int len, i;

	if(s == NULL)
		return 0;

	/*
	 * A negative wchar_t converts to a value with the top bit set,
	 * which is above every limit in seqlim and so is rejected below.
	 */
	c = (unsigned long)wchar;
	if(c < TX){
		s[0] = (char)c;
		return 1;
	}

	/* pick the shortest sequence that holds c; m tracks its lead marker */
	m = T1;
	for(len = 2; len <= UTFLEN; len++){
		if(c < seqlim[len - 1])
			break;
		m = (m >> 1) | 0x80;
	}
	if(len > UTFLEN)
		return -1;

	s[0] = (char)(m | (c >> ((len - 1) * NTXBITS)));
	for(i = 1; i < len; i++)
		s[i] = (char)(TX | ((c >> ((len - 1 - i) * NTXBITS)) & TXMSK));
	return len;
}

/*
 * Convert the NUL-terminated multibyte string s into wide characters,
 * storing at most n of them in pwcs.  The terminating NUL is stored if
 * there is room for it but is not counted.
 *
 * Returns the number of wide characters stored, or (size_t)-1 if s
 * contains an invalid sequence.
 */
size_t
mbstowcs(wchar_t *pwcs, const char *s, size_t n)
{
	size_t i;
	int d;

	for(i = 0; i < n; i++){
		/* mbtowc stops at the first bad byte, so it cannot run past the NUL */
		d = mbtowc(pwcs, s, MB_LEN_MAX);
		if(d < 0)
			return (size_t)-1;
		if(d == 0)
			break;		/* terminator already stored by mbtowc */
		s += d;
		pwcs++;
	}
	return i;
}

/*
 * Convert the NUL-terminated wide string pwcs into multibyte characters,
 * storing at most n bytes in s.  Conversion stops before a character
 * that would not fit whole.  The terminating NUL is stored if there is
 * room for it but is not counted.
 *
 * Returns the number of bytes stored, or (size_t)-1 if pwcs contains a
 * value that wctomb cannot encode.
 */
size_t
wcstombs(char *s, const wchar_t *pwcs, size_t n)
{
	char buf[MB_LEN_MAX];
	size_t used;
	wchar_t wc;
	int d, i;

	used = 0;
	while(used < n){
		wc = *pwcs++;
		if(wc == 0){
			s[used] = '\0';
			break;
		}
		/* encode into a scratch buffer so s is never overrun */
		d = wctomb(buf, wc);
		if(d < 0)
			return (size_t)-1;
		if((size_t)d > n - used)
			break;
		for(i = 0; i < d; i++)
			s[used + i] = buf[i];
		used += (size_t)d;
	}
	return used;
}