summary refs log tree commit diff stats
path: root/iconv.c
diff options
context:
space:
mode:
author: Natanael Copa <natanael.copa@gmail.com> 2008-10-22 13:44:07 +0200
committer: Natanael Copa <natanael.copa@gmail.com> 2008-10-22 13:44:07 +0200
commit: 6ed652d87201c190125877cf6912cd4f2e4269b5 (patch)
tree: 4d430843a20030d50acf625294e1cd8916a3df0b /iconv.c
download: uiconv-6ed652d87201c190125877cf6912cd4f2e4269b5.tar.bz2
uiconv-6ed652d87201c190125877cf6912cd4f2e4269b5.tar.xz
initial commit
Diffstat (limited to 'iconv.c')
-rw-r--r--iconv.c230
1 files changed, 230 insertions, 0 deletions
diff --git a/iconv.c b/iconv.c
new file mode 100644
index 0000000..533da0a
--- /dev/null
+++ b/iconv.c
@@ -0,0 +1,230 @@
+
+#include <errno.h>
+#include <ctype.h>
+
+#define __need_size_t
+#include <stddef.h>
+#include <string.h>
+
+#include "iconv.h"
+
+/*
+ * UNUSED marks a parameter that is intentionally never referenced, so the
+ * compiler does not warn about it.  The attribute syntax is a GNU extension;
+ * on compilers that do not define __GNUC__ the macro expands to nothing.
+ */
+#ifndef UNUSED
+# ifdef __GNUC__
+#  define UNUSED __attribute__ ((__unused__))
+# else
+#  define UNUSED
+# endif
+#endif
+
+
+/*
+ * Forward declarations of the per-codeset primitives referenced by the
+ * supported_codesets table, grouped by codeset.
+ *
+ * decode_*: read one character from inbuf (count bytes are available),
+ *           store its scalar value in *scalar and return the number of
+ *           bytes consumed; a negative return means failure.
+ * encode_*: store scalar at outbuf and return the number of bytes written,
+ *           or 0 when the codeset cannot represent the value.
+ */
+static int decode_ascii(int *scalar, const unsigned char *inbuf, int count);
+static int encode_ascii(int scalar, unsigned char *outbuf);
+
+static int decode_iso_8859_1(int *scalar, const unsigned char *inbuf,
+                             int count);
+static int encode_iso_8859_1(int scalar, unsigned char *outbuf);
+
+static int decode_utf_8(int *scalar, const unsigned char *inbuf, int count);
+static int encode_utf_8(int scalar, unsigned char *outbuf);
+
+/*
+ * Codeset identifiers.  Each value is stored in the .code field of the
+ * matching supported_codesets entry.
+ */
+enum {
+    ASCII      = 0,
+    UTF_8      = 1,
+    ISO_8859_1 = 2
+};
+
+/*
+ * Describes one codeset: its identifier, the name it is looked up by, and
+ * the two functions that translate between its bytes and scalar values.
+ */
+struct converter {
+    /* codeset identifier; -1 marks the end of the supported_codesets table */
+    int code;
+    /* name compared against the requested codeset by find_converter() */
+    const char *name;
+    /* (scalar out, input bytes, bytes available) -> bytes consumed, <0 on error */
+    int (*decode)(int *, const unsigned char *, int);
+    /* (scalar, output bytes) -> bytes written, 0 when not representable */
+    int (*encode)(int, unsigned char *);
+};
+
+/*
+ * Table of supported codesets.  find_converter() returns an index into this
+ * array and iconv() uses the indices packed into the descriptor, so every
+ * entry sits in the slot equal to its enum value.  The table is terminated
+ * by an entry whose code is -1.
+ */
+struct converter supported_codesets[] = {
+    [ASCII] = {
+        .code   = ASCII,
+        .name   = "ASCII",
+        .decode = decode_ascii,
+        .encode = encode_ascii
+    },
+    [UTF_8] = {
+        .code   = UTF_8,
+        .name   = "UTF-8",
+        .decode = decode_utf_8,
+        .encode = encode_utf_8
+    },
+    [ISO_8859_1] = {
+        .code   = ISO_8859_1,
+        .name   = "ISO-8859-1",
+        .decode = decode_iso_8859_1,
+        .encode = encode_iso_8859_1
+    },
+    /* end-of-table sentinel */
+    [ISO_8859_1 + 1] = {
+        .code   = -1
+    }
+};
+
+/* ASCII */
+/*
+ * Decode one ASCII character.  A byte with the high bit set is not ASCII:
+ * it is rejected with errno = EILSEQ and *scalar is left untouched.
+ * Returns the number of bytes consumed (always 1) or -1 on error.
+ * count is not used.
+ */
+static int decode_ascii(int *scalar, const unsigned char *inbuf,
+                        int UNUSED count)
+{
+    const unsigned char byte = inbuf[0];
+
+    if (byte < 0x80) {
+        *scalar = byte;
+        return 1;
+    }
+
+    errno = EILSEQ;
+    return -1;
+}
+
+/*
+ * Encode scalar as a single ASCII byte.  Returns 1 after storing the byte
+ * at outbuf, or 0 (nothing written) when scalar is 0x80 or above.
+ */
+static int encode_ascii(int scalar, unsigned char *outbuf)
+{
+    if (scalar < 0x80) {
+        outbuf[0] = scalar;
+        return 1;
+    }
+
+    return 0;
+}
+
+/* ISO-8859-1 */
+/*
+ * Decode one ISO-8859-1 character: the byte value is stored unchanged as
+ * the scalar value.  Always returns 1 (one byte consumed); count is not
+ * used.
+ */
+static int decode_iso_8859_1(int *scalar, const unsigned char *inbuf,
+                             int UNUSED count)
+{
+    const int value = inbuf[0];
+
+    *scalar = value;
+    return 1;
+}
+
+/*
+ * Encode scalar as a single ISO-8859-1 byte.  Returns 1 after storing the
+ * byte at outbuf, or 0 (nothing written) when scalar is above 0xff.
+ */
+static int encode_iso_8859_1(int scalar, unsigned char *outbuf)
+{
+    if (scalar <= 0xff) {
+        outbuf[0] = scalar;
+        return 1;
+    }
+
+    return 0;
+}
+
+
+/* UTF-8 */
+/* scalar value                  utf-8
+ * 00000000 00000000 0zzzzzzz    0zzzzzzz
+ * 00000000 00000yyy yyzzzzzz    110yyyyy 10zzzzzz
+ * 00000000 xxxxyyyy yyzzzzzz    1110xxxx 10yyyyyy 10zzzzzz
+ * 000wwwxx xxxxyyyy yyzzzzzz    11110www 10xxxxxx 10yyyyyy 10zzzzzz
+ *
+ * This implementation only supports the first two variants.
+ */
+/*
+ * Decode one UTF-8 character of at most two bytes.
+ *
+ * scalar: receives the decoded value on success
+ * inbuf:  input bytes; inbuf[0] must be readable
+ * count:  number of bytes available at inbuf
+ *
+ * Returns the number of bytes consumed (1 or 2).  On failure returns -1 and
+ * sets errno to
+ *   EILSEQ  when the bytes are not a valid one- or two-byte sequence; this
+ *           covers stray continuation bytes, the overlong lead bytes 0xc0
+ *           and 0xc1, and the lead bytes of the unsupported longer forms;
+ *   EINVAL  when a valid two-byte lead is the last available byte.
+ */
+static int decode_utf_8(int *scalar, const unsigned char *inbuf, int count)
+{
+    const int lead = inbuf[0];
+    int trail;
+
+    if (lead < 0x80) {
+        *scalar = lead;
+        return 1;
+    }
+
+    /* Only 110yyyyy starts a sequence handled here.  0xc0 and 0xc1 match
+     * that pattern but can only encode values below 0x80, which is an
+     * overlong (ill-formed) encoding. */
+    if ((lead & 0xe0) != 0xc0 || lead < 0xc2) {
+        errno = EILSEQ;
+        return -1;
+    }
+
+    /* The lead byte is fine but its continuation byte has not arrived. */
+    if (count < 2) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    trail = inbuf[1];
+    if ((trail & 0xc0) != 0x80) {
+        errno = EILSEQ;
+        return -1;
+    }
+
+    /* All five payload bits of the lead byte (yyyyy) are significant. */
+    *scalar = ((lead & 0x1f) << 6) | (trail & 0x3f);
+    return 2;
+}
+
+/*
+ * Encode scalar as UTF-8 using at most two bytes.  Returns the number of
+ * bytes stored at outbuf (1 or 2), or 0 without writing anything when the
+ * value is above 0x7ff and would need a longer sequence.
+ */
+static int encode_utf_8(int scalar, unsigned char *outbuf)
+{
+    if (scalar > 0x7ff)
+        return 0;
+
+    if (scalar >= 0x80) {
+        /* 110yyyyy 10zzzzzz */
+        const int lead = 0xc0 | (scalar >> 6);
+        const int trail = 0x80 | (scalar & 0x3f);
+
+        outbuf[0] = lead;
+        outbuf[1] = trail;
+        return 2;
+    }
+
+    outbuf[0] = scalar;
+    return 1;
+}
+
+/*
+ * Normalise a codeset name in place: every '_' becomes '-' and every other
+ * character is passed through toupper().  A NULL pointer is accepted and
+ * ignored.
+ */
+static void toupperstr(char *p)
+{
+    if (!p)
+        return;
+
+    for (; *p != '\0'; p++) {
+        if (*p == '_') {
+            *p = '-';
+        } else {
+            /* <ctype.h> functions take an unsigned char value (or EOF);
+             * passing a negative plain char is undefined behaviour. */
+            *p = (char) toupper((unsigned char) *p);
+        }
+    }
+}
+
+/*
+ * Look up a codeset by name.  At most 15 characters of str are copied,
+ * normalised with toupperstr() and compared against the name of every
+ * supported_codesets entry.  Returns the index of the matching entry, or -1
+ * when there is none.
+ */
+static int find_converter(const char *str)
+{
+    char name[16];
+    size_t len = 0;
+    const struct converter *entry;
+
+    /* bounded copy that always leaves room for the terminator */
+    while (len < sizeof(name) - 1 && str[len] != '\0') {
+        name[len] = str[len];
+        len++;
+    }
+    name[len] = '\0';
+
+    toupperstr(name);
+
+    for (entry = supported_codesets; entry->code != -1; entry++) {
+        if (strcmp(name, entry->name) == 0)
+            return (int) (entry - supported_codesets);
+    }
+
+    return -1;
+}
+
+/*
+ * Create a conversion descriptor that converts from fromcode to tocode.
+ *
+ * The descriptor is an integer: the supported_codesets index of the target
+ * codeset shifted left by 8, OR-ed with the index of the source codeset.
+ *
+ * Returns (iconv_t) -1 with errno set to EINVAL when either name is NULL or
+ * is not found by find_converter().
+ */
+iconv_t iconv_open(const char *tocode, const char *fromcode)
+{
+    int from_idx, to_idx;
+
+    if (tocode != NULL && fromcode != NULL) {
+        from_idx = find_converter(fromcode);
+        to_idx = find_converter(tocode);
+        if (from_idx != -1 && to_idx != -1)
+            return (iconv_t) (to_idx << 8) | from_idx;
+    }
+
+    errno = EINVAL;
+    return (iconv_t) -1;
+}
+
+/*
+ * Release a conversion descriptor.  The descriptor argument is not used and
+ * the function always returns 0.
+ */
+int iconv_close(iconv_t UNUSED cd)
+{
+    const int status = 0;
+
+    return status;
+}
+
+/*
+ * Convert characters from *inbuf to *outbuf using descriptor cd.
+ *
+ * cd:      descriptor returned by iconv_open()
+ * inbuf:   in/out pointer to the unread input; advanced past every
+ *          character that was processed
+ * insize:  in/out number of input bytes left
+ * outbuf:  in/out pointer to the free output space; advanced past every
+ *          byte written
+ * outsize: in/out number of free output bytes left
+ *
+ * Characters the target codeset cannot represent are dropped from the
+ * output.  If output space is left once all input has been consumed, a NUL
+ * byte is stored at **outbuf; *outbuf is not advanced past it and *outsize
+ * does not account for it.
+ *
+ * Returns the number of characters that were dropped or whose scalar value
+ * differs from their first input byte.  Returns 0 without doing anything
+ * when a buffer or size argument (or *inbuf / *outbuf) is NULL.  Returns
+ * (size_t) -1 on error with errno set to
+ *   EBADF   when cd does not name two supported codesets,
+ *   E2BIG   when the next character does not fit in the output buffer,
+ *   or whatever the decoder reported for bad input;
+ * the pointers and sizes then describe the state just before the failing
+ * character.
+ *
+ * NOTE(review): POSIX defines the return value as the number of
+ * non-reversible conversions; the count kept here also includes correctly
+ * converted multi-byte characters.  Confirm what callers expect before
+ * changing it.
+ */
+size_t iconv(iconv_t cd, const char **inbuf, size_t *insize,
+             char **outbuf, size_t *outsize)
+{
+    /* Scratch size for one encoded character; the encoders in this file
+     * write at most 2 bytes. */
+    enum { MAX_SEQ = 4 };
+    /* number of real entries, i.e. without the terminating sentinel */
+    const size_t ncodesets =
+        sizeof(supported_codesets) / sizeof(supported_codesets[0]) - 1;
+    const size_t src = (size_t) (cd & 0xff);
+    const size_t dst = (size_t) (cd >> 8);
+    const struct converter *in, *out;
+    size_t nonidentical = 0;
+
+    /* Rejects (iconv_t) -1 and anything else iconv_open() cannot return,
+     * instead of indexing outside the table. */
+    if (src >= ncodesets || dst >= ncodesets) {
+        errno = EBADF;
+        return (size_t) -1;
+    }
+    in = &supported_codesets[src];
+    out = &supported_codesets[dst];
+
+    if (!inbuf || !*inbuf || !insize || !outbuf || !*outbuf || !outsize)
+        return 0;
+
+    while (*insize > 0) {
+        const unsigned char *bytes = (const unsigned char *) *inbuf;
+        unsigned char encoded[MAX_SEQ];
+        int avail, scalar, infwd, outfwd;
+
+        /* The decoders take an int; clamp so that a huge size_t cannot
+         * turn into a negative count. */
+        avail = *insize < (size_t) MAX_SEQ ? (int) *insize : MAX_SEQ;
+
+        infwd = in->decode(&scalar, bytes, avail);
+        if (infwd < 0)
+            return (size_t) -1;
+
+        /* Encode into scratch space first so that nothing is written past
+         * the end of the caller's buffer when it is too small. */
+        outfwd = out->encode(scalar, encoded);
+        if ((size_t) outfwd > *outsize) {
+            errno = E2BIG;
+            return (size_t) -1;
+        }
+        memcpy(*outbuf, encoded, (size_t) outfwd);
+
+        /* Compare as an unsigned byte so the count does not depend on the
+         * signedness of plain char. */
+        if (bytes[0] != scalar || outfwd == 0)
+            nonidentical++;
+
+        *inbuf += infwd;
+        *insize -= (size_t) infwd;
+        *outbuf += outfwd;
+        *outsize -= (size_t) outfwd;
+    }
+
+    /* Terminate the output when there is room; store the byte rather than
+     * overwriting the caller's pointer. */
+    if (*outsize > 0)
+        **outbuf = '\0';
+
+    return nonidentical;
+}