summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile61
-rw-r--r--README4
-rw-r--r--TODO2
-rw-r--r--iconv.c230
-rw-r--r--iconv.h16
-rw-r--r--test.c72
6 files changed, 385 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f22d82d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,61 @@
+# uiconv: a minimal iconv implementation, built as libiconv.a (default) or
+# libiconv.so (make shared).
+NAME= uiconv
+VERSION= 0.2.3
+
+# Install layout: $(DESTDIR)$(PREFIX)/$(LIB) and $(DESTDIR)$(PREFIX)/$(INCLUDE).
+LIB= lib
+INCLUDE= include
+PREFIX= /usr/local
+DESTDIR=
+
+CFLAGS= -Wall -Werror -Os
+CFLAGS+= -std=gnu99 -fPIC
+
+TARGETS= libiconv.a
+TESTS= test-gnu test-uiconv
+
+DISTFILES= Makefile README TODO iconv.c iconv.h test.c
+
+# These targets do not name files produced by their rules.
+.PHONY: all static shared clean test dist install
+
+all: static
+
+static: libiconv.a
+
+shared: libiconv.so
+
+libiconv.a: iconv.o
+	$(AR) -cr $@ $<
+
+libiconv.so.0: iconv.o
+	$(CC) $(LDFLAGS) -o $@ -shared $<
+
+libiconv.so: libiconv.so.0
+	ln -sf $< $@
+
+%.o: %.c
+	$(CC) $(CFLAGS) -c -I. $<
+
+clean:
+	rm -f *.a *.so *.so.0 *.o $(TESTS)
+
+# Links no local library, so iconv() is resolved from the C library
+# (presumably the reference to compare against).
+test-gnu: test.c libiconv.a
+	$(CC) -o $@ $(CFLAGS) $(LDFLAGS) $<
+
+# Links the local implementation. The archive is named explicitly: with only
+# -L. on the command line nothing from this directory was linked.
+test-uiconv: test.c libiconv.a
+	$(CC) -I. -o $@ $(CFLAGS) $(LDFLAGS) $< libiconv.a
+
+# printf with octal escapes is used instead of "echo -e", which /bin/sh is not
+# required to support. Arguments: "ABC", "a" + U+00F8 in UTF-8, and "a" +
+# U+00F8 in ISO-8859-1.
+test: test-gnu test-uiconv
+	./test-uiconv ABC `printf 'a\303\270'` `printf 'a\370'`
+
+P=$(NAME)-$(VERSION)
+dist: $(P).tar.gz
+
+$(P).tar.gz: $(DISTFILES)
+	rm -rf $(P)
+	mkdir $(P)
+	cp $(DISTFILES) $(P)/
+	tar -zcf $@ $(P)
+	rm -rf $(P)
+
+# Build the archive first so "make install" works from a clean tree.
+install: libiconv.a
+	mkdir -p $(DESTDIR)$(PREFIX)/$(LIB)
+	mkdir -p $(DESTDIR)$(PREFIX)/$(INCLUDE)
+	cp libiconv.a $(DESTDIR)$(PREFIX)/$(LIB)
+	cp iconv.h $(DESTDIR)$(PREFIX)/$(INCLUDE)
+
diff --git a/README b/README
new file mode 100644
index 0000000..d7681d0
--- /dev/null
+++ b/README
@@ -0,0 +1,4 @@
+Minimal iconv implementation
+
+Handles ASCII, ISO-8859-1 and UTF-8 (only characters encoded in 1 or 2 bytes)
+
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..4ba3307
--- /dev/null
+++ b/TODO
@@ -0,0 +1,2 @@
+Support multiple names for the same codeset, e.g. UTF8 = UTF-8
+
diff --git a/iconv.c b/iconv.c
new file mode 100644
index 0000000..533da0a
--- /dev/null
+++ b/iconv.c
@@ -0,0 +1,230 @@
+
+#include <errno.h>
+#include <ctype.h>
+
+#define __need_size_t
+#include <stddef.h>
+#include <string.h>
+
+#include "iconv.h"
+
+#ifndef UNUSED
+#define UNUSED __attribute__ ((__unused__))
+#endif
+
+
+static int decode_ascii(int *scalar, const unsigned char *inbuf, int count);
+static int decode_iso_8859_1(int *scalar, const unsigned char *inbuf,
+ int count);
+static int decode_utf_8(int *scalar, const unsigned char *inbuf, int count);
+
+static int encode_ascii(int scalar, unsigned char *outbuf);
+static int encode_iso_8859_1(int scalar, unsigned char *outbuf);
+static int encode_utf_8(int scalar, unsigned char *outbuf);
+
+/* Codeset identifiers used in the .code field of struct converter. */
+enum {
+	ASCII      = 0,
+	UTF_8      = 1,
+	ISO_8859_1 = 2
+};
+
+struct converter { /* one supported codeset: its id, its name and its codec functions */
+ int code; /* codeset id (ASCII, UTF_8, ...); -1 marks the end of the table */
+ const char *name; /* name matched by find_converter(): upper case, '-' separators */
+ int (*decode)(int *scalar, const unsigned char *inbuf, int count); /* reads one character (count bytes available) into *scalar; returns bytes consumed, or -1 with errno set */
+ int (*encode)(int scalar, unsigned char *outbuf); /* writes scalar to outbuf; returns bytes written, or 0 if scalar is not representable */
+};
+
+/*
+ * Table of supported codesets, terminated by an entry whose code is -1.
+ * Every entry sits at the index equal to its code. The table is private to
+ * this file, so it is static rather than an exported library symbol.
+ */
+static struct converter supported_codesets[] = {
+	{
+		.code = ASCII,
+		.name = "ASCII",
+		.decode = &decode_ascii,
+		.encode = &encode_ascii,
+	}, {
+		.code = UTF_8,
+		.name = "UTF-8",
+		.decode = &decode_utf_8,
+		.encode = &encode_utf_8,
+	}, {
+		.code = ISO_8859_1,
+		.name = "ISO-8859-1",
+		.decode = &decode_iso_8859_1,
+		.encode = &encode_iso_8859_1
+	}, {
+		/* end-of-table sentinel */
+		.code = -1,
+	}
+};
+
+/* ASCII */
+/*
+ * Decode one ASCII character: store the byte at inbuf in *scalar and return
+ * 1 (bytes consumed). A byte with the high bit set is not ASCII; in that case
+ * return -1 with errno set to EILSEQ. count is not used.
+ */
+static int decode_ascii(int *scalar, const unsigned char *inbuf,
+			int UNUSED count)
+{
+	const unsigned char byte = inbuf[0];
+
+	if (byte & 0x80) {
+		errno = EILSEQ;
+		return -1;
+	}
+
+	*scalar = byte;
+	return 1;
+}
+
+/*
+ * Encode scalar as one ASCII byte at outbuf and return 1. Values of 0x80 and
+ * above are not representable: nothing is written and 0 is returned.
+ */
+static int encode_ascii(int scalar, unsigned char *outbuf)
+{
+	if (scalar < 0x80) {
+		outbuf[0] = (unsigned char) scalar;
+		return 1;
+	}
+	return 0;
+}
+
+/* ISO-8859-1 */
+/*
+ * Decode one ISO-8859-1 character. Every byte value is used directly as the
+ * scalar value, so this always succeeds and returns 1. count is not used.
+ */
+static int decode_iso_8859_1(int *scalar, const unsigned char *inbuf,
+			     int UNUSED count)
+{
+	scalar[0] = inbuf[0];
+	return 1;
+}
+
+/*
+ * Encode scalar as one ISO-8859-1 byte at outbuf and return 1. Values above
+ * 0xff are not representable: nothing is written and 0 is returned.
+ */
+static int encode_iso_8859_1(int scalar, unsigned char *outbuf)
+{
+	if (scalar <= 0xff) {
+		outbuf[0] = (unsigned char) scalar;
+		return 1;
+	}
+	return 0;
+}
+
+
+/* UTF-8 */
+/* scalar value                          utf-8
+ * 00000000 00000000 0zzzzzzz            0zzzzzzz
+ * 00000000 00000yyy yyzzzzzz            110yyyyy 10zzzzzz
+ * 00000000 xxxxyyyy yyzzzzzz            1110xxxx 10yyyyyy 10zzzzzz
+ * 000wwwxx xxxxyyyy yyzzzzzz            11110www 10xxxxxx 10yyyyyy 10zzzzzz
+ *
+ * This implementation only supports the first two variants.
+ */
+/*
+ * Decode one UTF-8 character of at most two bytes from inbuf, of which count
+ * bytes are available, and store its scalar value in *scalar.
+ *
+ * Returns the number of bytes consumed (1 or 2). On failure returns -1 with
+ * errno set to EINVAL when a second byte is needed but count < 2, or to
+ * EILSEQ when the bytes are not a valid one- or two-byte sequence (this
+ * includes longer sequences, which are unsupported, and overlong two-byte
+ * encodings of values below 0x80).
+ */
+static int decode_utf_8(int *scalar, const unsigned char *inbuf, int count)
+{
+	int lead = inbuf[0];
+	int pair, value;
+
+	if (lead < 0x80) {
+		*scalar = lead;
+		return 1;
+	}
+
+	if (count < 2) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/* Look at both bytes at once: they must read 110yyyyy 10zzzzzz. */
+	pair = (lead << 8) | inbuf[1];
+	if ((pair & 0xe0c0) != 0xc080) {
+		errno = EILSEQ;
+		return -1;
+	}
+
+	/*
+	 * yyyyy lives in bits 8..12 of pair, so the mask must be 0x1f00; a
+	 * narrower mask loses bit 11 and corrupts e.g. U+0200 (c8 80).
+	 */
+	value = ((pair & 0x1f00) >> 2) | (pair & 0x3f);
+	if (value < 0x80) {
+		/* Overlong form (lead byte c0 or c1) is not valid UTF-8. */
+		errno = EILSEQ;
+		return -1;
+	}
+
+	*scalar = value;
+	return 2;
+}
+
+/*
+ * Encode scalar as UTF-8 at outbuf. Values below 0x80 take one byte, values
+ * up to 0x7ff take two (110yyyyy 10zzzzzz). Returns the number of bytes
+ * written, or 0 without writing anything when scalar is above 0x7ff.
+ */
+static int encode_utf_8(int scalar, unsigned char *outbuf)
+{
+	if (scalar >= 0x80 && scalar <= 0x7ff) {
+		outbuf[0] = (scalar >> 6) | 0xc0;
+		outbuf[1] = (scalar & 0x3f) | 0x80;
+		return 2;
+	}
+
+	if (scalar > 0x7ff)
+		return 0;
+
+	outbuf[0] = scalar;
+	return 1;
+}
+
+/*
+ * Normalise a codeset name in place: '_' becomes '-' and every other
+ * character is upper-cased. A NULL pointer is accepted and ignored.
+ */
+static void toupperstr(char *p)
+{
+	if (!p)
+		return;
+
+	for (; *p; p++) {
+		if (*p == '_')
+			*p = '-';
+		else
+			/*
+			 * toupper() is only defined for unsigned char values
+			 * and EOF; a plain char may be negative.
+			 */
+			*p = (char) toupper((unsigned char) *p);
+	}
+}
+
+/*
+ * Look up a codeset by name. The name is copied (truncated to 15 characters),
+ * normalised with toupperstr() and compared against every entry of
+ * supported_codesets. Returns the index of the matching entry, or -1.
+ */
+static int find_converter(const char *str)
+{
+	char name[16];
+	int idx = 0;
+
+	strncpy(name, str, sizeof(name));
+	name[sizeof(name) - 1] = '\0';	/* strncpy does not terminate on truncation */
+	toupperstr(name);
+
+	while (supported_codesets[idx].code != -1) {
+		if (!strcmp(name, supported_codesets[idx].name))
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
+/*
+ * Create a conversion descriptor from codeset fromcode to codeset tocode.
+ * The descriptor packs the two table indices: destination in the bits above
+ * bit 7, source in the low 8 bits. Returns (iconv_t) -1 with errno set to
+ * EINVAL if either name is NULL or not a supported codeset.
+ */
+iconv_t iconv_open(const char *tocode, const char *fromcode)
+{
+	int from, to;
+
+	if (!tocode || !fromcode) {
+		errno = EINVAL;
+		return (iconv_t) -1;
+	}
+
+	from = find_converter(fromcode);
+	to = find_converter(tocode);
+	if (from == -1 || to == -1) {
+		errno = EINVAL;
+		return (iconv_t) -1;
+	}
+
+	return (iconv_t) (to << 8) | from;
+}
+
+int iconv_close(iconv_t UNUSED cd) /* cd is ignored: iconv_open() only packs two table indices, so there is nothing to release */
+{
+ return 0; /* always reports success */
+}
+
+/*
+ * Convert the *insize bytes at *inbuf into *outbuf (capacity *outsize) using
+ * the codesets selected by cd.
+ *
+ * After each converted character *inbuf/*insize and *outbuf/*outsize are
+ * advanced, so on failure they describe exactly what was converted before
+ * the error. If room is left after a complete conversion, a NUL byte is
+ * stored at **outbuf without advancing the pointer or changing *outsize.
+ *
+ * Returns the number of characters that were dropped because the target
+ * codeset cannot represent them, or whose first input byte differs from
+ * their scalar value. Returns 0 without doing anything if a buffer or size
+ * pointer is NULL. Returns (size_t) -1 with errno set on failure:
+ *   EBADF   cd is not a descriptor produced by iconv_open()
+ *   EILSEQ  invalid input sequence
+ *   EINVAL  incomplete input sequence at the end of the input
+ *   E2BIG   not enough room left in the output buffer
+ */
+size_t iconv(iconv_t cd, const char **inbuf, size_t *insize,
+	     char **outbuf, size_t *outsize)
+{
+	/* Number of real entries: the table ends with a sentinel. */
+	const int ncodesets = (int) (sizeof(supported_codesets) /
+				     sizeof(supported_codesets[0])) - 1;
+	struct converter *in, *out;
+	size_t nonidentical = 0;
+
+	/* Never index the table with (iconv_t) -1 or a made-up descriptor. */
+	if (cd < 0 || (cd & 0xff) >= ncodesets || (cd >> 8) >= ncodesets) {
+		errno = EBADF;
+		return (size_t) -1;
+	}
+	in = &supported_codesets[cd & 0xff];
+	out = &supported_codesets[cd >> 8];
+
+	if (!inbuf || !*inbuf || !insize || !outbuf || !*outbuf || !outsize)
+		return 0;
+
+	while (*insize) {
+		unsigned char encoded[2];	/* longest sequence an encoder emits */
+		int scalar, infwd, outfwd;
+		/* Decoders only care whether a second byte is available. */
+		int avail = *insize < 2 ? (int) *insize : 2;
+
+		infwd = in->decode(&scalar, (const unsigned char *) *inbuf,
+				   avail);
+		if (infwd < 0)
+			return (size_t) -1;
+
+		/*
+		 * Encode into scratch space first so nothing is written to
+		 * the caller's buffer before we know that it fits.
+		 */
+		outfwd = out->encode(scalar, encoded);
+		if ((size_t) outfwd > *outsize) {
+			errno = E2BIG;
+			return (size_t) -1;
+		}
+		memcpy(*outbuf, encoded, (size_t) outfwd);
+
+		/* Compare as unsigned so the result does not depend on the
+		 * signedness of plain char. */
+		if ((unsigned char) **inbuf != scalar || outfwd == 0)
+			nonidentical++;
+
+		*inbuf += infwd;
+		*insize -= infwd;
+		*outbuf += outfwd;
+		*outsize -= outfwd;
+	}
+
+	/* Terminate the output; the pointer itself must stay untouched. */
+	if (*outsize)
+		**outbuf = '\0';
+	return nonidentical;
+}
diff --git a/iconv.h b/iconv.h
new file mode 100644
index 0000000..42b3ea3
--- /dev/null
+++ b/iconv.h
@@ -0,0 +1,16 @@
+#ifndef ICONV_H
+#define ICONV_H
+
+#include <stdlib.h>
+
+/*
+ * __THROW is provided by glibc-style C library headers. Fall back to nothing
+ * so the declarations below still compile where it is not defined.
+ */
+#ifndef __THROW
+#define __THROW
+#endif
+
+/* Conversion descriptor; iconv_open() returns (iconv_t) -1 on failure. */
+typedef int iconv_t;
+
+/*
+ * Open a descriptor converting from codeset fromcode to codeset tocode.
+ * Returns (iconv_t) -1 and sets errno to EINVAL if either codeset is unknown.
+ */
+iconv_t iconv_open(const char *tocode, const char *fromcode) __THROW;
+
+/*
+ * Convert *inbytesleft bytes at *inbuf into *outbuf (room: *outbytesleft),
+ * advancing all four as characters are converted. Returns (size_t) -1 and
+ * sets errno on failure.
+ */
+size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
+	     char **outbuf, size_t *outbytesleft) __THROW;
+
+/* Close a descriptor; always returns 0. */
+int iconv_close(iconv_t) __THROW;
+
+
+#endif
diff --git a/test.c b/test.c
new file mode 100644
index 0000000..1cb8011
--- /dev/null
+++ b/test.c
@@ -0,0 +1,72 @@
+#include <errno.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "iconv.h"
+
+/*
+ * Print the bytes of the NUL-terminated string str to stdout as two-digit
+ * hex values separated by single spaces. An empty string prints as "00".
+ */
+void print_hex(char *str)
+{
+	printf("%.2x", (unsigned char) *str);
+	if (*str == '\0')
+		return;	/* empty string: do not step past the terminator */
+
+	for (str++; *str; str++)
+		printf(" %.2x", (unsigned char) *str);
+}
+
+/*
+ * Convert str with descriptor cd and print one line to stdout:
+ * "<input bytes in hex> -> <output bytes in hex>", followed by the error
+ * text in parentheses if iconv() failed.
+ */
+void convert_string(iconv_t cd, char *str)
+{
+	char outbuf[256];
+	char *outptr = outbuf;
+	const char *inptr = str;
+	/*
+	 * Keep the last byte out of iconv()'s reach so the zero-filled buffer
+	 * is always NUL-terminated when print_hex() walks it.
+	 */
+	size_t outsize = sizeof(outbuf) - 1;
+	size_t insize = strlen(str);
+	size_t ret;
+	int err = 0;
+
+	memset(outbuf, 0, sizeof(outbuf));
+	print_hex(str);
+	printf(" -> ");
+	ret = iconv(cd, &inptr, &insize, &outptr, &outsize);
+	/* iconv() reports failure as (size_t) -1; save errno before more I/O. */
+	if (ret == (size_t) -1)
+		err = errno;
+	print_hex(outbuf);
+	if (err)
+		printf(" (%s)", strerror(err));
+	printf("\n");
+}
+
+
+/*
+ * Print a ">>> program: from -> to" header, then convert argv[1..argc-1]
+ * from codeset from to codeset to, one output line per argument. If the
+ * descriptor cannot be opened, print the iconv_open() error instead.
+ */
+void convert_args(char *from, char *to, int argc, char **argv)
+{
+	iconv_t cd;
+	int arg;
+
+	printf("\n>>> %s: %s -> %s\n", basename(argv[0]), from, to);
+
+	cd = iconv_open(to, from);
+	if (cd >= 0) {
+		for (arg = 1; arg < argc; arg++)
+			convert_string(cd, argv[arg]);
+		iconv_close(cd);
+		return;
+	}
+
+	printf("iconv_open(\"%s\", \"%s\"): %s\n", to, from, strerror(errno));
+}
+
+
+/*
+ * Run every command-line argument through every ordered pair of codeset
+ * names below, including the unsupported name "invalid".
+ */
+int main(int argc, char **argv)
+{
+	char *codesets[] = { "ASCII", "ISO-8859-1", "UTF-8", "invalid", NULL };
+	int f, t;
+
+	for (f = 0; codesets[f] != NULL; f++) {
+		for (t = 0; codesets[t] != NULL; t++)
+			convert_args(codesets[f], codesets[t], argc, argv);
+	}
+
+	return 0;
+}
+
+
+
+