aboutsummaryrefslogtreecommitdiffstats
path: root/main/musl/0039-fix-iconv-conversions-to-legacy-8bit-encodings.patch
blob: be89291505cdc30579e63637811475b22e5d94be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
From 97bd6b09dbe7478d5a90a06ecd9e5b59389d8eb9 Mon Sep 17 00:00:00 2001
From: Rich Felker <dalias@aerifal.cx>
Date: Sat, 27 May 2017 21:36:00 -0400
Subject: [PATCH] fix iconv conversions to legacy 8bit encodings

there was missing reverse-conversion logic for the case, handled
specially in the character set tables, where a byte represents a
unicode codepoint with the same value.

this patch adds code to handle the case, and refactors the two-level
10-bit table lookup for legacy character sets into a function to avoid
repeating it yet another time as part of the fix.
---
 src/locale/iconv.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/locale/iconv.c b/src/locale/iconv.c
index 1eeea94e..4636307f 100644
--- a/src/locale/iconv.c
+++ b/src/locale/iconv.c
@@ -151,6 +151,14 @@ static void put_32(unsigned char *s, unsigned c, int e)
 #define mbrtowc_utf8 mbrtowc
 #define wctomb_utf8 wctomb
 
+static unsigned legacy_map(const unsigned char *map, unsigned c)
+{
+	unsigned x = c - 128 + map[-1];
+	x = legacy_chars[ map[x*5/4]>>2*x%8 |
+		map[x*5/4+1]<<8-2*x%8 & 1023 ];
+	return x ? x : c;
+}
+
 size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb)
 {
 	size_t x=0;
@@ -364,10 +372,7 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
 			break;
 		default:
 			if (c < 128+type) break;
-			c -= 128+type;
-			c = legacy_chars[ map[c*5/4]>>2*c%8 |
-				map[c*5/4+1]<<8-2*c%8 & 1023 ];
-			if (!c) c = *(unsigned char *)*in;
+			c = legacy_map(map, c);
 			if (c==1) goto ilseq;
 		}
 
@@ -392,17 +397,15 @@ size_t iconv(iconv_t cd0, char **restrict in, size_t *restrict inb, char **restr
 			if (c > 0x7f) subst: x++, c='*';
 		default:
 			if (*outb < 1) goto toobig;
-			if (c < 128+totype) {
+			if (c < 128+totype || (c<256 && c==legacy_map(tomap, c))) {
 			revout:
 				*(*out)++ = c;
 				*outb -= 1;
 				break;
 			}
 			d = c;
-			for (c=0; c<128-totype; c++) {
-				if (d == legacy_chars[ tomap[c*5/4]>>2*c%8 |
-					tomap[c*5/4+1]<<8-2*c%8 & 1023 ]) {
-					c += 128;
+			for (c=128+totype; c<256; c++) {
+				if (d == legacy_map(tomap, c)) {
 					goto revout;
 				}
 			}
-- 
2.13.0