xref: /PHP-8.4/ext/gd/libgd/gdkanji.c (revision 70872dde)
1 
2 /* gdkanji.c (Kanji code converter)                            */
3 /*                 written by Masahito Yamaga (ma@yama-ga.com) */
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include "gd.h"
9 #include "gdhelpers.h"
10 
11 #include <stdarg.h>
12 #if defined(HAVE_ICONV_H) || defined(HAVE_ICONV)
13 #include <iconv.h>
14 #include <errno.h>
15 #endif
16 
17 #if defined(HAVE_ICONV_H) && !defined(HAVE_ICONV)
18 #define HAVE_ICONV 1
19 #endif
20 
21 #define LIBNAME "any2eucjp()"
22 
23 #if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
24 #ifndef SJISPRE
25 #define SJISPRE 1
26 #endif
27 #endif
28 
29 #ifdef TRUE
30 #undef TRUE
31 #endif
32 #ifdef FALSE
33 #undef FALSE
34 #endif
35 
36 #define TRUE  1
37 #define FALSE 0
38 
39 #define NEW 1
40 #define OLD 2
41 #define ESCI 3
42 #define NEC 4
43 #define EUC 5
44 #define SJIS 6
45 #define EUCORSJIS 7
46 #define ASCII 8
47 
48 #define NEWJISSTR "JIS7"
49 #define OLDJISSTR "jis"
50 #define EUCSTR    "eucJP"
51 #define SJISSTR   "SJIS"
52 
53 #define ESC 27
54 #define SS2 142
55 
/*
 * Print a printf-style diagnostic line on stdout, prefixed with LIBNAME.
 * The function body is compiled in only when DEBUG is defined; otherwise
 * the call is a no-op.
 */
static void
debug (const char *format, ...)
{
#ifdef DEBUG
  va_list ap;

  va_start (ap, format);
  fprintf (stdout, "%s: ", LIBNAME);
  vfprintf (stdout, format, ap);
  fprintf (stdout, "\n");
  va_end (ap);
#else
  (void) format;		/* nothing to do in non-debug builds */
#endif
}
69 
70 static void
error(const char * format,...)71 error (const char *format,...)
72 {
73 	va_list args;
74 	char *tmp;
75 
76 	va_start(args, format);
77 	vspprintf(&tmp, 0, format, args);
78 	va_end(args);
79 	php_error_docref(NULL, E_WARNING, "%s: %s", LIBNAME, tmp);
80 	efree(tmp);
81 }
82 
83 /* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */
84 
85 static int
DetectKanjiCode(unsigned char * str)86 DetectKanjiCode (unsigned char *str)
87 {
88   static int whatcode = ASCII;
89   int oldcode = ASCII;
90   int c, i;
91   char *lang = NULL;
92 
93   c = '\1';
94   i = 0;
95 
96   if (whatcode != EUCORSJIS && whatcode != ASCII)
97     {
98       oldcode = whatcode;
99       whatcode = ASCII;
100     }
101 
102   while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0')
103     {
104       if ((c = str[i++]) != '\0')
105 	{
106 	  if (c == ESC)
107 	    {
108 	      c = str[i++];
109 	      if (c == '$')
110 		{
111 		  c = str[i++];
112 		  if (c == 'B')
113 		    whatcode = NEW;
114 		  else if (c == '@')
115 		    whatcode = OLD;
116 		}
117 	      else if (c == '(')
118 		{
119 		  c = str[i++];
120 		  if (c == 'I')
121 		    whatcode = ESCI;
122 		}
123 	      else if (c == 'K')
124 		whatcode = NEC;
125 	    }
126 	  else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
127 	    whatcode = SJIS;
128 	  else if (c == SS2)
129 	    {
130 	      c = str[i++];
131 	      if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160) || (c >= 224 && c <= 252))
132 		whatcode = SJIS;
133 	      else if (c >= 161 && c <= 223)
134 		whatcode = EUCORSJIS;
135 	    }
136 	  else if (c >= 161 && c <= 223)
137 	    {
138 	      c = str[i++];
139 	      if (c >= 240 && c <= 254)
140 		whatcode = EUC;
141 	      else if (c >= 161 && c <= 223)
142 		whatcode = EUCORSJIS;
143 	      else if (c >= 224 && c <= 239)
144 		{
145 		  whatcode = EUCORSJIS;
146 		  while (c >= 64 && whatcode == EUCORSJIS)
147 		    {
148 		      if (c >= 129)
149 			{
150 			  if (c <= 141 || (c >= 143 && c <= 159))
151 			    whatcode = SJIS;
152 			  else if (c >= 253 && c <= 254)
153 			    whatcode = EUC;
154 			}
155 		      c = str[i++];
156 		    }
157 		}
158 	      else if (c <= 159)
159 		whatcode = SJIS;
160 	    }
161 	  else if (c >= 240 && c <= 254)
162 	    whatcode = EUC;
163 	  else if (c >= 224 && c <= 239)
164 	    {
165 	      c = str[i++];
166 	      if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
167 		whatcode = SJIS;
168 	      else if (c >= 253 && c <= 254)
169 		whatcode = EUC;
170 	      else if (c >= 161 && c <= 252)
171 		whatcode = EUCORSJIS;
172 	    }
173 	}
174     }
175 
176 #ifdef DEBUG
177   if (whatcode == ASCII)
178     debug ("Kanji code not included.");
179   else if (whatcode == EUCORSJIS)
180     debug ("Kanji code not detected.");
181   else
182     debug ("Kanji code detected at %d byte.", i);
183 #endif
184 
185   if (whatcode == EUCORSJIS && oldcode != ASCII)
186     whatcode = oldcode;
187 
188   if (whatcode == EUCORSJIS)
189     {
190       if (getenv ("LC_ALL"))
191 	lang = getenv ("LC_ALL");
192       else if (getenv ("LC_CTYPE"))
193 	lang = getenv ("LC_CTYPE");
194       else if (getenv ("LANG"))
195 	lang = getenv ("LANG");
196 
197       if (lang)
198 	{
199 	  if (strcmp (lang, "ja_JP.SJIS") == 0 ||
200 #ifdef hpux
201 	      strcmp (lang, "japanese") == 0 ||
202 #endif
203 	      strcmp (lang, "ja_JP.mscode") == 0 ||
204 	      strcmp (lang, "ja_JP.PCK") == 0)
205 	    whatcode = SJIS;
206 	  else if (strncmp (lang, "ja", 2) == 0)
207 #ifdef SJISPRE
208 	    whatcode = SJIS;
209 #else
210 	    whatcode = EUC;
211 #endif
212 	}
213     }
214 
215   if (whatcode == EUCORSJIS)
216 #ifdef SJISPRE
217     whatcode = SJIS;
218 #else
219     whatcode = EUC;
220 #endif
221 
222   return whatcode;
223 }
224 
/* SJIStoJIS() is sjis2jis() by Ken Lunde. */

/*
 * Convert a two-byte Shift_JIS code to the corresponding two-byte JIS code,
 * in place.
 *
 *   p1  in: first (lead) byte   out: first JIS byte
 *   p2  in: second (trail) byte out: second JIS byte
 *
 * Only the low 8 bits of *p1 and *p2 are used to select the offsets.  The
 * row computation uses multiplication rather than "<< 1" because
 * left-shifting a negative value (a lead byte below the row offset) is
 * undefined behaviour in C.
 */
static void
SJIStoJIS (int *p1, int *p2)
{
  const unsigned char lead = (unsigned char) *p1;
  const unsigned char trail = (unsigned char) *p2;
  /* Trail bytes below 159 belong to the first (odd) row of the pair. */
  const int adjust = trail < 159;
  const int row_offset = lead < 160 ? 112 : 176;
  const int cell_offset = adjust ? (31 + (trail > 127)) : 126;

  *p1 = (lead - row_offset) * 2 - adjust;
  *p2 -= cell_offset;
}
239 
/* han2zen() was derived from han2zen() written by Ken Lunde. */

/* True when byte value c may take a following daku-ten (222) mark. */
#define IS_DAKU(c) (((c) >= 182 && (c) <= 196) || ((c) >= 202 && (c) <= 206) || ((c) == 179))
/* True when byte value c may take a following han-daku-ten (223) mark. */
#define IS_HANDAKU(c) ((c) >= 202 && (c) <= 206)

/*
 * Replace a one-byte code in the range 161..223 with a two-byte code taken
 * from a lookup table (the caller in this file passes the result on to
 * SJIStoJIS()).
 *
 *   p1  in: the one-byte code          out: first byte of the two-byte code
 *   p2  in: 222 (daku-ten), 223 (han-daku-ten) or anything else for "no
 *           mark"                      out: second byte of the two-byte code
 *
 * When the mark can combine with the character (see IS_DAKU / IS_HANDAKU)
 * the second byte is adjusted to the combined form.
 *
 * If *p1 is outside 161..223 there is no table entry; *p1 and *p2 are left
 * unchanged instead of indexing outside the table.
 */
static void
han2zen (int *p1, int *p2)
{
  /* One entry per input code, indexed by (code - 161). */
  static const unsigned char mtable[223 - 161 + 1][2] =
  {
    {129, 66},  {129, 117}, {129, 118}, {129, 65},  {129, 69},	/* 161-165 */
    {131, 146}, {131, 64},  {131, 66},  {131, 68},  {131, 70},	/* 166-170 */
    {131, 72},  {131, 131}, {131, 133}, {131, 135}, {131, 98},	/* 171-175 */
    {129, 91},  {131, 65},  {131, 67},  {131, 69},  {131, 71},	/* 176-180 */
    {131, 73},  {131, 74},  {131, 76},  {131, 78},  {131, 80},	/* 181-185 */
    {131, 82},  {131, 84},  {131, 86},  {131, 88},  {131, 90},	/* 186-190 */
    {131, 92},  {131, 94},  {131, 96},  {131, 99},  {131, 101},	/* 191-195 */
    {131, 103}, {131, 105}, {131, 106}, {131, 107}, {131, 108},	/* 196-200 */
    {131, 109}, {131, 110}, {131, 113}, {131, 116}, {131, 119},	/* 201-205 */
    {131, 122}, {131, 125}, {131, 126}, {131, 128}, {131, 129},	/* 206-210 */
    {131, 130}, {131, 132}, {131, 134}, {131, 136}, {131, 137},	/* 211-215 */
    {131, 138}, {131, 139}, {131, 140}, {131, 141}, {131, 143},	/* 216-220 */
    {131, 147}, {129, 74},  {129, 75}				/* 221-223 */
  };
  const int c = *p1;
  int daku;
  int handaku;

  /* No table entry for this byte: refuse rather than read out of bounds. */
  if (c < 161 || c > 223)
    return;

  daku = (*p2 == 222 && IS_DAKU (c));			/* Daku-ten */
  handaku = (!daku && *p2 == 223 && IS_HANDAKU (c));	/* Han-daku-ten */

  *p1 = mtable[c - 161][0];
  *p2 = mtable[c - 161][1];

  if (daku)
    {
      if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122))
        (*p2)++;		/* voiced form is the next code */
      else if (*p1 == 131 && *p2 == 69)
        *p2 = 148;		/* entry for code 179 has its own voiced code */
    }
  else if (handaku && *p2 >= 110 && *p2 <= 122)
    (*p2) += 2;			/* semi-voiced form is two codes further */
}
336 
337 /* Recast strcpy to handle unsigned chars used below. */
338 #define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
339 
340 static void
do_convert(unsigned char * to,unsigned char * from,const char * code)341 do_convert (unsigned char *to, unsigned char *from, const char *code)
342 {
343 #ifdef HAVE_ICONV
344   iconv_t cd;
345   size_t from_len, to_len;
346 
347   if ((cd = iconv_open (EUCSTR, code)) == (iconv_t) - 1)
348     {
349       error ("iconv_open() error");
350       if (errno == EINVAL)
351 	error ("invalid code specification: \"%s\" or \"%s\"",
352 	       EUCSTR, code);
353       strcpy ((char *) to, (const char *) from);
354       return;
355     }
356 
357   from_len = strlen ((const char *) from) + 1;
358   to_len = BUFSIZ;
359 
360   if ((int) iconv(cd, (char **) &from, &from_len, (char **) &to, &to_len) == -1)
361     {
362       if (errno == EINVAL)
363 	error ("invalid end of input string");
364       else if (errno == EILSEQ)
365 	error ("invalid code in input string");
366       else if (errno == E2BIG)
367 	error ("output buffer overflow at do_convert()");
368       else
369 	error ("something happen");
370       strcpy ((char *) to, (const char *) from);
371       return;
372     }
373 
374   if (iconv_close (cd) != 0)
375     {
376       error ("iconv_close() error");
377     }
378 #else
379   int p1, p2, i, j;
380   int jisx0208 = FALSE;
381   int hankaku = FALSE;
382 
383   j = 0;
384   if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0)
385     {
386       for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
387 	{
388 	  if (from[i] == ESC)
389 	    {
390 	      i++;
391 	      if (from[i] == '$')
392 		{
393 		  jisx0208 = TRUE;
394 		  hankaku = FALSE;
395 		  i++;
396 		}
397 	      else if (from[i] == '(')
398 		{
399 		  jisx0208 = FALSE;
400 		  i++;
401 		  if (from[i] == 'I')	/* Hankaku Kana */
402 		    hankaku = TRUE;
403 		  else
404 		    hankaku = FALSE;
405 		}
406 	    }
407 	  else
408 	    {
409 	      if (jisx0208)
410 		to[j++] = from[i] + 128;
411 	      else if (hankaku)
412 		{
413 		  to[j++] = SS2;
414 		  to[j++] = from[i] + 128;
415 		}
416 	      else
417 		to[j++] = from[i];
418 	    }
419 	}
420     }
421   else if (strcmp (code, SJISSTR) == 0)
422     {
423       for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
424 	{
425 	  p1 = from[i];
426 	  if (p1 < 127)
427 	    to[j++] = p1;
428 	  else if ((p1 >= 161) && (p1 <= 223))
429 	    {			/* Hankaku Kana */
430 	      to[j++] = SS2;
431 	      to[j++] = p1;
432 	    }
433 	  else
434 	    {
435 	      p2 = from[++i];
436 	      SJIStoJIS (&p1, &p2);
437 	      to[j++] = p1 + 128;
438 	      to[j++] = p2 + 128;
439 	    }
440 	}
441     }
442   else
443     {
444       error ("invalid code specification: \"%s\"", code);
445       return;
446     }
447 
448   if (j >= BUFSIZ)
449     {
450       error ("output buffer overflow at do_convert()");
451       ustrcpy (to, from);
452     }
453   else
454     to[j] = '\0';
455 #endif /* HAVE_ICONV */
456 }
457 
458 static int
do_check_and_conv(unsigned char * to,unsigned char * from)459 do_check_and_conv (unsigned char *to, unsigned char *from)
460 {
461   static unsigned char tmp[BUFSIZ];
462   int p1, p2, i, j;
463   int kanji = TRUE;
464 
465   switch (DetectKanjiCode (from))
466     {
467     case NEW:
468       debug ("Kanji code is New JIS.");
469       do_convert (tmp, from, NEWJISSTR);
470       break;
471     case OLD:
472       debug ("Kanji code is Old JIS.");
473       do_convert (tmp, from, OLDJISSTR);
474       break;
475     case ESCI:
476       debug ("This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
477       do_convert (tmp, from, NEWJISSTR);
478       break;
479     case NEC:
480       debug ("Kanji code is NEC Kanji.");
481       error ("cannot convert NEC Kanji.");
482       ustrcpy (tmp, from);
483       kanji = FALSE;
484       break;
485     case EUC:
486       debug ("Kanji code is EUC.");
487       ustrcpy (tmp, from);
488       break;
489     case SJIS:
490       debug ("Kanji code is SJIS.");
491       do_convert (tmp, from, SJISSTR);
492       break;
493     case EUCORSJIS:
494       debug ("Kanji code is EUC or SJIS.");
495       ustrcpy (tmp, from);
496       kanji = FALSE;
497       break;
498     case ASCII:
499       debug ("This is ASCII string.");
500       ustrcpy (tmp, from);
501       kanji = FALSE;
502       break;
503     default:
504       debug ("This string includes unknown code.");
505       ustrcpy (tmp, from);
506       kanji = FALSE;
507       break;
508     }
509 
510   /* Hankaku Kana ---> Zenkaku Kana */
511   if (kanji)
512     {
513       j = 0;
514       for (i = 0; tmp[i] != '\0' && j < BUFSIZ; i++)
515 	{
516 	  if (tmp[i] == SS2)
517 	    {
518 	      p1 = tmp[++i];
519 	      if (tmp[i + 1] == SS2)
520 		{
521 		  p2 = tmp[i + 2];
522 		  if (p2 == 222 || p2 == 223)
523 		    i += 2;
524 		  else
525 		    p2 = 0;
526 		}
527 	      else
528 		p2 = 0;
529 	      han2zen (&p1, &p2);
530 	      SJIStoJIS (&p1, &p2);
531 	      to[j++] = p1 + 128;
532 	      to[j++] = p2 + 128;
533 	    }
534 	  else
535 	    to[j++] = tmp[i];
536 	}
537 
538       if (j >= BUFSIZ)
539 	{
540 	  error ("output buffer overflow at Hankaku --> Zenkaku");
541 	  ustrcpy (to, tmp);
542 	}
543       else
544 	to[j] = '\0';
545     }
546   else
547     ustrcpy (to, tmp);
548 
549   return kanji;
550 }
551 
/*
 * Convert the NUL-terminated string `src` to EUC and store the result in
 * `dest`, a buffer of `dest_max` bytes.
 *
 * Returns the value of do_check_and_conv() (TRUE when a Kanji encoding was
 * recognised, FALSE otherwise), or -1 on error:
 *   - `src` is BUFSIZ bytes or longer, or `dest_max` exceeds BUFSIZ
 *     (`dest` is not written);
 *   - the converted string does not fit in `dest_max` bytes.  In that case
 *     `src` is copied to `dest` unchanged if it fits; otherwise `dest` is
 *     set to the empty string (when dest_max > 0), so `dest` is never
 *     written beyond `dest_max` bytes.
 *
 * Uses a static scratch buffer.
 */
int
any2eucjp (unsigned char *dest, unsigned char *src, unsigned int dest_max)
{
  static unsigned char tmp_dest[BUFSIZ];
  size_t src_len;
  int ret;

  src_len = strlen ((const char *) src);
  if (src_len >= BUFSIZ)
    {
      error ("input string too large");
      return -1;
    }
  if (dest_max > BUFSIZ)
    {
      error ("invalid maximum size of destination\nit should be less than %d.", BUFSIZ);
      return -1;
    }

  ret = do_check_and_conv (tmp_dest, src);
  if (strlen ((const char *) tmp_dest) >= dest_max)
    {
      error ("output buffer overflow");
      /* Fall back to the raw input only when that fits; never overrun
         the caller's buffer. */
      if (src_len < dest_max)
        ustrcpy (dest, src);
      else if (dest_max > 0)
        dest[0] = '\0';
      return -1;
    }

  ustrcpy (dest, tmp_dest);
  return ret;
}
578 
#if 0
/*
 * Disabled helper code, kept for reference.
 *
 * strwidth() returns the byte length of `s` after conversion with
 * any2eucjp(), or 0 when the scratch buffer cannot be allocated.
 */
unsigned int
strwidth (unsigned char *s)
{
  unsigned char *t;
  unsigned int i;

  t = (unsigned char *) gdMalloc (BUFSIZ);
  if (t == NULL)
    return 0;

  /* any2eucjp() may return early without writing to its destination. */
  t[0] = '\0';
  any2eucjp (t, s, BUFSIZ);
  i = (unsigned int) strlen ((const char *) t);
  gdFree (t);
  return i;
}

#ifdef DEBUG
/*
 * Test driver: read one line from stdin, report its length before and
 * after conversion, then print the converted bytes.
 */
int
main (void)
{
  unsigned char input[BUFSIZ];
  unsigned char *output;
  unsigned char *str;
  int c, i = 0;

  /* Stop at newline or EOF, and leave room for the terminating NUL. */
  while ((c = fgetc (stdin)) != '\n' && c != EOF && i < BUFSIZ - 1)
    input[i++] = c;
  input[i] = '\0';

  printf ("input : %u bytes\n", (unsigned int) strlen ((const char *) input));
  printf ("output: %u bytes\n", strwidth (input));

  output = (unsigned char *) gdMalloc (BUFSIZ);
  if (output == NULL)
    return 1;

  output[0] = '\0';
  any2eucjp (output, input, BUFSIZ);
  str = output;
  while (*str != '\0')
    putchar (*(str++));
  putchar ('\n');
  gdFree (output);

  return 0;
}
#endif
#endif
621