xref: /php-src/ext/standard/soundex.c (revision 01b3fc03)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no>       |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include "php.h"
18 #include <stdlib.h>
19 #include <errno.h>
20 #include <ctype.h>
21 #include "php_string.h"
22 
23 /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
24 /* {{{ Calculate the soundex key of a string */
PHP_FUNCTION(soundex)25 PHP_FUNCTION(soundex)
26 {
27 	char	*str;
28 	size_t	i, _small, str_len, code, last;
29 	char	soundex[4 + 1];
30 
31 	static const char soundex_table[26] =
32 	{0,							/* A */
33 	 '1',						/* B */
34 	 '2',						/* C */
35 	 '3',						/* D */
36 	 0,							/* E */
37 	 '1',						/* F */
38 	 '2',						/* G */
39 	 0,							/* H */
40 	 0,							/* I */
41 	 '2',						/* J */
42 	 '2',						/* K */
43 	 '4',						/* L */
44 	 '5',						/* M */
45 	 '5',						/* N */
46 	 0,							/* O */
47 	 '1',						/* P */
48 	 '2',						/* Q */
49 	 '6',						/* R */
50 	 '2',						/* S */
51 	 '3',						/* T */
52 	 0,							/* U */
53 	 '1',						/* V */
54 	 0,							/* W */
55 	 '2',						/* X */
56 	 0,							/* Y */
57 	 '2'};						/* Z */
58 
59 	ZEND_PARSE_PARAMETERS_START(1, 1)
60 		Z_PARAM_STRING(str, str_len)
61 	ZEND_PARSE_PARAMETERS_END();
62 
63 	/* build soundex string */
64 	last = -1;
65 	for (i = 0, _small = 0; i < str_len && _small < 4; i++) {
66 		/* convert chars to upper case and strip non-letter chars */
67 		/* BUG: should also map here accented letters used in non */
68 		/* English words or names (also found in English text!): */
69 		/* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
70 		code = toupper((int)(unsigned char)str[i]);
71 		if (code >= 'A' && code <= 'Z') {
72 			if (_small == 0) {
73 				/* remember first valid char */
74 				soundex[_small++] = (char)code;
75 				last = soundex_table[code - 'A'];
76 			}
77 			else {
78 				/* ignore sequences of consonants with same soundex */
79 				/* code in trail, and vowels unless they separate */
80 				/* consonant letters */
81 				code = soundex_table[code - 'A'];
82 				if (code != last) {
83 					if (code != 0) {
84 						soundex[_small++] = (char)code;
85 					}
86 					last = code;
87 				}
88 			}
89 		}
90 	}
91 	/* pad with '0' and terminate with 0 ;-) */
92 	while (_small < 4) {
93 		soundex[_small++] = '0';
94 	}
95 	soundex[_small] = '\0';
96 
97 	RETURN_STRINGL(soundex, _small);
98 }
99 /* }}} */
100