xref: /PHP-5.3/ext/standard/soundex.c (revision a2045ff3)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2013 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no>       |
16    +----------------------------------------------------------------------+
17  */
18 /* $Id$ */
19 
20 #include "php.h"
21 #include <stdlib.h>
22 #include <errno.h>
23 #include <ctype.h>
24 #include "php_string.h"
25 
26 /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
27 /* {{{ proto string soundex(string str)
28    Calculate the soundex key of a string */
PHP_FUNCTION(soundex)29 PHP_FUNCTION(soundex)
30 {
31 	char	*str;
32 	int	i, _small, str_len, code, last;
33 	char	soundex[4 + 1];
34 
35 	static char soundex_table[26] =
36 	{0,							/* A */
37 	 '1',						/* B */
38 	 '2',						/* C */
39 	 '3',						/* D */
40 	 0,							/* E */
41 	 '1',						/* F */
42 	 '2',						/* G */
43 	 0,							/* H */
44 	 0,							/* I */
45 	 '2',						/* J */
46 	 '2',						/* K */
47 	 '4',						/* L */
48 	 '5',						/* M */
49 	 '5',						/* N */
50 	 0,							/* O */
51 	 '1',						/* P */
52 	 '2',						/* Q */
53 	 '6',						/* R */
54 	 '2',						/* S */
55 	 '3',						/* T */
56 	 0,							/* U */
57 	 '1',						/* V */
58 	 0,							/* W */
59 	 '2',						/* X */
60 	 0,							/* Y */
61 	 '2'};						/* Z */
62 
63 	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &str, &str_len) == FAILURE) {
64 		return;
65 	}
66 	if (str_len == 0) {
67 		RETURN_FALSE;
68 	}
69 
70 	/* build soundex string */
71 	last = -1;
72 	for (i = 0, _small = 0; i < str_len && _small < 4; i++) {
73 		/* convert chars to upper case and strip non-letter chars */
74 		/* BUG: should also map here accented letters used in non */
75 		/* English words or names (also found in English text!): */
76 		/* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
77 		code = toupper((int)(unsigned char)str[i]);
78 		if (code >= 'A' && code <= 'Z') {
79 			if (_small == 0) {
80 				/* remember first valid char */
81 				soundex[_small++] = code;
82 				last = soundex_table[code - 'A'];
83 			}
84 			else {
85 				/* ignore sequences of consonants with same soundex */
86 				/* code in trail, and vowels unless they separate */
87 				/* consonant letters */
88 				code = soundex_table[code - 'A'];
89 				if (code != last) {
90 					if (code != 0) {
91 						soundex[_small++] = code;
92 					}
93 					last = code;
94 				}
95 			}
96 		}
97 	}
98 	/* pad with '0' and terminate with 0 ;-) */
99 	while (_small < 4) {
100 		soundex[_small++] = '0';
101 	}
102 	soundex[_small] = '\0';
103 
104 	RETURN_STRINGL(soundex, _small, 1);
105 }
106 /* }}} */
107 
108 /*
109  * Local variables:
110  * tab-width: 4
111  * c-basic-offset: 4
112  * End:
113  * vim600: sw=4 ts=4 fdm=marker
114  * vim<600: sw=4 ts=4
115  */
116