1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2018 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no> |
16 +----------------------------------------------------------------------+
17 */
18 /* $Id$ */
19
20 #include "php.h"
21 #include <stdlib.h>
22 #include <errno.h>
23 #include <ctype.h>
24 #include "php_string.h"
25
26 /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
27 /* {{{ proto string soundex(string str)
28 Calculate the soundex key of a string */
PHP_FUNCTION(soundex)29 PHP_FUNCTION(soundex)
30 {
31 char *str;
32 size_t i, _small, str_len, code, last;
33 char soundex[4 + 1];
34
35 static char soundex_table[26] =
36 {0, /* A */
37 '1', /* B */
38 '2', /* C */
39 '3', /* D */
40 0, /* E */
41 '1', /* F */
42 '2', /* G */
43 0, /* H */
44 0, /* I */
45 '2', /* J */
46 '2', /* K */
47 '4', /* L */
48 '5', /* M */
49 '5', /* N */
50 0, /* O */
51 '1', /* P */
52 '2', /* Q */
53 '6', /* R */
54 '2', /* S */
55 '3', /* T */
56 0, /* U */
57 '1', /* V */
58 0, /* W */
59 '2', /* X */
60 0, /* Y */
61 '2'}; /* Z */
62
63 ZEND_PARSE_PARAMETERS_START(1, 1)
64 Z_PARAM_STRING(str, str_len)
65 ZEND_PARSE_PARAMETERS_END();
66
67 if (str_len == 0) {
68 RETURN_FALSE;
69 }
70
71 /* build soundex string */
72 last = -1;
73 for (i = 0, _small = 0; i < str_len && _small < 4; i++) {
74 /* convert chars to upper case and strip non-letter chars */
75 /* BUG: should also map here accented letters used in non */
76 /* English words or names (also found in English text!): */
77 /* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
78 code = toupper((int)(unsigned char)str[i]);
79 if (code >= 'A' && code <= 'Z') {
80 if (_small == 0) {
81 /* remember first valid char */
82 soundex[_small++] = (char)code;
83 last = soundex_table[code - 'A'];
84 }
85 else {
86 /* ignore sequences of consonants with same soundex */
87 /* code in trail, and vowels unless they separate */
88 /* consonant letters */
89 code = soundex_table[code - 'A'];
90 if (code != last) {
91 if (code != 0) {
92 soundex[_small++] = (char)code;
93 }
94 last = code;
95 }
96 }
97 }
98 }
99 /* pad with '0' and terminate with 0 ;-) */
100 while (_small < 4) {
101 soundex[_small++] = '0';
102 }
103 soundex[_small] = '\0';
104
105 RETURN_STRINGL(soundex, _small);
106 }
107 /* }}} */
108
109 /*
110 * Local variables:
111 * tab-width: 4
112 * c-basic-offset: 4
113 * End:
114 * vim600: sw=4 ts=4 fdm=marker
115 * vim<600: sw=4 ts=4
116 */
117