1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Gustavo Lopes <cataphract@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "codepointiterator_internal.h"
18 #include <unicode/uchriter.h>
19 #include <typeinfo>
20
21 #include "php.h"
22
23 //copied from cmemory.h, which is not public
24 typedef union {
25 zend_long t1;
26 double t2;
27 void *t3;
28 } UAlignedMemory;
29
/* Low bits of a pointer value selected by `lsbMask`. */
#define U_POINTER_MASK_LSB(p, lsbMask) (((ptrdiff_t)(char *)(p)) & (lsbMask))
/* Number of bytes `p` lies past the previous sizeof(UAlignedMemory) boundary
 * (0 when `p` is already on a boundary). */
#define U_ALIGNMENT_OFFSET(p) U_POINTER_MASK_LSB(p, sizeof(UAlignedMemory) - 1)
/* sizeof(UAlignedMemory) minus the offset above; note that this yields a full
 * sizeof(UAlignedMemory) (not 0) for an already aligned pointer. */
#define U_ALIGNMENT_OFFSET_UP(p) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(p))
33
34 using namespace PHP;
35
36 using icu::UCharCharacterIterator;
37
/* ICU-provided macro supplying the class-ID (ICU "poor man's RTTI")
 * boilerplate for CodePointBreakIterator; see ICU's unicode/uobject.h. */
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CodePointBreakIterator)
39
40 CodePointBreakIterator::CodePointBreakIterator()
41 : BreakIterator(), fCharIter(NULL), lastCodePoint(U_SENTINEL)
42 {
43 UErrorCode uec = UErrorCode();
44 this->fText = utext_openUChars(NULL, NULL, 0, &uec);
45 }
46
CodePointBreakIterator(const PHP::CodePointBreakIterator & other)47 CodePointBreakIterator::CodePointBreakIterator(const PHP::CodePointBreakIterator &other)
48 : BreakIterator(other), fText(NULL), fCharIter(NULL), lastCodePoint(U_SENTINEL)
49 {
50 *this = other;
51 }
52
/* Copy assignment. Clones the other iterator's UText into this->fText
 * (utext_clone with deep=FALSE, readOnly=TRUE), discards any cached
 * character iterator and copies lastCodePoint. Self-assignment is a no-op.
 * The status of the clone is not inspected. */
CodePointBreakIterator& CodePointBreakIterator::operator=(const CodePointBreakIterator& that)
{
	UErrorCode status = UErrorCode();

	if (this != &that) {
		this->fText = utext_clone(this->fText, that.fText, FALSE, TRUE, &status);

		//the character iterator is deliberately not copied: getText() is deprecated
		clearCurrentCharIter();

		//assigned after clearCurrentCharIter() so the copied value is what remains
		this->lastCodePoint = that.lastCodePoint;
	}

	return *this;
}
69
~CodePointBreakIterator()70 CodePointBreakIterator::~CodePointBreakIterator()
71 {
72 if (this->fText) {
73 utext_close(this->fText);
74 }
75 clearCurrentCharIter();
76 }
77
operator ==(const BreakIterator & that) const78 UBool CodePointBreakIterator::operator==(const BreakIterator& that) const
79 {
80 if (typeid(*this) != typeid(that)) {
81 return FALSE;
82 }
83
84 const CodePointBreakIterator& that2 =
85 static_cast<const CodePointBreakIterator&>(that);
86
87 if (!utext_equals(this->fText, that2.fText)) {
88 return FALSE;
89 }
90
91 return TRUE;
92 }
93
clone(void) const94 CodePointBreakIterator* CodePointBreakIterator::clone(void) const
95 {
96 return new CodePointBreakIterator(*this);
97 }
98
getText(void) const99 CharacterIterator& CodePointBreakIterator::getText(void) const
100 {
101 if (this->fCharIter == NULL) {
102 //this method is deprecated anyway; setup bogus iterator
103 static const UChar c = 0;
104 this->fCharIter = new UCharCharacterIterator(&c, 0);
105 }
106
107 return *this->fCharIter;
108 }
109
getUText(UText * fillIn,UErrorCode & status) const110 UText *CodePointBreakIterator::getUText(UText *fillIn, UErrorCode &status) const
111 {
112 return utext_clone(fillIn, this->fText, FALSE, TRUE, &status);
113 }
114
setText(const UnicodeString & text)115 void CodePointBreakIterator::setText(const UnicodeString &text)
116 {
117 UErrorCode uec = UErrorCode();
118
119 //this closes the previous utext, if any
120 this->fText = utext_openConstUnicodeString(this->fText, &text, &uec);
121
122 clearCurrentCharIter();
123 }
124
setText(UText * text,UErrorCode & status)125 void CodePointBreakIterator::setText(UText *text, UErrorCode &status)
126 {
127 if (U_FAILURE(status)) {
128 return;
129 }
130
131 this->fText = utext_clone(this->fText, text, FALSE, TRUE, &status);
132
133 clearCurrentCharIter();
134 }
135
adoptText(CharacterIterator * it)136 void CodePointBreakIterator::adoptText(CharacterIterator* it)
137 {
138 UErrorCode uec = UErrorCode();
139 clearCurrentCharIter();
140
141 this->fCharIter = it;
142 this->fText = utext_openCharacterIterator(this->fText, it, &uec);
143 }
144
first(void)145 int32_t CodePointBreakIterator::first(void)
146 {
147 UTEXT_SETNATIVEINDEX(this->fText, 0);
148 this->lastCodePoint = U_SENTINEL;
149
150 return 0;
151 }
152
last(void)153 int32_t CodePointBreakIterator::last(void)
154 {
155 int32_t pos = (int32_t)utext_nativeLength(this->fText);
156 UTEXT_SETNATIVEINDEX(this->fText, pos);
157 this->lastCodePoint = U_SENTINEL;
158
159 return pos;
160 }
161
previous(void)162 int32_t CodePointBreakIterator::previous(void)
163 {
164 this->lastCodePoint = UTEXT_PREVIOUS32(this->fText);
165 if (this->lastCodePoint == U_SENTINEL) {
166 return BreakIterator::DONE;
167 }
168
169 return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
170 }
171
next(void)172 int32_t CodePointBreakIterator::next(void)
173 {
174 this->lastCodePoint = UTEXT_NEXT32(this->fText);
175 if (this->lastCodePoint == U_SENTINEL) {
176 return BreakIterator::DONE;
177 }
178
179 return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
180 }
181
current(void) const182 int32_t CodePointBreakIterator::current(void) const
183 {
184 return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
185 }
186
following(int32_t offset)187 int32_t CodePointBreakIterator::following(int32_t offset)
188 {
189 this->lastCodePoint = utext_next32From(this->fText, offset);
190 if (this->lastCodePoint == U_SENTINEL) {
191 return BreakIterator::DONE;
192 }
193
194 return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
195 }
196
preceding(int32_t offset)197 int32_t CodePointBreakIterator::preceding(int32_t offset)
198 {
199 this->lastCodePoint = utext_previous32From(this->fText, offset);
200 if (this->lastCodePoint == U_SENTINEL) {
201 return BreakIterator::DONE;
202 }
203
204 return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
205 }
206
isBoundary(int32_t offset)207 UBool CodePointBreakIterator::isBoundary(int32_t offset)
208 {
209 //this function has side effects, and it's supposed to
210 utext_setNativeIndex(this->fText, offset);
211 return (offset == utext_getNativeIndex(this->fText));
212 }
213
next(int32_t n)214 int32_t CodePointBreakIterator::next(int32_t n)
215 {
216 UBool res = utext_moveIndex32(this->fText, n);
217
218 #ifndef UTEXT_CURRENT32
219 #define UTEXT_CURRENT32 utext_current32
220 #endif
221
222 if (res) {
223 this->lastCodePoint = UTEXT_CURRENT32(this->fText);
224 return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
225 } else {
226 this->lastCodePoint = U_SENTINEL;
227 return BreakIterator::DONE;
228 }
229 }
230
createBufferClone(void * stackBuffer,int32_t & bufferSize,UErrorCode & status)231 CodePointBreakIterator *CodePointBreakIterator::createBufferClone(
232 void *stackBuffer, int32_t &bufferSize, UErrorCode &status)
233 {
234 //see implementation of RuleBasedBreakIterator::createBufferClone()
235 if (U_FAILURE(status)) {
236 return NULL;
237 }
238
239 if (bufferSize <= 0) {
240 bufferSize = sizeof(CodePointBreakIterator) + U_ALIGNMENT_OFFSET_UP(0);
241 return NULL;
242 }
243
244 char *buf = (char*)stackBuffer;
245 uint32_t s = bufferSize;
246
247 if (stackBuffer == NULL) {
248 s = 0;
249 }
250
251 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
252 uint32_t offsetUp = (uint32_t)U_ALIGNMENT_OFFSET_UP(buf);
253 s -= offsetUp;
254 buf += offsetUp;
255 }
256
257 if (s < sizeof(CodePointBreakIterator)) {
258 CodePointBreakIterator *clonedBI = new CodePointBreakIterator(*this);
259 if (clonedBI == NULL) {
260 status = U_MEMORY_ALLOCATION_ERROR;
261 } else {
262 status = U_SAFECLONE_ALLOCATED_WARNING;
263 }
264
265 return clonedBI;
266 }
267
268 return new(buf) CodePointBreakIterator(*this);
269 }
270
refreshInputText(UText * input,UErrorCode & status)271 CodePointBreakIterator &CodePointBreakIterator::refreshInputText(UText *input, UErrorCode &status)
272 {
273 //see implementation of RuleBasedBreakIterator::createBufferClone()
274 if (U_FAILURE(status)) {
275 return *this;
276 }
277 if (input == NULL) {
278 status = U_ILLEGAL_ARGUMENT_ERROR;
279 return *this;
280 }
281
282 int64_t pos = utext_getNativeIndex(this->fText);
283 this->fText = utext_clone(this->fText, input, FALSE, TRUE, &status);
284 if (U_FAILURE(status)) {
285 return *this;
286 }
287
288 utext_setNativeIndex(this->fText, pos);
289 if (utext_getNativeIndex(fText) != pos) {
290 status = U_ILLEGAL_ARGUMENT_ERROR;
291 }
292
293 return *this;
294 }
295