1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Gustavo Lopes <cataphract@php.net>                          |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include "codepointiterator_internal.h"
18 #include <unicode/uchriter.h>
19 #include <typeinfo>
20 
21 #include "php.h"
22 
23 //copied from cmemory.h, which is not public
24 typedef union {
25     zend_long    t1;
26     double  t2;
27     void   *t3;
28 } UAlignedMemory;
29 
// Low-order bits of the address `ptr` selected by `mask`.
#define U_POINTER_MASK_LSB(ptr, mask) \
	(((ptrdiff_t)(char *)(ptr)) & (mask))

// Distance in bytes of `ptr` past the previous UAlignedMemory-sized boundary;
// 0 when `ptr` is aligned. Assumes sizeof(UAlignedMemory) is a power of two.
#define U_ALIGNMENT_OFFSET(ptr) \
	U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)

// Bytes to add to `ptr` to reach the next boundary. Note that for an already
// aligned `ptr` this evaluates to sizeof(UAlignedMemory), not 0.
#define U_ALIGNMENT_OFFSET_UP(ptr) \
	(sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
33 
34 using namespace PHP;
35 
36 using icu::UCharCharacterIterator;
37 
// Expands to ICU's RTTI boilerplate (the class ID accessors declared through
// UObject) for CodePointBreakIterator.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CodePointBreakIterator)
39 
40 CodePointBreakIterator::CodePointBreakIterator()
41 : BreakIterator(), fCharIter(NULL), lastCodePoint(U_SENTINEL)
42 {
43 	UErrorCode uec = UErrorCode();
44 	this->fText = utext_openUChars(NULL, NULL, 0, &uec);
45 }
46 
CodePointBreakIterator(const PHP::CodePointBreakIterator & other)47 CodePointBreakIterator::CodePointBreakIterator(const PHP::CodePointBreakIterator &other)
48 : BreakIterator(other), fText(NULL), fCharIter(NULL), lastCodePoint(U_SENTINEL)
49 {
50 	*this = other;
51 }
52 
/**
 * Copy assignment.
 *
 * Replaces this iterator's text with a shallow, read-only clone of
 * that.fText and copies the last code point read. The character iterator is
 * not copied; whatever this object currently holds is dropped through
 * clearCurrentCharIter(). Self-assignment is a no-op.
 *
 * The ICU status produced by the clone is not reported to the caller.
 *
 * @param that  iterator to copy from
 * @return      *this
 */
CodePointBreakIterator& CodePointBreakIterator::operator=(const CodePointBreakIterator& that)
{
	if (this == &that) {
		return *this;
	}

	UErrorCode uec = U_ZERO_ERROR;

	// Use the C++ literals rather than the FALSE/TRUE macros: ICU 68 and
	// later only define those when U_DEFINE_FALSE_AND_TRUE is set.
	this->fText = utext_clone(this->fText, that.fText, false, true, &uec);

	//don't bother copying the character iterator, getText() is deprecated
	clearCurrentCharIter();

	this->lastCodePoint = that.lastCodePoint;
	return *this;
}
69 
~CodePointBreakIterator()70 CodePointBreakIterator::~CodePointBreakIterator()
71 {
72 	if (this->fText) {
73 		utext_close(this->fText);
74 	}
75 	clearCurrentCharIter();
76 }
77 
78 #if U_ICU_VERSION_MAJOR_NUM >= 70
operator ==(const BreakIterator & that) const79 bool CodePointBreakIterator::operator==(const BreakIterator& that) const
80 #else
81 UBool CodePointBreakIterator::operator==(const BreakIterator& that) const
82 #endif
83 {
84 	if (typeid(*this) != typeid(that)) {
85 		return FALSE;
86 	}
87 
88 	const CodePointBreakIterator& that2 =
89 		static_cast<const CodePointBreakIterator&>(that);
90 
91 	if (!utext_equals(this->fText, that2.fText)) {
92 		return FALSE;
93 	}
94 
95 	return TRUE;
96 }
97 
clone(void) const98 CodePointBreakIterator* CodePointBreakIterator::clone(void) const
99 {
100 	return new CodePointBreakIterator(*this);
101 }
102 
getText(void) const103 CharacterIterator& CodePointBreakIterator::getText(void) const
104 {
105 	if (this->fCharIter == NULL) {
106 		//this method is deprecated anyway; setup bogus iterator
107 		static const UChar c = 0;
108 		this->fCharIter = new UCharCharacterIterator(&c, 0);
109 	}
110 
111 	return *this->fCharIter;
112 }
113 
getUText(UText * fillIn,UErrorCode & status) const114 UText *CodePointBreakIterator::getUText(UText *fillIn, UErrorCode &status) const
115 {
116 	return utext_clone(fillIn, this->fText, FALSE, TRUE, &status);
117 }
118 
setText(const UnicodeString & text)119 void CodePointBreakIterator::setText(const UnicodeString &text)
120 {
121 	UErrorCode uec = UErrorCode();
122 
123 	//this closes the previous utext, if any
124 	this->fText = utext_openConstUnicodeString(this->fText, &text, &uec);
125 
126 	clearCurrentCharIter();
127 }
128 
setText(UText * text,UErrorCode & status)129 void CodePointBreakIterator::setText(UText *text, UErrorCode &status)
130 {
131 	if (U_FAILURE(status)) {
132 		return;
133 	}
134 
135 	this->fText = utext_clone(this->fText, text, FALSE, TRUE, &status);
136 
137 	clearCurrentCharIter();
138 }
139 
adoptText(CharacterIterator * it)140 void CodePointBreakIterator::adoptText(CharacterIterator* it)
141 {
142 	UErrorCode uec = UErrorCode();
143 	clearCurrentCharIter();
144 
145 	this->fCharIter = it;
146 	this->fText = utext_openCharacterIterator(this->fText, it, &uec);
147 }
148 
first(void)149 int32_t CodePointBreakIterator::first(void)
150 {
151 	UTEXT_SETNATIVEINDEX(this->fText, 0);
152 	this->lastCodePoint = U_SENTINEL;
153 
154 	return 0;
155 }
156 
last(void)157 int32_t CodePointBreakIterator::last(void)
158 {
159 	int32_t pos = (int32_t)utext_nativeLength(this->fText);
160 	UTEXT_SETNATIVEINDEX(this->fText, pos);
161 	this->lastCodePoint = U_SENTINEL;
162 
163 	return pos;
164 }
165 
previous(void)166 int32_t CodePointBreakIterator::previous(void)
167 {
168 	this->lastCodePoint = UTEXT_PREVIOUS32(this->fText);
169 	if (this->lastCodePoint == U_SENTINEL) {
170 		return BreakIterator::DONE;
171 	}
172 
173 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
174 }
175 
next(void)176 int32_t CodePointBreakIterator::next(void)
177 {
178 	this->lastCodePoint = UTEXT_NEXT32(this->fText);
179 	if (this->lastCodePoint == U_SENTINEL) {
180 		return BreakIterator::DONE;
181 	}
182 
183 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
184 }
185 
current(void) const186 int32_t CodePointBreakIterator::current(void) const
187 {
188 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
189 }
190 
following(int32_t offset)191 int32_t CodePointBreakIterator::following(int32_t offset)
192 {
193 	this->lastCodePoint = utext_next32From(this->fText, offset);
194 	if (this->lastCodePoint == U_SENTINEL) {
195 		return BreakIterator::DONE;
196 	}
197 
198 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
199 }
200 
preceding(int32_t offset)201 int32_t CodePointBreakIterator::preceding(int32_t offset)
202 {
203 	this->lastCodePoint = utext_previous32From(this->fText, offset);
204 	if (this->lastCodePoint == U_SENTINEL) {
205 		return BreakIterator::DONE;
206 	}
207 
208 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
209 }
210 
isBoundary(int32_t offset)211 UBool CodePointBreakIterator::isBoundary(int32_t offset)
212 {
213 	//this function has side effects, and it's supposed to
214 	utext_setNativeIndex(this->fText, offset);
215 	return (offset == utext_getNativeIndex(this->fText));
216 }
217 
next(int32_t n)218 int32_t CodePointBreakIterator::next(int32_t n)
219 {
220 	UBool res = utext_moveIndex32(this->fText, n);
221 
222 #ifndef UTEXT_CURRENT32
223 #define UTEXT_CURRENT32 utext_current32
224 #endif
225 
226 	if (res) {
227 		this->lastCodePoint = UTEXT_CURRENT32(this->fText);
228 		return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
229 	} else {
230 		this->lastCodePoint = U_SENTINEL;
231 		return BreakIterator::DONE;
232 	}
233 }
234 
createBufferClone(void * stackBuffer,int32_t & bufferSize,UErrorCode & status)235 CodePointBreakIterator *CodePointBreakIterator::createBufferClone(
236 	void *stackBuffer, int32_t &bufferSize, UErrorCode &status)
237 {
238 	//see implementation of RuleBasedBreakIterator::createBufferClone()
239 	if (U_FAILURE(status)) {
240 		return NULL;
241 	}
242 
243 	if (bufferSize <= 0) {
244 		bufferSize = sizeof(CodePointBreakIterator) + U_ALIGNMENT_OFFSET_UP(0);
245 		return NULL;
246 	}
247 
248 	char *buf = (char*)stackBuffer;
249 	uint32_t s = bufferSize;
250 
251 	if (stackBuffer == NULL) {
252 		 s = 0;
253 	}
254 
255 	if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
256 		uint32_t offsetUp = (uint32_t)U_ALIGNMENT_OFFSET_UP(buf);
257 		s -= offsetUp;
258 		buf += offsetUp;
259 	}
260 
261 	if (s < sizeof(CodePointBreakIterator)) {
262 		CodePointBreakIterator *clonedBI = new CodePointBreakIterator(*this);
263 		if (clonedBI == NULL) {
264 			status = U_MEMORY_ALLOCATION_ERROR;
265 		} else {
266 			status = U_SAFECLONE_ALLOCATED_WARNING;
267 		}
268 
269 		return clonedBI;
270 	}
271 
272 	return new(buf) CodePointBreakIterator(*this);
273 }
274 
refreshInputText(UText * input,UErrorCode & status)275 CodePointBreakIterator &CodePointBreakIterator::refreshInputText(UText *input, UErrorCode &status)
276 {
277 	//see implementation of RuleBasedBreakIterator::createBufferClone()
278 	if (U_FAILURE(status)) {
279 		return *this;
280 	}
281 	if (input == NULL) {
282 		status = U_ILLEGAL_ARGUMENT_ERROR;
283 		return *this;
284 	}
285 
286 	int64_t pos = utext_getNativeIndex(this->fText);
287 	this->fText = utext_clone(this->fText, input, FALSE, TRUE, &status);
288 	if (U_FAILURE(status)) {
289 		return *this;
290 	}
291 
292 	utext_setNativeIndex(this->fText, pos);
293 	if (utext_getNativeIndex(fText) != pos) {
294 		status = U_ILLEGAL_ARGUMENT_ERROR;
295 	}
296 
297 	return *this;
298 }
299