1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Gustavo Lopes <cataphract@php.net>                          |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include "codepointiterator_internal.h"
18 #include <unicode/uchriter.h>
19 #include <typeinfo>
20 
21 #include "php.h"
22 
23 //copied from cmemory.h, which is not public
24 typedef union {
25     zend_long    t1;
26     double  t2;
27     void   *t3;
28 } UAlignedMemory;
29 
//Low bits of a pointer: the address is converted to ptrdiff_t (via char *) and
//ANDed with `mask`.
#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask))
//Offset of `ptr` within a sizeof(UAlignedMemory)-sized slot; 0 when the low
//bits selected by sizeof(UAlignedMemory) - 1 are all clear.
#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)
//sizeof(UAlignedMemory) minus the offset above. Note that for a pointer whose
//offset is 0 this evaluates to sizeof(UAlignedMemory), not 0.
#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
33 
//lets the member definitions below name CodePointBreakIterator unqualified
using namespace PHP;

//ICU-provided macro; presumably emits the class-ID (RTTI) boilerplate for
//CodePointBreakIterator -- confirm against unicode/uobject.h
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CodePointBreakIterator);
37 
CodePointBreakIterator()38 CodePointBreakIterator::CodePointBreakIterator()
39 : BreakIterator(), fCharIter(NULL), lastCodePoint(U_SENTINEL)
40 {
41 	UErrorCode uec = UErrorCode();
42 	this->fText = utext_openUChars(NULL, NULL, 0, &uec);
43 }
44 
CodePointBreakIterator(const PHP::CodePointBreakIterator & other)45 CodePointBreakIterator::CodePointBreakIterator(const PHP::CodePointBreakIterator &other)
46 : BreakIterator(other), fText(NULL), fCharIter(NULL), lastCodePoint(U_SENTINEL)
47 {
48 	*this = other;
49 }
50 
//Copy assignment.
//Replaces this iterator's text with a clone of `that`'s text (utext_clone with
//deep = FALSE, readOnly = TRUE), drops the current character iterator and
//copies the last code point. Self-assignment is a no-op.
//Returns *this.
CodePointBreakIterator& CodePointBreakIterator::operator=(const CodePointBreakIterator& that)
{
	if (this == &that) {
		return *this;
	}

	//the status is required by utext_clone() but is not inspected afterwards
	UErrorCode uec = UErrorCode();

	//the current UText is handed in as the destination of the clone
	this->fText = utext_clone(this->fText, that.fText, FALSE, TRUE, &uec);

	//don't bother copying the character iterator, getText() is deprecated
	clearCurrentCharIter();

	//assigned after clearCurrentCharIter() so the copied value is the final one
	this->lastCodePoint = that.lastCodePoint;
	return *this;
}
68 
~CodePointBreakIterator()69 CodePointBreakIterator::~CodePointBreakIterator()
70 {
71 	if (this->fText) {
72 		utext_close(this->fText);
73 	}
74 	clearCurrentCharIter();
75 }
76 
operator ==(const BreakIterator & that) const77 UBool CodePointBreakIterator::operator==(const BreakIterator& that) const
78 {
79 	if (typeid(*this) != typeid(that)) {
80 		return FALSE;
81 	}
82 
83 	const CodePointBreakIterator& that2 =
84 		static_cast<const CodePointBreakIterator&>(that);
85 
86 	if (!utext_equals(this->fText, that2.fText)) {
87 		return FALSE;
88 	}
89 
90 	return TRUE;
91 }
92 
clone(void) const93 CodePointBreakIterator* CodePointBreakIterator::clone(void) const
94 {
95 	return new CodePointBreakIterator(*this);
96 }
97 
getText(void) const98 CharacterIterator& CodePointBreakIterator::getText(void) const
99 {
100 	if (this->fCharIter == NULL) {
101 		//this method is deprecated anyway; setup bogus iterator
102 		static const UChar c = 0;
103 		this->fCharIter = new UCharCharacterIterator(&c, 0);
104 	}
105 
106 	return *this->fCharIter;
107 }
108 
getUText(UText * fillIn,UErrorCode & status) const109 UText *CodePointBreakIterator::getUText(UText *fillIn, UErrorCode &status) const
110 {
111 	return utext_clone(fillIn, this->fText, FALSE, TRUE, &status);
112 }
113 
setText(const UnicodeString & text)114 void CodePointBreakIterator::setText(const UnicodeString &text)
115 {
116 	UErrorCode uec = UErrorCode();
117 
118 	//this closes the previous utext, if any
119 	this->fText = utext_openConstUnicodeString(this->fText, &text, &uec);
120 
121 	clearCurrentCharIter();
122 }
123 
setText(UText * text,UErrorCode & status)124 void CodePointBreakIterator::setText(UText *text, UErrorCode &status)
125 {
126 	if (U_FAILURE(status)) {
127 		return;
128 	}
129 
130 	this->fText = utext_clone(this->fText, text, FALSE, TRUE, &status);
131 
132 	clearCurrentCharIter();
133 }
134 
adoptText(CharacterIterator * it)135 void CodePointBreakIterator::adoptText(CharacterIterator* it)
136 {
137 	UErrorCode uec = UErrorCode();
138 	clearCurrentCharIter();
139 
140 	this->fCharIter = it;
141 	this->fText = utext_openCharacterIterator(this->fText, it, &uec);
142 }
143 
first(void)144 int32_t CodePointBreakIterator::first(void)
145 {
146 	UTEXT_SETNATIVEINDEX(this->fText, 0);
147 	this->lastCodePoint = U_SENTINEL;
148 
149 	return 0;
150 }
151 
last(void)152 int32_t CodePointBreakIterator::last(void)
153 {
154 	int32_t pos = (int32_t)utext_nativeLength(this->fText);
155 	UTEXT_SETNATIVEINDEX(this->fText, pos);
156 	this->lastCodePoint = U_SENTINEL;
157 
158 	return pos;
159 }
160 
previous(void)161 int32_t CodePointBreakIterator::previous(void)
162 {
163 	this->lastCodePoint = UTEXT_PREVIOUS32(this->fText);
164 	if (this->lastCodePoint == U_SENTINEL) {
165 		return BreakIterator::DONE;
166 	}
167 
168 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
169 }
170 
next(void)171 int32_t CodePointBreakIterator::next(void)
172 {
173 	this->lastCodePoint = UTEXT_NEXT32(this->fText);
174 	if (this->lastCodePoint == U_SENTINEL) {
175 		return BreakIterator::DONE;
176 	}
177 
178 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
179 }
180 
current(void) const181 int32_t CodePointBreakIterator::current(void) const
182 {
183 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
184 }
185 
following(int32_t offset)186 int32_t CodePointBreakIterator::following(int32_t offset)
187 {
188 	this->lastCodePoint = utext_next32From(this->fText, offset);
189 	if (this->lastCodePoint == U_SENTINEL) {
190 		return BreakIterator::DONE;
191 	}
192 
193 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
194 }
195 
preceding(int32_t offset)196 int32_t CodePointBreakIterator::preceding(int32_t offset)
197 {
198 	this->lastCodePoint = utext_previous32From(this->fText, offset);
199 	if (this->lastCodePoint == U_SENTINEL) {
200 		return BreakIterator::DONE;
201 	}
202 
203 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
204 }
205 
isBoundary(int32_t offset)206 UBool CodePointBreakIterator::isBoundary(int32_t offset)
207 {
208 	//this function has side effects, and it's supposed to
209 	utext_setNativeIndex(this->fText, offset);
210 	return (offset == utext_getNativeIndex(this->fText));
211 }
212 
next(int32_t n)213 int32_t CodePointBreakIterator::next(int32_t n)
214 {
215 	UBool res = utext_moveIndex32(this->fText, n);
216 
217 #ifndef UTEXT_CURRENT32
218 #define UTEXT_CURRENT32 utext_current32
219 #endif
220 
221 	if (res) {
222 		this->lastCodePoint = UTEXT_CURRENT32(this->fText);
223 		return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
224 	} else {
225 		this->lastCodePoint = U_SENTINEL;
226 		return BreakIterator::DONE;
227 	}
228 }
229 
createBufferClone(void * stackBuffer,int32_t & bufferSize,UErrorCode & status)230 CodePointBreakIterator *CodePointBreakIterator::createBufferClone(
231 	void *stackBuffer, int32_t &bufferSize, UErrorCode &status)
232 {
233 	//see implementation of RuleBasedBreakIterator::createBufferClone()
234 	if (U_FAILURE(status)) {
235 		return NULL;
236 	}
237 
238 	if (bufferSize <= 0) {
239 		bufferSize = sizeof(CodePointBreakIterator) + U_ALIGNMENT_OFFSET_UP(0);
240 		return NULL;
241 	}
242 
243 	char *buf = (char*)stackBuffer;
244 	uint32_t s = bufferSize;
245 
246 	if (stackBuffer == NULL) {
247 		 s = 0;
248 	}
249 
250 	if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
251 		uint32_t offsetUp = (uint32_t)U_ALIGNMENT_OFFSET_UP(buf);
252 		s -= offsetUp;
253 		buf += offsetUp;
254 	}
255 
256 	if (s < sizeof(CodePointBreakIterator)) {
257 		CodePointBreakIterator *clonedBI = new CodePointBreakIterator(*this);
258 		if (clonedBI == NULL) {
259 			status = U_MEMORY_ALLOCATION_ERROR;
260 		} else {
261 			status = U_SAFECLONE_ALLOCATED_WARNING;
262 		}
263 
264 		return clonedBI;
265 	}
266 
267 	return new(buf) CodePointBreakIterator(*this);
268 }
269 
refreshInputText(UText * input,UErrorCode & status)270 CodePointBreakIterator &CodePointBreakIterator::refreshInputText(UText *input, UErrorCode &status)
271 {
272 	//see implementation of RuleBasedBreakIterator::createBufferClone()
273 	if (U_FAILURE(status)) {
274 		return *this;
275 	}
276 	if (input == NULL) {
277 		status = U_ILLEGAL_ARGUMENT_ERROR;
278 		return *this;
279 	}
280 
281 	int64_t pos = utext_getNativeIndex(this->fText);
282 	this->fText = utext_clone(this->fText, input, FALSE, TRUE, &status);
283 	if (U_FAILURE(status)) {
284 		return *this;
285 	}
286 
287 	utext_setNativeIndex(this->fText, pos);
288 	if (utext_getNativeIndex(fText) != pos) {
289 		status = U_ILLEGAL_ARGUMENT_ERROR;
290 	}
291 
292 	return *this;
293 }
294