1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Gustavo Lopes <cataphract@php.net>                          |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include "codepointiterator_internal.h"
18 #include <unicode/uchriter.h>
19 #include <typeinfo>
20 
//Alignment helpers copied from cmemory.h, which is not public.

//Union of basic types; its size is used below as the alignment quantum.
typedef union {
    long    t1;
    double  t2;
    void   *t3;
} UAlignedMemory;

//Low-order bits of the address `ptr` selected by `mask`.
#define U_POINTER_MASK_LSB(ptr, mask) \
    (((ptrdiff_t)(char *)(ptr)) & (mask))

//How many bytes `ptr` lies past the previous sizeof(UAlignedMemory) boundary.
#define U_ALIGNMENT_OFFSET(ptr) \
    U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)

//Bytes to add to `ptr` to reach the next boundary; evaluates to a full
//sizeof(UAlignedMemory) when `ptr` is already on a boundary.
#define U_ALIGNMENT_OFFSET_UP(ptr) \
    (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
31 
//Lets the definitions below name CodePointBreakIterator without the PHP:: qualifier.
using namespace PHP;

//ICU macro invocation for this class; presumably emits the class-ID (RTTI)
//members expected of UObject subclasses -- confirm against unicode/uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CodePointBreakIterator);
35 
CodePointBreakIterator()36 CodePointBreakIterator::CodePointBreakIterator()
37 : BreakIterator(), fCharIter(NULL), lastCodePoint(U_SENTINEL)
38 {
39 	UErrorCode uec = UErrorCode();
40 	this->fText = utext_openUChars(NULL, NULL, 0, &uec);
41 }
42 
CodePointBreakIterator(const PHP::CodePointBreakIterator & other)43 CodePointBreakIterator::CodePointBreakIterator(const PHP::CodePointBreakIterator &other)
44 : BreakIterator(other), fText(NULL), fCharIter(NULL), lastCodePoint(U_SENTINEL)
45 {
46 	*this = other;
47 }
48 
/*
 * Copy assignment.
 *
 * Replaces this iterator's text with a shallow, read-only clone of that.fText
 * (reusing the existing UText as the clone destination), drops the current
 * character iterator and copies the last visited code point.
 *
 * Self-assignment is a no-op. Only the members of this class are assigned;
 * no base-class assignment is performed here. The ICU error code produced by
 * the clone is not inspected.
 *
 * @param that  iterator to copy from
 * @return      *this
 */
CodePointBreakIterator& CodePointBreakIterator::operator=(const CodePointBreakIterator& that)
{
	UErrorCode uec = UErrorCode();

	if (this == &that) {
		return *this;
	}

	this->fText = utext_clone(this->fText, that.fText, FALSE, TRUE, &uec);

	//don't bother copying the character iterator, getText() is deprecated
	clearCurrentCharIter();

	this->lastCodePoint = that.lastCodePoint;
	return *this;
}
66 
~CodePointBreakIterator()67 CodePointBreakIterator::~CodePointBreakIterator()
68 {
69 	if (this->fText) {
70 		utext_close(this->fText);
71 	}
72 	clearCurrentCharIter();
73 }
74 
operator ==(const BreakIterator & that) const75 UBool CodePointBreakIterator::operator==(const BreakIterator& that) const
76 {
77 	if (typeid(*this) != typeid(that)) {
78 		return FALSE;
79 	}
80 
81 	const CodePointBreakIterator& that2 =
82 		static_cast<const CodePointBreakIterator&>(that);
83 
84 	if (!utext_equals(this->fText, that2.fText)) {
85 		return FALSE;
86 	}
87 
88 	return TRUE;
89 }
90 
clone(void) const91 CodePointBreakIterator* CodePointBreakIterator::clone(void) const
92 {
93 	return new CodePointBreakIterator(*this);
94 }
95 
getText(void) const96 CharacterIterator& CodePointBreakIterator::getText(void) const
97 {
98 	if (this->fCharIter == NULL) {
99 		//this method is deprecated anyway; setup bogus iterator
100 		static const UChar c = 0;
101 		this->fCharIter = new UCharCharacterIterator(&c, 0);
102 	}
103 
104 	return *this->fCharIter;
105 }
106 
getUText(UText * fillIn,UErrorCode & status) const107 UText *CodePointBreakIterator::getUText(UText *fillIn, UErrorCode &status) const
108 {
109 	return utext_clone(fillIn, this->fText, FALSE, TRUE, &status);
110 }
111 
setText(const UnicodeString & text)112 void CodePointBreakIterator::setText(const UnicodeString &text)
113 {
114 	UErrorCode uec = UErrorCode();
115 
116 	//this closes the previous utext, if any
117 	this->fText = utext_openConstUnicodeString(this->fText, &text, &uec);
118 
119 	clearCurrentCharIter();
120 }
121 
setText(UText * text,UErrorCode & status)122 void CodePointBreakIterator::setText(UText *text, UErrorCode &status)
123 {
124 	if (U_FAILURE(status)) {
125 		return;
126 	}
127 
128 	this->fText = utext_clone(this->fText, text, FALSE, TRUE, &status);
129 
130 	clearCurrentCharIter();
131 }
132 
adoptText(CharacterIterator * it)133 void CodePointBreakIterator::adoptText(CharacterIterator* it)
134 {
135 	UErrorCode uec = UErrorCode();
136 	clearCurrentCharIter();
137 
138 	this->fCharIter = it;
139 	this->fText = utext_openCharacterIterator(this->fText, it, &uec);
140 }
141 
first(void)142 int32_t CodePointBreakIterator::first(void)
143 {
144 	UTEXT_SETNATIVEINDEX(this->fText, 0);
145 	this->lastCodePoint = U_SENTINEL;
146 
147 	return 0;
148 }
149 
last(void)150 int32_t CodePointBreakIterator::last(void)
151 {
152 	int32_t pos = (int32_t)utext_nativeLength(this->fText);
153 	UTEXT_SETNATIVEINDEX(this->fText, pos);
154 	this->lastCodePoint = U_SENTINEL;
155 
156 	return pos;
157 }
158 
previous(void)159 int32_t CodePointBreakIterator::previous(void)
160 {
161 	this->lastCodePoint = UTEXT_PREVIOUS32(this->fText);
162 	if (this->lastCodePoint == U_SENTINEL) {
163 		return BreakIterator::DONE;
164 	}
165 
166 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
167 }
168 
next(void)169 int32_t CodePointBreakIterator::next(void)
170 {
171 	this->lastCodePoint = UTEXT_NEXT32(this->fText);
172 	if (this->lastCodePoint == U_SENTINEL) {
173 		return BreakIterator::DONE;
174 	}
175 
176 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
177 }
178 
current(void) const179 int32_t CodePointBreakIterator::current(void) const
180 {
181 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
182 }
183 
following(int32_t offset)184 int32_t CodePointBreakIterator::following(int32_t offset)
185 {
186 	this->lastCodePoint = utext_next32From(this->fText, offset);
187 	if (this->lastCodePoint == U_SENTINEL) {
188 		return BreakIterator::DONE;
189 	}
190 
191 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
192 }
193 
preceding(int32_t offset)194 int32_t CodePointBreakIterator::preceding(int32_t offset)
195 {
196 	this->lastCodePoint = utext_previous32From(this->fText, offset);
197 	if (this->lastCodePoint == U_SENTINEL) {
198 		return BreakIterator::DONE;
199 	}
200 
201 	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
202 }
203 
isBoundary(int32_t offset)204 UBool CodePointBreakIterator::isBoundary(int32_t offset)
205 {
206 	//this function has side effects, and it's supposed to
207 	utext_setNativeIndex(this->fText, offset);
208 	return (offset == utext_getNativeIndex(this->fText));
209 }
210 
next(int32_t n)211 int32_t CodePointBreakIterator::next(int32_t n)
212 {
213 	UBool res = utext_moveIndex32(this->fText, n);
214 
215 #ifndef UTEXT_CURRENT32
216 #define UTEXT_CURRENT32 utext_current32
217 #endif
218 
219 	if (res) {
220 		this->lastCodePoint = UTEXT_CURRENT32(this->fText);
221 		return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
222 	} else {
223 		this->lastCodePoint = U_SENTINEL;
224 		return BreakIterator::DONE;
225 	}
226 }
227 
createBufferClone(void * stackBuffer,int32_t & bufferSize,UErrorCode & status)228 CodePointBreakIterator *CodePointBreakIterator::createBufferClone(
229 	void *stackBuffer, int32_t &bufferSize, UErrorCode &status)
230 {
231 	//see implementation of RuleBasedBreakIterator::createBufferClone()
232 	if (U_FAILURE(status)) {
233 		return NULL;
234 	}
235 
236 	if (bufferSize <= 0) {
237 		bufferSize = sizeof(CodePointBreakIterator) + U_ALIGNMENT_OFFSET_UP(0);
238 		return NULL;
239 	}
240 
241 	char *buf = (char*)stackBuffer;
242 	uint32_t s = bufferSize;
243 
244 	if (stackBuffer == NULL) {
245 		 s = 0;
246 	}
247 
248 	if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
249 		uint32_t offsetUp = (uint32_t)U_ALIGNMENT_OFFSET_UP(buf);
250 		s -= offsetUp;
251 		buf += offsetUp;
252 	}
253 
254 	if (s < sizeof(CodePointBreakIterator)) {
255 		CodePointBreakIterator *clonedBI = new CodePointBreakIterator(*this);
256 		if (clonedBI == NULL) {
257 			status = U_MEMORY_ALLOCATION_ERROR;
258 		} else {
259 			status = U_SAFECLONE_ALLOCATED_WARNING;
260 		}
261 
262 		return clonedBI;
263 	}
264 
265 	return new(buf) CodePointBreakIterator(*this);
266 }
267 
refreshInputText(UText * input,UErrorCode & status)268 CodePointBreakIterator &CodePointBreakIterator::refreshInputText(UText *input, UErrorCode &status)
269 {
270 	//see implementation of RuleBasedBreakIterator::createBufferClone()
271 	if (U_FAILURE(status)) {
272 		return *this;
273 	}
274 	if (input == NULL) {
275 		status = U_ILLEGAL_ARGUMENT_ERROR;
276 		return *this;
277 	}
278 
279 	int64_t pos = utext_getNativeIndex(this->fText);
280 	this->fText = utext_clone(this->fText, input, FALSE, TRUE, &status);
281 	if (U_FAILURE(status)) {
282 		return *this;
283 	}
284 
285 	utext_setNativeIndex(this->fText, pos);
286 	if (utext_getNativeIndex(fText) != pos) {
287 		status = U_ILLEGAL_ARGUMENT_ERROR;
288 	}
289 
290 	return *this;
291 }
292