1 #ifndef REGENC_H 2 #define REGENC_H 3 /********************************************************************** 4 regenc.h - Oniguruma (regular expression library) 5 **********************************************************************/ 6 /*- 7 * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #ifndef RUBY_PLATFORM 33 #include "config.h" 34 #endif 35 #include "oniguruma.h" 36 37 #ifndef NULL 38 #define NULL ((void* )0) 39 #endif 40 41 #ifndef TRUE 42 #define TRUE 1 43 #endif 44 45 #ifndef FALSE 46 #define FALSE 0 47 #endif 48 49 /* error codes */ 50 #define ONIGENCERR_MEMORY -5 51 #define ONIGENCERR_TYPE_BUG -6 52 #define ONIGENCERR_INVALID_WIDE_CHAR_VALUE -400 53 #define ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE -401 54 55 #define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0) 56 #define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0) 57 #define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL 58 #define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val) 59 60 61 #ifdef ONIG_RUBY_M17N 62 63 #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_UNDEF 64 65 #else /* ONIG_RUBY_M17N */ 66 67 #define USE_UNICODE_FULL_RANGE_CTYPE 68 /* following must not use with USE_CRNL_AS_LINE_TERMINATOR */ 69 /* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ 70 71 #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII 72 73 /* for encoding system implementation (internal) */ 74 ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs)); 75 ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs)); 76 ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs)); 77 ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs)); 78 ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])); 79 ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); 80 81 /* methods for single byte encoding */ 82 ONIG_EXTERN int onigenc_ascii_mbc_to_normalize P_((OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower)); 83 ONIG_EXTERN int onigenc_ascii_is_mbc_ambiguous P_((OnigAmbigType flag, const UChar** p, const UChar* end)); 84 ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p)); 85 ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end)); 86 ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); 87 ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code)); 88 ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); 89 ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s)); 90 ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); 91 ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); 92 93 /* methods for multi byte encoding */ 94 ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); 95 ONIG_EXTERN int onigenc_mbn_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower)); 96 ONIG_EXTERN int onigenc_mbn_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end)); 97 ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code)); 98 ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code)); 99 ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); 100 ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); 101 ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); 102 ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code)); 103 ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); 104 ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); 105 106 ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes)); 107 108 /* in enc/unicode.c */ 109 ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); 110 ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])); 111 112 113 #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ 114 OnigEncISO_8859_1_ToLowerCaseTable[c] 115 #define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \ 116 OnigEncISO_8859_1_ToUpperCaseTable[c] 117 #define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ 118 ((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0) 119 120 ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; 121 ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; 122 ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[]; 123 ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[]; 124 125 #endif /* is not ONIG_RUBY_M17N */ 126 127 ONIG_EXTERN int 128 onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n)); 129 ONIG_EXTERN UChar* 130 onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n)); 131 132 /* defined in regexec.c, but used in enc/xxx.c */ 133 extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); 134 135 ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; 136 ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable; 137 ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; 138 ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; 139 140 #define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c] 141 #define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c] 142 #define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \ 143 ((OnigEncAsciiCtypeTable[code] & ctype) != 0) 144 #define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \ 145 ONIGENC_IS_ASCII_CODE_CTYPE(code, (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER)) 146 147 #endif /* REGENC_H */ 148