@@ -13,7 +13,7 @@ ZERO_WIDTH_JOINER :: '\u200D'
1313WORD_JOINER :: ' \u 2060'
1414
1515@(require_results)
16- binary_search :: proc (c: i32 , table: []i32 , length, stride: int , loc := #caller_location ) -> int #no_bounds_check {
16+ binary_search :: proc (c: $T , table: []T , length, stride: int , loc := #caller_location ) -> int #no_bounds_check {
1717 runtime.bounds_check_error_loc (loc, length*stride-1 , len (table))
1818 n := length
1919 t := 0
@@ -75,36 +75,30 @@ is_lower :: proc(r: rune) -> bool #no_bounds_check {
7575 if r <= MAX_ASCII {
7676 return u32 (r)-' a' < 26
7777 }
78- c := i32 (r)
79- p := binary_search (c, to_upper_ranges[:], len (to_upper_ranges)/3 , 3 )
80- if p >= 0 && to_upper_ranges[p] <= c && c <= to_upper_ranges[p+1 ] {
81- return true
82- }
83- p = binary_search (c, to_upper_singlets[:], len (to_upper_singlets)/2 , 2 )
84- if p >= 0 && c == to_upper_singlets[p] {
85- return true
86- }
87- return false
78+ return in_range (r, ll_ranges) || in_range (r, other_lowercase_ranges)
8879}
8980
/*
Reports whether the rune `r` is considered uppercase.

Inputs:
- r: The rune to classify.

Returns:
`true` for ASCII 'A' through 'Z', or for a non-ASCII rune found in either
`lu_ranges` or `other_uppercase_ranges`. `false` in all other cases.
*/
@(require_results)
is_upper :: proc(r: rune) -> bool #no_bounds_check {
	if r > MAX_ASCII {
		// Outside ASCII: defer to the range tables.
		return in_range(r, lu_ranges) || in_range(r, other_uppercase_ranges)
	}
	// Unsigned subtraction wraps for anything below 'A', so a single
	// comparison covers both ends of the 'A'..'Z' interval.
	return u32(r)-'A' < 26
}
10688
10789is_alpha :: is_letter
90+
91+ /*
92+ Return true if the rune `r` is a letter. Being a letter means that the rune has
93+ the Unicode general category property of L. In practice, the character will have
94+ a general category property of Ll, Lm, Lo, Lt, or Lu.
95+
96+ Inputs:
97+ - r: The rune which will be checked for having the property of being a letter.
98+
99+ Returns:
100+ `true` when the rune `r` is a letter. `false` will be returned in all other cases.
101+ */
108102@(require_results)
109103is_letter :: proc (r: rune ) -> bool #no_bounds_check {
110104 if u32 (r) <= MAX_LATIN1 {
@@ -114,28 +108,55 @@ is_letter :: proc(r: rune) -> bool #no_bounds_check {
114108 return true
115109 }
116110
117- c := i32 (r)
118- p := binary_search (c, alpha_ranges[:], len (alpha_ranges)/2 , 2 )
119- if p >= 0 && alpha_ranges[p] <= c && c <= alpha_ranges[p+1 ] {
120- return true
121- }
122- p = binary_search (c, alpha_singlets[:], len (alpha_singlets), 1 )
123- if p >= 0 && c == alpha_singlets[p] {
124- return true
125- }
126- return false
111+ ll_lu := in_range (r, ll_ranges) || in_range (r, lu_ranges)
112+
113+ return ll_lu || in_range (r, lo_ranges) || in_range (r, lt_ranges) || in_range (r, lm_ranges)
127114}
128115
/*
Reports whether the rune `r` is a titlecase letter.

This consults `lt_ranges` directly instead of evaluating
`is_upper(r) && is_lower(r)`. Those two predicates test separate
uppercase and lowercase range tables, so their conjunction cannot
identify titlecase runes.

NOTE(review): assumes `lt_ranges` holds general category Lt, matching its
use next to the Ll/Lu/Lo/Lm tables in `is_letter` - confirm against the
table generator.

Inputs:
- r: The rune to classify.

Returns:
`true` when `r` falls inside `lt_ranges`, `false` in all other cases.
*/
@(require_results)
is_title :: proc(r: rune) -> bool {
	return in_range(r, lt_ranges)
}
133120
121+ /*
122+ Returns true if the rune `r` is in the General Category Nd
123+
124+ Inputs:
125+ - r: The rune to check if it is in the general category Nd.
126+
127+ Returns:
128+ `true` if the rune is in the general category Nd and `false` otherwise
129+
130+ */
// Marked require_results like the other classification predicates in this
// file: calling it and discarding the answer is always a mistake.
@(require_results)
is_decimal :: proc(r: rune) -> bool {
	// Membership in the Nd range table is the whole test.
	return in_range(r, nd_ranges)
}
134+
135+ /*
136+ This function determines if a rune is a digit. To be a digit the
137+ character must have a Numeric_Type of either Digit or Decimal.
138+
139+ Inputs:
140+ - r: The rune to check if it is a digit.
141+
142+ Returns:
143+ `true` if the rune `r` is a digit, `false` in all other cases
144+
145+ */
@(require_results)
is_digit :: proc(r: rune) -> bool {
	if r > MAX_LATIN1 {
		// Beyond Latin-1 the answer comes from the range tables:
		// decimal digits first, then the supplementary digit table.
		return in_range(r, nd_ranges) || in_range(r, extra_digits_ranges)
	}

	// Latin-1 fast path: ASCII '0'..'9' plus the three superscript digits
	// U+00B9 (superscript one) and U+00B2..U+00B3 (superscript two and three).
	ascii_digit := '0' <= r && r <= '9'
	superscript_digit := r == 0x00B9 || (r >= 0x00B2 && r <= 0x00B3)
	return ascii_digit || superscript_digit
}
141162
@@ -176,6 +197,15 @@ is_graphic :: proc(r: rune) -> bool {
176197 if u32 (r) <= MAX_LATIN1 {
177198 return char_properties[u8 (r)]&pg != 0
178199 }
200+
201+ if is_letter (r) || is_number (r) || is_punct (r) || is_symbol (r) || in_range (r, zs_ranges) {
202+ return true
203+ }
204+
205+ if in_range (r, mc_ranges) || in_range (r, me_ranges) || in_range (r, mn_ranges) {
206+ return true
207+ }
208+
179209 return false
180210}
181211
@@ -195,27 +225,56 @@ is_control :: proc(r: rune) -> bool #no_bounds_check {
195225 return false
196226}
197227
228+ /*
229+ Checks to see if the rune `r` is a number. This means the rune is a member
230+ of the general category Nd, Nl, or No.
231+
232+ Inputs:
233+ r: The rune to check if it is number.
234+
235+ Returns:
236+ `true` if the rune belongs to the general category Nd, Nl, or No. `false`
237+ is returned in all other cases.
238+
239+ */
@(require_results)
is_number :: proc(r: rune) -> bool #no_bounds_check {
	if u32(r) > MAX_LATIN1 {
		// Outside Latin-1: a number is anything in the Nd, Nl or No tables.
		return in_range(r, nd_ranges) || in_range(r, nl_ranges) || in_range(r, no_ranges)
	}
	// Latin-1 runes are answered from the per-byte property table.
	return char_properties[u8(r)]&pN != 0
}
205248
/*
Reports whether the rune `r` is punctuation.

Inputs:
- r: The rune to classify.

Returns:
For runes up to MAX_LATIN1, whether the `pP` bit is set in `char_properties`.
For all other runes, `true` when `r` is found in any of the `pc`, `pd`, `pe`,
`pf`, `pi`, `po` or `ps` range tables, and `false` otherwise.
*/
@(require_results)
is_punct :: proc(r: rune) -> bool #no_bounds_check {
	if u32(r) <= MAX_LATIN1 {
		return char_properties[u8(r)]&pP != 0
	}

	// Tables are probed in a fixed order; `||` stops at the first hit.
	return in_range(r, pc_ranges) ||
	       in_range(r, pd_ranges) ||
	       in_range(r, pe_ranges) ||
	       in_range(r, pf_ranges) ||
	       in_range(r, pi_ranges) ||
	       in_range(r, po_ranges) ||
	       in_range(r, ps_ranges)
}
213265
/*
Reports whether the rune `r` is a symbol.

Inputs:
- r: The rune to classify.

Returns:
For runes up to MAX_LATIN1, whether the `pS` bit is set in `char_properties`.
For all other runes, `true` when `r` is found in any of the `sc`, `sm`, `so`
or `sk` range tables, and `false` otherwise.
*/
@(require_results)
is_symbol :: proc(r: rune) -> bool #no_bounds_check {
	if u32(r) <= MAX_LATIN1 {
		return char_properties[u8(r)]&pS != 0
	}

	// Same probe order as before: sc, sm, so, sk.
	return in_range(r, sc_ranges) ||
	       in_range(r, sm_ranges) ||
	       in_range(r, so_ranges) ||
	       in_range(r, sk_ranges)
}
221280
0 commit comments