00001 /* ==================================================================== 00002 * Licensed to the Apache Software Foundation (ASF) under one 00003 * or more contributor license agreements. See the NOTICE file 00004 * distributed with this work for additional information 00005 * regarding copyright ownership. The ASF licenses this file 00006 * to you under the Apache License, Version 2.0 (the 00007 * "License"); you may not use this file except in compliance 00008 * with the License. You may obtain a copy of the License at 00009 * 00010 * http://www.apache.org/licenses/LICENSE-2.0 00011 * 00012 * Unless required by applicable law or agreed to in writing, 00013 * software distributed under the License is distributed on an 00014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 00015 * KIND, either express or implied. See the License for the 00016 * specific language governing permissions and limitations 00017 * under the License. 00018 * ==================================================================== 00019 */ 00020 00021 /** 00022 * @file apr_cstr.h 00023 * @brief C string goodies. 00024 */ 00025 00026 #ifndef APR_CSTR_H 00027 #define APR_CSTR_H 00028 00029 #include <apr.h> /* for apr_size_t */ 00030 #include <apr_pools.h> /* for apr_pool_t */ 00031 #include <apr_tables.h> /* for apr_array_header_t */ 00032 00033 #ifdef __cplusplus 00034 extern "C" { 00035 #endif /* __cplusplus */ 00036 00037 /** 00038 * @defgroup apr_cstr C (POSIX) locale string functions 00039 * @ingroup apr_strings 00040 * 00041 * The apr_cstr_* functions provide traditional C char * string text handling, 00042 * and notably they treat all text in the C (a.k.a. POSIX) locale using the 00043 * minimal POSIX character set, represented in either ASCII or a corresponding 00044 * EBCDIC subset. 00045 * 00046 * Character values outside of that set are treated as opaque bytes, and all 00047 * multi-byte character sequences are handled as individual distinct octets. 
00048 * 00049 * Multi-byte character sequences whose octets fall in the ASCII range cause 00050 * unexpected results, such as in the ISO-2022-JP code page where ASCII octets 00051 * occur within both shift-state and multibyte sequences. 00052 * 00053 * In the case of the UTF-8 encoding, all multibyte characters fall outside 00054 * of the C/POSIX range of characters, so these functions are generally safe 00055 * to use on UTF-8 strings. The programmer must be aware that each octet may 00056 * not represent a distinct printable character in such encodings. 00057 * 00058 * The standard C99/POSIX string functions, rather than apr_cstr, should be 00059 * used in all cases where the current locale and encoding of the text is 00060 * significant. 00061 * @{ 00062 */ 00063 00064 00065 /** Divide @a input into substrings, interpreting any char from @a sep_chars 00066 * as a token separator. 00067 * 00068 * Return an array of copies of those substrings (plain const char*), 00069 * allocating both the array and the copies in @a pool. 00070 * 00071 * None of the elements added to the array contain any of the 00072 * characters in @a sep_chars, and none of the new elements are empty 00073 * (thus, it is possible that the returned array will have length 00074 * zero). 00075 * 00076 * If @a chop_whitespace is TRUE, then remove leading and trailing 00077 * whitespace from the returned strings. 00078 * 00079 * @since New in 1.6 00080 */ 00081 APR_DECLARE(apr_array_header_t *) apr_cstr_split(const char *input, 00082 const char *sep_chars, 00083 int chop_whitespace, 00084 apr_pool_t *pool); 00085 00086 /** Like apr_cstr_split(), but append to existing @a array instead of 00087 * creating a new one. Allocate the copied substrings in @a pool 00088 * (i.e., caller decides whether or not to pass @a array->pool as @a pool). 
00089 * 00090 * @since New in 1.6 00091 */ 00092 APR_DECLARE(void) apr_cstr_split_append(apr_array_header_t *array, 00093 const char *input, 00094 const char *sep_chars, 00095 int chop_whitespace, 00096 apr_pool_t *pool); 00097 00098 00099 /** Return @c TRUE iff @a str matches any of the elements of @a list, a list 00100 * of zero or more glob patterns. 00101 * 00102 * @since New in 1.6 00103 */ 00104 APR_DECLARE(int) apr_cstr_match_glob_list(const char *str, 00105 const apr_array_header_t *list); 00106 00107 /** Return @c TRUE iff @a str exactly matches any of the elements of @a list. 00108 * 00109 * @since New in 1.6 00110 */ 00111 APR_DECLARE(int) apr_cstr_match_list(const char *str, 00112 const apr_array_header_t *list); 00113 00114 /** 00115 * Get the next token from @a *str interpreting any char from @a sep as a 00116 * token separator. Separators at the beginning of @a str will be skipped. 00117 * Returns a pointer to the beginning of the first token in @a *str or NULL 00118 * if no token is left. Modifies @a str such that the next call will return 00119 * the next token. 00120 * 00121 * @note The content of @a *str may be modified by this function. 00122 * 00123 * @since New in 1.6. 00124 */ 00125 APR_DECLARE(char *) apr_cstr_tokenize(const char *sep, char **str); 00126 00127 /** 00128 * Return the number of line breaks in @a msg, allowing any kind of newline 00129 * termination (CR, LF, CRLF, or LFCR), even inconsistent. 00130 * 00131 * @since New in 1.6. 00132 */ 00133 APR_DECLARE(int) apr_cstr_count_newlines(const char *msg); 00134 00135 #if 0 /* XXX: stringbuf logic is not present in APR */ 00136 /** 00137 * Return a cstring which is the concatenation of @a strings (an array 00138 * of char *) each followed by @a separator (that is, @a separator 00139 * will also end the resulting string). Allocate the result in @a pool. 00140 * If @a strings is empty, then return the empty string. 00141 * 00142 * @since New in 1.6. 
00143 */ 00144 APR_DECLARE(char *) apr_cstr_join(const apr_array_header_t *strings, 00145 const char *separator, 00146 apr_pool_t *pool); 00147 #endif 00148 00149 /** 00150 * Perform a case-insensitive comparison of two strings @a str1 and @a str2, 00151 * treating upper and lower case values of the 26 standard C/POSIX alphabetic 00152 * characters as equivalent. Extended latin characters outside of this set 00153 * are treated as unique octets, irrespective of the current locale. 00154 * 00155 * Returns an integer greater than, equal to, or less than 0, 00156 * according to whether @a str1 is considered greater than, equal to, 00157 * or less than @a str2. 00158 * 00159 * @since New in 1.6. 00160 */ 00161 APR_DECLARE(int) apr_cstr_casecmp(const char *str1, const char *str2); 00162 00163 /** 00164 * Perform a case-insensitive comparison of two strings @a str1 and @a str2, 00165 * treating upper and lower case values of the 26 standard C/POSIX alphabetic 00166 * characters as equivalent. Extended latin characters outside of this set 00167 * are treated as unique octets, irrespective of the current locale. 00168 * 00169 * Returns an integer greater than, equal to, or less than 0, 00170 * according to whether @a str1 is considered greater than, equal to, 00171 * or less than @a str2. 00172 * 00173 * @note (review) Presumably at most @a n characters are compared; this header does not say so -- confirm against the implementation. @since New in 1.6. 00174 */ 00175 APR_DECLARE(int) apr_cstr_casecmpn(const char *str1, 00176 const char *str2, 00177 apr_size_t n); 00178 00179 /** 00180 * Parse the C string @a str into a 64 bit number, and return it in @a *n. 00181 * Assume that the number is represented in base @a base. 00182 * Raise an error if conversion fails (e.g. due to overflow), or if the 00183 * converted number is smaller than @a minval or larger than @a maxval. 00184 * 00185 * Leading whitespace in @a str is skipped in a locale-dependent way. 
00186 * After that, the string may contain an optional '+' (positive, default) 00187 * or '-' (negative) character, followed by an optional '0x' prefix if 00188 * @a base is 0 or 16, followed by numeric digits appropriate for the base. 00189 * If there are any more characters after the numeric digits, an error is 00190 * returned. 00191 * 00192 * If @a base is zero, then a leading '0x' or '0X' prefix means hexadecimal, 00193 * else a leading '0' means octal (implemented, though not documented, in 00194 * apr_strtoi64() in APR 0.9.0 through 1.5.0), else use base ten. 00195 * 00196 * @since New in 1.6. 00197 */ 00198 APR_DECLARE(apr_status_t) apr_cstr_strtoi64(apr_int64_t *n, const char *str, 00199 apr_int64_t minval, 00200 apr_int64_t maxval, 00201 int base); 00202 00203 /** 00204 * Parse the C string @a str into a 64 bit number, and return it in @a *n. 00205 * Assume that the number is represented in base 10. 00206 * Raise an error if conversion fails (e.g. due to overflow). 00207 * 00208 * The behaviour otherwise is as described for apr_cstr_strtoi64(). 00209 * 00210 * @since New in 1.6. 00211 */ 00212 APR_DECLARE(apr_status_t) apr_cstr_atoi64(apr_int64_t *n, const char *str); 00213 00214 /** 00215 * Parse the C string @a str into a 32 bit number, and return it in @a *n. 00216 * Assume that the number is represented in base 10. 00217 * Raise an error if conversion fails (e.g. due to overflow). 00218 * 00219 * The behaviour otherwise is as described for apr_cstr_strtoi64(). 00220 * 00221 * @since New in 1.6. 00222 */ 00223 APR_DECLARE(apr_status_t) apr_cstr_atoi(int *n, const char *str); 00224 00225 /** 00226 * Parse the C string @a str into an unsigned 64 bit number, and return 00227 * it in @a *n. Assume that the number is represented in base @a base. 00228 * Raise an error if conversion fails (e.g. due to overflow), or if the 00229 * converted number is smaller than @a minval or larger than @a maxval. 
00230 * 00231 * Leading whitespace in @a str is skipped in a locale-dependent way. 00232 * After that, the string may contain an optional '+' (positive, default) 00233 * or '-' (negative) character, followed by an optional '0x' prefix if 00234 * @a base is 0 or 16, followed by numeric digits appropriate for the base. 00235 * If there are any more characters after the numeric digits, an error is 00236 * returned. 00237 * 00238 * If @a base is zero, then a leading '0x' or '0X' prefix means hexadecimal, 00239 * else a leading '0' means octal (as implemented, though not documented, in 00240 * apr_strtoi64()), else use base ten. 00241 * 00242 * @warning The implementation returns APR_ERANGE if the parsed number 00243 * is greater than APR_INT64_MAX, even if it is not greater than @a maxval. 00244 * 00245 * @since New in 1.6. 00246 */ 00247 APR_DECLARE(apr_status_t) apr_cstr_strtoui64(apr_uint64_t *n, const char *str, 00248 apr_uint64_t minval, 00249 apr_uint64_t maxval, 00250 int base); 00251 00252 /** 00253 * Parse the C string @a str into an unsigned 64 bit number, and return 00254 * it in @a *n. Assume that the number is represented in base 10. 00255 * Raise an error if conversion fails (e.g. due to overflow). 00256 * 00257 * The behaviour otherwise is as described for apr_cstr_strtoui64(), 00258 * including the upper limit of APR_INT64_MAX. 00259 * 00260 * @since New in 1.6. 00261 */ 00262 APR_DECLARE(apr_status_t) apr_cstr_atoui64(apr_uint64_t *n, const char *str); 00263 00264 /** 00265 * Parse the C string @a str into an unsigned 32 bit number, and return 00266 * it in @a *n. Assume that the number is represented in base 10. 00267 * Raise an error if conversion fails (e.g. due to overflow). 00268 * 00269 * The behaviour otherwise is as described for apr_cstr_strtoui64(), 00270 * including the upper limit of APR_INT64_MAX. 00271 * 00272 * @since New in 1.6. 
00273 */ 00274 APR_DECLARE(apr_status_t) apr_cstr_atoui(unsigned int *n, const char *str); 00275 00276 /** 00277 * Skip the common prefix @a prefix from the C string @a str, and return 00278 * a pointer to the next character after the prefix. 00279 * Return @c NULL if @a str does not start with @a prefix. 00280 * 00281 * @since New in 1.6. 00282 */ 00283 APR_DECLARE(const char *) apr_cstr_skip_prefix(const char *str, 00284 const char *prefix); 00285 00286 /** @} */ 00287 00288 #ifdef __cplusplus 00289 } 00290 #endif /* __cplusplus */ 00291 00292 #endif /* APR_CSTR_H */