avstring.h 13 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * Copyright (c) 2007 Mans Rullgard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

21 22
#ifndef AVUTIL_AVSTRING_H
#define AVUTIL_AVSTRING_H
23 24

#include <stddef.h>
25
#include <stdint.h>
26
#include "attributes.h"
27

28 29 30 31 32
/**
 * @addtogroup lavu_string
 * @{
 */

33 34 35 36 37 38
/**
 * Return non-zero if pfx is a prefix of str. If it is, *ptr is set to
 * the address of the first character in str after the prefix.
 *
 * @param str input string
 * @param pfx prefix to test
39
 * @param ptr updated if the prefix is matched inside str
40 41 42 43 44 45 46 47 48 49 50
 * @return non-zero if the prefix matches, zero otherwise
 */
int av_strstart(const char *str, const char *pfx, const char **ptr);

/**
 * Return non-zero if pfx is a prefix of str independent of case. If
 * it is, *ptr is set to the address of the first character in str
 * after the prefix.
 *
 * @param str input string
 * @param pfx prefix to test
51
 * @param ptr updated if the prefix is matched inside str
52 53 54 55
 * @return non-zero if the prefix matches, zero otherwise
 */
int av_stristart(const char *str, const char *pfx, const char **ptr);

56
/**
57 58 59
 * Locate the first case-independent occurrence in the string haystack
 * of the string needle.  A zero-length string needle is considered to
 * match at the start of haystack.
60 61 62
 *
 * This function is a case-insensitive version of the standard strstr().
 *
63 64 65 66
 * @param haystack string to search in
 * @param needle   string to search for
 * @return         pointer to the located match within haystack
 *                 or a null pointer if no match
67
 */
68
char *av_stristr(const char *haystack, const char *needle);
69

70 71 72 73 74 75 76 77 78 79 80 81 82 83
/**
 * Locate the first occurrence of the string needle in the string haystack
 * where not more than hay_length characters are searched. A zero-length
 * string needle is considered to match at the start of haystack.
 *
 * This function is a length-limited version of the standard strstr().
 *
 * @param haystack   string to search in
 * @param needle     string to search for
 * @param hay_length length of string to search in
 * @return           pointer to the located match within haystack
 *                   or a null pointer if no match
 */
char *av_strnstr(const char *haystack, const char *needle, size_t hay_length);
84

85 86
/**
 * Copy the string src to dst, but no more than size - 1 bytes, and
87
 * null-terminate dst.
88 89 90 91 92 93 94
 *
 * This function is the same as BSD strlcpy().
 *
 * @param dst destination buffer
 * @param src source string
 * @param size size of destination buffer
 * @return the length of src
95
 *
96
 * @warning since the return value is the length of src, src absolutely
97 98
 * _must_ be a properly 0-terminated string, otherwise this will read beyond
 * the end of the buffer and possibly crash.
99 100 101 102 103
 */
size_t av_strlcpy(char *dst, const char *src, size_t size);

/**
 * Append the string src to the string dst, but to a total length of
104
 * no more than size - 1 bytes, and null-terminate dst.
105 106 107 108 109 110 111 112
 *
 * This function is similar to BSD strlcat(), but differs when
 * size <= strlen(dst).
 *
 * @param dst destination buffer
 * @param src source string
 * @param size size of destination buffer
 * @return the total length of src and dst
113
 *
114 115 116
 * @warning since the return value use the length of src and dst, these
 * absolutely _must_ be a properly 0-terminated strings, otherwise this
 * will read beyond the end of the buffer and possibly crash.
117 118 119
 */
size_t av_strlcat(char *dst, const char *src, size_t size);

120 121
/**
 * Append output to a string, according to a format. Never write out of
122
 * the destination buffer, and always put a terminating 0 within
123 124 125 126 127 128 129 130 131
 * the buffer.
 * @param dst destination buffer (string to which the output is
 *  appended)
 * @param size total size of the destination buffer
 * @param fmt printf-compatible format string, specifying how the
 *  following parameters are used
 * @return the length of the string that would have been generated
 *  if enough space had been available
 */
132
size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4);
133

134 135 136 137 138 139 140 141 142 143 144 145 146 147
/**
 * Get the count of continuous non zero chars starting from the beginning.
 *
 * @param len maximum number of characters to check in the string, that
 *            is the maximum value which is returned by the function
 */
static inline size_t av_strnlen(const char *s, size_t len)
{
    size_t i;
    for (i = 0; i < len && s[i]; i++)
        ;
    return i;
}

148 149 150 151 152 153 154 155 156 157
/**
 * Print arguments following specified format into a large enough auto
 * allocated buffer. It is similar to GNU asprintf().
 * @param fmt printf-compatible format string, specifying how the
 *            following parameters are used.
 * @return the allocated string
 * @note You have to free the string yourself with av_free().
 */
char *av_asprintf(const char *fmt, ...) av_printf_format(1, 2);

158 159 160 161 162
/**
 * Convert a number to a av_malloced string.
 */
char *av_d2str(double d);

163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
/**
 * Unescape the given string until a non escaped terminating char,
 * and return the token corresponding to the unescaped string.
 *
 * The normal \ and ' escaping is supported. Leading and trailing
 * whitespaces are removed, unless they are escaped with '\' or are
 * enclosed between ''.
 *
 * @param buf the buffer to parse, buf will be updated to point to the
 * terminating char
 * @param term a 0-terminated list of terminating chars
 * @return the malloced unescaped string, which must be av_freed by
 * the user, NULL in case of allocation failure
 */
char *av_get_token(const char **buf, const char *term);

Stefano Sabatini's avatar
Stefano Sabatini committed
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
/**
 * Split the string into several tokens which can be accessed by
 * successive calls to av_strtok().
 *
 * A token is defined as a sequence of characters not belonging to the
 * set specified in delim.
 *
 * On the first call to av_strtok(), s should point to the string to
 * parse, and the value of saveptr is ignored. In subsequent calls, s
 * should be NULL, and saveptr should be unchanged since the previous
 * call.
 *
 * This function is similar to strtok_r() defined in POSIX.1.
 *
 * @param s the string to parse, may be NULL
 * @param delim 0-terminated list of token delimiters, must be non-NULL
 * @param saveptr user-provided pointer which points to stored
 * information necessary for av_strtok() to continue scanning the same
 * string. saveptr is updated to point to the next character after the
 * first delimiter found, or to NULL if the string was terminated
 * @return the found token, or NULL when no token is found
 */
char *av_strtok(char *s, const char *delim, char **saveptr);

203 204 205
/**
 * Locale-independent conversion of ASCII isdigit.
 */
206
av_const int av_isdigit(int c);
207 208 209 210

/**
 * Locale-independent conversion of ASCII isgraph.
 */
211
av_const int av_isgraph(int c);
212 213 214 215

/**
 * Locale-independent conversion of ASCII isspace.
 */
216
av_const int av_isspace(int c);
217

218
/**
219
 * Locale-independent conversion of ASCII characters to uppercase.
220
 */
221
static inline av_const int av_toupper(int c)
222 223 224 225 226 227 228
{
    if (c >= 'a' && c <= 'z')
        c ^= 0x20;
    return c;
}

/**
229
 * Locale-independent conversion of ASCII characters to lowercase.
230
 */
231
static inline av_const int av_tolower(int c)
232 233 234 235 236 237
{
    if (c >= 'A' && c <= 'Z')
        c ^= 0x20;
    return c;
}

238 239 240
/**
 * Locale-independent conversion of ASCII isxdigit.
 */
241
av_const int av_isxdigit(int c);
242

243
/**
244
 * Locale-independent case-insensitive compare.
245
 * @note This means only ASCII-range characters are case-insensitive
246 247 248 249
 */
int av_strcasecmp(const char *a, const char *b);

/**
250
 * Locale-independent case-insensitive compare.
251
 * @note This means only ASCII-range characters are case-insensitive
252 253 254
 */
int av_strncasecmp(const char *a, const char *b, size_t n);

255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270

/**
 * Thread safe basename.
 * @param path the path, on DOS both \ and / are considered separators.
 * @return pointer to the basename substring.
 */
const char *av_basename(const char *path);

/**
 * Thread safe dirname.
 * @param path the path, on DOS both \ and / are considered separators.
 * @return the path with the separator replaced by the string terminator or ".".
 * @note the function may change the input string.
 */
const char *av_dirname(char *path);

271 272 273 274 275 276 277 278
/**
 * Match instances of a name in a comma-separated list of names.
 * @param name  Name to look for.
 * @param names List of names.
 * @return 1 on match, 0 otherwise.
 */
int av_match_name(const char *name, const char *names);

279 280 281 282 283 284 285 286 287 288
/**
 * Append path component to the existing path.
 * Path separator '/' is placed between when needed.
 * Resulting string have to be freed with av_free().
 * @param path      base path
 * @param component component to be appended
 * @return new path or NULL on error.
 */
char *av_append_path_component(const char *path, const char *component);

Stefano Sabatini's avatar
Stefano Sabatini committed
289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330
enum AVEscapeMode {
    AV_ESCAPE_MODE_AUTO,      ///< Use auto-selected escaping mode.
    AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.
    AV_ESCAPE_MODE_QUOTE,     ///< Use single-quote escaping.
};

/**
 * Consider spaces special and escape them even in the middle of the
 * string.
 *
 * This is equivalent to adding the whitespace characters to the special
 * characters lists, except it is guaranteed to use the exact same list
 * of whitespace characters as the rest of libavutil.
 */
#define AV_ESCAPE_FLAG_WHITESPACE 0x01

/**
 * Escape only specified special characters.
 * Without this flag, escape also any characters that may be considered
 * special by av_get_token(), such as the single quote.
 */
#define AV_ESCAPE_FLAG_STRICT 0x02

/**
 * Escape string in src, and put the escaped string in an allocated
 * string in *dst, which must be freed with av_free().
 *
 * @param dst           pointer where an allocated string is put
 * @param src           string to escape, must be non-NULL
 * @param special_chars string containing the special characters which
 *                      need to be escaped, can be NULL
 * @param mode          escape mode to employ, see AV_ESCAPE_MODE_* macros.
 *                      Any unknown value for mode will be considered equivalent to
 *                      AV_ESCAPE_MODE_BACKSLASH, but this behaviour can change without
 *                      notice.
 * @param flags         flags which control how to escape, see AV_ESCAPE_FLAG_ macros
 * @return the length of the allocated string, or a negative error code in case of error
 * @see av_bprint_escape()
 */
int av_escape(char **dst, const char *src, const char *special_chars,
              enum AVEscapeMode mode, int flags);

331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369
#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES          1 ///< accept codepoints over 0x10FFFF
#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS             2 ///< accept non-characters - 0xFFFE and 0xFFFF
#define AV_UTF8_FLAG_ACCEPT_SURROGATES                 4 ///< accept UTF-16 surrogates codes
#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML

#define AV_UTF8_FLAG_ACCEPT_ALL \
    AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES|AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS|AV_UTF8_FLAG_ACCEPT_SURROGATES

/**
 * Read and decode a single UTF-8 code point (character) from the
 * buffer in *buf, and update *buf to point to the next byte to
 * decode.
 *
 * In case of an invalid byte sequence, the pointer will be updated to
 * the next byte after the invalid sequence and the function will
 * return an error code.
 *
 * Depending on the specified flags, the function will also fail in
 * case the decoded code point does not belong to a valid range.
 *
 * @note For speed-relevant code a carefully implemented use of
 * GET_UTF8() may be preferred.
 *
 * @param codep   pointer used to return the parsed code in case of success.
 *                The value in *codep is set even in case the range check fails.
 * @param bufp    pointer to the address the first byte of the sequence
 *                to decode, updated by the function to point to the
 *                byte next after the decoded sequence
 * @param buf_end pointer to the end of the buffer, points to the next
 *                byte past the last in the buffer. This is used to
 *                avoid buffer overreads (in case of an unfinished
 *                UTF-8 sequence towards the end of the buffer).
 * @param flags   a collection of AV_UTF8_FLAG_* flags
 * @return >= 0 in case a sequence was successfully read, a negative
 * value in case of invalid sequence
 */
int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
                   unsigned int flags);

370 371 372 373 374 375 376
/**
 * Check if a name is in a list.
 * @returns 0 if not found, or the 1 based index where it has been found in the
 *            list.
 */
int av_match_list(const char *name, const char *list, char separator);

377 378 379 380
/**
 * @}
 */

381
#endif /* AVUTIL_AVSTRING_H */