avstring.h 13.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * Copyright (c) 2007 Mans Rullgard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

21 22
#ifndef AVUTIL_AVSTRING_H
#define AVUTIL_AVSTRING_H
23 24

#include <stddef.h>
25
#include <stdint.h>
26
#include "attributes.h"
27

28 29 30 31 32
/**
 * @addtogroup lavu_string
 * @{
 */

33 34 35 36 37 38
/**
 * Return non-zero if pfx is a prefix of str. If it is, *ptr is set to
 * the address of the first character in str after the prefix.
 *
 * @param str input string
 * @param pfx prefix to test
39
 * @param ptr updated if the prefix is matched inside str
40 41 42 43 44 45 46 47 48 49 50
 * @return non-zero if the prefix matches, zero otherwise
 */
int av_strstart(const char *str, const char *pfx, const char **ptr);

/**
 * Return non-zero if pfx is a prefix of str independent of case. If
 * it is, *ptr is set to the address of the first character in str
 * after the prefix.
 *
 * @param str input string
 * @param pfx prefix to test
51
 * @param ptr updated if the prefix is matched inside str
52 53 54 55
 * @return non-zero if the prefix matches, zero otherwise
 */
int av_stristart(const char *str, const char *pfx, const char **ptr);

56
/**
57 58 59
 * Locate the first case-independent occurrence in the string haystack
 * of the string needle.  A zero-length string needle is considered to
 * match at the start of haystack.
60 61 62
 *
 * This function is a case-insensitive version of the standard strstr().
 *
63 64 65 66
 * @param haystack string to search in
 * @param needle   string to search for
 * @return         pointer to the located match within haystack
 *                 or a null pointer if no match
67
 */
68
char *av_stristr(const char *haystack, const char *needle);
69

70 71 72 73 74 75 76 77 78 79 80 81 82 83
/**
 * Locate the first occurrence of the string needle in the string haystack
 * where not more than hay_length characters are searched. A zero-length
 * string needle is considered to match at the start of haystack.
 *
 * This function is a length-limited version of the standard strstr().
 *
 * @param haystack   string to search in
 * @param needle     string to search for
 * @param hay_length length of string to search in
 * @return           pointer to the located match within haystack
 *                   or a null pointer if no match
 */
char *av_strnstr(const char *haystack, const char *needle, size_t hay_length);
84

85 86
/**
 * Copy the string src to dst, but no more than size - 1 bytes, and
87
 * null-terminate dst.
88 89 90 91 92 93 94
 *
 * This function is the same as BSD strlcpy().
 *
 * @param dst destination buffer
 * @param src source string
 * @param size size of destination buffer
 * @return the length of src
95
 *
96
 * @warning since the return value is the length of src, src absolutely
97 98
 * _must_ be a properly 0-terminated string, otherwise this will read beyond
 * the end of the buffer and possibly crash.
99 100 101 102 103
 */
size_t av_strlcpy(char *dst, const char *src, size_t size);

/**
 * Append the string src to the string dst, but to a total length of
104
 * no more than size - 1 bytes, and null-terminate dst.
105 106 107 108 109 110 111 112
 *
 * This function is similar to BSD strlcat(), but differs when
 * size <= strlen(dst).
 *
 * @param dst destination buffer
 * @param src source string
 * @param size size of destination buffer
 * @return the total length of src and dst
113
 *
114 115 116
 * @warning since the return value uses the lengths of src and dst, both
 * absolutely _must_ be properly 0-terminated strings, otherwise this
 * will read beyond the end of the buffer and possibly crash.
117 118 119
 */
size_t av_strlcat(char *dst, const char *src, size_t size);

120 121
/**
 * Append output to a string, according to a format. Never write out of
122
 * the destination buffer, and always put a terminating 0 within
123 124 125 126 127 128 129 130 131
 * the buffer.
 * @param dst destination buffer (string to which the output is
 *  appended)
 * @param size total size of the destination buffer
 * @param fmt printf-compatible format string, specifying how the
 *  following parameters are used
 * @return the length of the string that would have been generated
 *  if enough space had been available
 */
132
size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4);
133

134 135 136 137 138 139 140 141 142 143 144 145 146 147
/**
 * Measure the length of a string, examining at most len characters.
 *
 * Counts the consecutive non-zero chars at the start of s, stopping at
 * the first 0 char or after len chars, whichever comes first.
 *
 * @param s   string to measure
 * @param len maximum number of characters to check in the string, that
 *            is the maximum value which is returned by the function
 * @return the number of non-zero chars found before the first 0 char
 *         or before the len limit was reached
 */
static inline size_t av_strnlen(const char *s, size_t len)
{
    size_t count = 0;

    /* The bound is tested first so s[count] is never read at or past len. */
    while (count < len && s[count])
        count++;

    return count;
}

148 149 150 151 152 153 154 155 156 157
/**
 * Print arguments following specified format into a large enough auto
 * allocated buffer. It is similar to GNU asprintf().
 * @param fmt printf-compatible format string, specifying how the
 *            following parameters are used.
 * @return the allocated string
 * @note You have to free the string yourself with av_free().
 */
char *av_asprintf(const char *fmt, ...) av_printf_format(1, 2);

158
/**
Lou Logan's avatar
Lou Logan committed
159
 * Convert a number to an av_malloced string.
160 161 162
 */
char *av_d2str(double d);

163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
/**
 * Unescape the given string until a non escaped terminating char,
 * and return the token corresponding to the unescaped string.
 *
 * The normal \ and ' escaping is supported. Leading and trailing
 * whitespaces are removed, unless they are escaped with '\' or are
 * enclosed between ''.
 *
 * @param buf the buffer to parse, buf will be updated to point to the
 * terminating char
 * @param term a 0-terminated list of terminating chars
 * @return the malloced unescaped string, which must be av_freed by
 * the user, NULL in case of allocation failure
 */
char *av_get_token(const char **buf, const char *term);

Stefano Sabatini's avatar
Stefano Sabatini committed
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
/**
 * Split the string into several tokens which can be accessed by
 * successive calls to av_strtok().
 *
 * A token is defined as a sequence of characters not belonging to the
 * set specified in delim.
 *
 * On the first call to av_strtok(), s should point to the string to
 * parse, and the value of saveptr is ignored. In subsequent calls, s
 * should be NULL, and saveptr should be unchanged since the previous
 * call.
 *
 * This function is similar to strtok_r() defined in POSIX.1.
 *
 * @param s the string to parse, may be NULL
 * @param delim 0-terminated list of token delimiters, must be non-NULL
 * @param saveptr user-provided pointer which points to stored
 * information necessary for av_strtok() to continue scanning the same
 * string. saveptr is updated to point to the next character after the
 * first delimiter found, or to NULL if the string was terminated
 * @return the found token, or NULL when no token is found
 */
char *av_strtok(char *s, const char *delim, char **saveptr);

203 204 205
/**
 * Locale-independent version of isdigit() for ASCII characters.
 */
206 207 208 209
static inline av_const int av_isdigit(int c)
{
    /* A digit is anything that is not outside the '0'..'9' range. */
    return !(c < '0' || c > '9');
}
210 211 212 213

/**
 * Locale-independent version of isgraph() for ASCII characters.
 */
214 215 216 217
static inline av_const int av_isgraph(int c)
{
    /* Codes up to and including 32 (space) are never graphic. */
    if (c <= 32)
        return 0;

    /* Of the remaining codes, only those below 127 (DEL) qualify. */
    return c < 127;
}
218 219 220 221

/**
 * Locale-independent version of isspace() for ASCII characters.
 */
222 223 224 225 226
static inline av_const int av_isspace(int c)
{
    /* Exactly six characters count as whitespace; everything else does not. */
    switch (c) {
    case ' ':
    case '\f':
    case '\n':
    case '\r':
    case '\t':
    case '\v':
        return 1;
    default:
        return 0;
    }
}
227

228
/**
229
 * Locale-independent conversion of ASCII characters to uppercase.
230
 */
231
static inline av_const int av_toupper(int c)
{
    /*
     * ASCII upper- and lowercase letters differ only in bit 0x20, so
     * flipping that bit on a lowercase letter yields its uppercase form.
     * Any other value is returned untouched.
     */
    return (c >= 'a' && c <= 'z') ? (c ^ 0x20) : c;
}

/**
239
 * Locale-independent conversion of ASCII characters to lowercase.
240
 */
241
static inline av_const int av_tolower(int c)
{
    /*
     * ASCII upper- and lowercase letters differ only in bit 0x20, so
     * flipping that bit on an uppercase letter yields its lowercase form.
     * Any other value is returned untouched.
     */
    return (c >= 'A' && c <= 'Z') ? (c ^ 0x20) : c;
}

248 249 250
/**
 * Locale-independent version of isxdigit() for ASCII characters.
 */
251 252 253 254 255
static inline av_const int av_isxdigit(int c)
{
    /* Fold to lowercase first so only the 'a'..'f' range needs checking. */
    const int lower = av_tolower(c);

    if (av_isdigit(lower))
        return 1;

    return lower >= 'a' && lower <= 'f';
}
256

257
/**
258
 * Locale-independent case-insensitive compare.
259
 * @note This means only ASCII-range characters are case-insensitive
260 261 262 263
 */
int av_strcasecmp(const char *a, const char *b);

/**
264
 * Locale-independent case-insensitive compare.
265
 * @note This means only ASCII-range characters are case-insensitive
266 267 268
 */
int av_strncasecmp(const char *a, const char *b, size_t n);

269 270 271 272
/**
 * Locale-independent string replace.
 * @note This means only ASCII-range characters are replaced
 */
273
char *av_strireplace(const char *str, const char *from, const char *to);
274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289

/**
 * Thread safe basename.
 * @param path the path, on DOS both \ and / are considered separators.
 * @return pointer to the basename substring.
 */
const char *av_basename(const char *path);

/**
 * Thread safe dirname.
 * @param path the path, on DOS both \ and / are considered separators.
 * @return the path with the separator replaced by the string terminator or ".".
 * @note the function may change the input string.
 */
const char *av_dirname(char *path);

290 291
/**
 * Match instances of a name in a comma-separated list of names.
292 293 294 295 296
 * List entries are checked from the start to the end of the names list,
 * the first match ends further processing. If an entry prefixed with '-'
 * matches, then 0 is returned. The "ALL" list entry is considered to
 * match all names.
 *
297 298 299 300 301 302
 * @param name  Name to look for.
 * @param names List of names.
 * @return 1 on match, 0 otherwise.
 */
int av_match_name(const char *name, const char *names);

303 304 305 306 307 308 309 310 311 312
/**
 * Append path component to the existing path.
 * Path separator '/' is placed between when needed.
 * The resulting string has to be freed with av_free().
 * @param path      base path
 * @param component component to be appended
 * @return new path or NULL on error.
 */
char *av_append_path_component(const char *path, const char *component);

Stefano Sabatini's avatar
Stefano Sabatini committed
313 314 315 316 317 318 319 320 321 322 323 324 325 326
/**
 * Escaping modes that can be passed as the mode argument of av_escape().
 */
enum AVEscapeMode {
    AV_ESCAPE_MODE_AUTO,      ///< Use auto-selected escaping mode.
    AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.
    AV_ESCAPE_MODE_QUOTE,     ///< Use single-quote escaping.
};

/**
 * Consider spaces special and escape them even in the middle of the
 * string.
 *
 * This is equivalent to adding the whitespace characters to the special
 * characters lists, except it is guaranteed to use the exact same list
 * of whitespace characters as the rest of libavutil.
 */
327
#define AV_ESCAPE_FLAG_WHITESPACE (1 << 0)
Stefano Sabatini's avatar
Stefano Sabatini committed
328 329 330 331 332 333

/**
 * Escape only specified special characters.
 * Without this flag, escape also any characters that may be considered
 * special by av_get_token(), such as the single quote.
 */
334
#define AV_ESCAPE_FLAG_STRICT (1 << 1)
Stefano Sabatini's avatar
Stefano Sabatini committed
335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351

/**
 * Escape string in src, and put the escaped string in an allocated
 * string in *dst, which must be freed with av_free().
 *
 * @param dst           pointer where an allocated string is put
 * @param src           string to escape, must be non-NULL
 * @param special_chars string containing the special characters which
 *                      need to be escaped, can be NULL
 * @param mode          escape mode to employ, see the AV_ESCAPE_MODE_* enum values.
 *                      Any unknown value for mode will be considered equivalent to
 *                      AV_ESCAPE_MODE_BACKSLASH, but this behaviour can change without
 *                      notice.
 * @param flags         flags which control how to escape, see AV_ESCAPE_FLAG_* macros
 * @return the length of the allocated string, or a negative error code in case of error
 * @see av_bprint_escape()
 */
352
av_warn_unused_result
Stefano Sabatini's avatar
Stefano Sabatini committed
353 354 355
int av_escape(char **dst, const char *src, const char *special_chars,
              enum AVEscapeMode mode, int flags);

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391
/*
 * Flags for the flags argument of av_utf8_decode(). Each flag occupies a
 * distinct bit, so several of them can be combined with bitwise OR.
 */
#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES          1 ///< accept codepoints over 0x10FFFF
#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS             2 ///< accept non-characters - 0xFFFE and 0xFFFF
#define AV_UTF8_FLAG_ACCEPT_SURROGATES                 4 ///< accept UTF-16 surrogates codes
#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML

/*
 * Combination of all AV_UTF8_FLAG_ACCEPT_* flags.
 *
 * The replacement list is parenthesized so the macro acts as a single
 * value inside larger expressions. Without the parentheses,
 * (flags & AV_UTF8_FLAG_ACCEPT_ALL) would parse as
 * ((flags & 1) | 2 | 4), because & binds tighter than |.
 */
#define AV_UTF8_FLAG_ACCEPT_ALL \
    (AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES | \
     AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS    | \
     AV_UTF8_FLAG_ACCEPT_SURROGATES)

/**
 * Read and decode a single UTF-8 code point (character) from the
 * buffer in *buf, and update *buf to point to the next byte to
 * decode.
 *
 * In case of an invalid byte sequence, the pointer will be updated to
 * the next byte after the invalid sequence and the function will
 * return an error code.
 *
 * Depending on the specified flags, the function will also fail in
 * case the decoded code point does not belong to a valid range.
 *
 * @note For speed-relevant code a carefully implemented use of
 * GET_UTF8() may be preferred.
 *
 * @param codep   pointer used to return the parsed code in case of success.
 *                The value in *codep is set even in case the range check fails.
 * @param bufp    pointer to the address of the first byte of the sequence
 *                to decode, updated by the function to point to the
 *                byte next after the decoded sequence
 * @param buf_end pointer to the end of the buffer, points to the next
 *                byte past the last in the buffer. This is used to
 *                avoid buffer overreads (in case of an unfinished
 *                UTF-8 sequence towards the end of the buffer).
 * @param flags   a collection of AV_UTF8_FLAG_* flags
 * @return >= 0 in case a sequence was successfully read, a negative
 * value in case of invalid sequence
 */
392
av_warn_unused_result
393 394 395
int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
                   unsigned int flags);

396 397 398 399 400 401 402
/**
 * Check if a name is in a list.
 * @returns 0 if not found, or the 1 based index where it has been found in the
 *            list.
 */
int av_match_list(const char *name, const char *list, char separator);

403 404 405 406 407 408
/**
 * See libc sscanf manual for more information.
 * Locale-independent sscanf implementation.
 */
int av_sscanf(const char *string, const char *format, ...);

409 410 411 412
/**
 * @}
 */

413
#endif /* AVUTIL_AVSTRING_H */