Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
68590650
Commit
68590650
authored
Oct 02, 2013
by
Stefano Sabatini
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
lavu/avstring: add av_utf8_decode() function
parent
e782eea1
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
180 additions
and
1 deletion
+180
-1
APIchanges
doc/APIchanges
+3
-0
Makefile
libavutil/Makefile
+1
-0
avstring.c
libavutil/avstring.c
+64
-0
avstring.h
libavutil/avstring.h
+40
-0
utf8.c
libavutil/utf8.c
+71
-0
version.h
libavutil/version.h
+1
-1
No files found.
doc/APIchanges
View file @
68590650
...
...
@@ -15,6 +15,9 @@ libavutil: 2012-10-22
API changes, most recent first:
2013-11-XX - xxxxxxx - lavu 52.54.100 - avstring.h
Add av_utf8_decode() function.
2013-11-xx - xxxxxxx - lavc 55.44.100 - avcodec.h
Add av_packet_{un,}pack_dictionary()
Add AV_PKT_METADATA_UPDATE side data type, used to transmit key/value
...
...
libavutil/Makefile
View file @
68590650
...
...
@@ -157,6 +157,7 @@ TESTPROGS = adler32 \
sha
\
sha512
\
tree
\
utf8
\
xtea
\
TESTPROGS-$(HAVE_LZO1X_999_COMPRESS)
+=
lzo
...
...
libavutil/avstring.c
View file @
68590650
...
...
@@ -307,6 +307,70 @@ int av_isxdigit(int c)
return
av_isdigit
(
c
)
||
(
c
>=
'a'
&&
c
<=
'f'
);
}
/**
 * Decode a single UTF-8 encoded code point from the buffer at *bufp.
 *
 * On every path that consumes input, *bufp is updated to point past the
 * bytes that were read, whether or not decoding succeeded. *codep is
 * written once a complete byte sequence has been read, even when the
 * value is subsequently rejected by one of the range checks below.
 *
 * @param codep   receives the decoded code point
 * @param bufp    in: first byte to decode; out: first byte not consumed
 * @param buf_end one past the last readable byte of the buffer
 * @param flags   combination of AV_UTF8_FLAG_* values
 * @return 0 on success (also when p >= buf_end on entry, in which case
 *         nothing is consumed and *codep is left untouched),
 *         AVERROR(EILSEQ) for a malformed sequence or a rejected value
 */
int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
                   unsigned int flags)
{
    const uint8_t *p = *bufp;
    uint32_t top;
    uint64_t code;
    int ret = 0;

    if (p >= buf_end)
        return 0;

    code = *p++;

    /* first sequence byte starts with 10, or is 1111-1110 or 1111-1111,
       which is not admitted */
    if ((code & 0xc0) == 0x80 || code >= 0xFE) {
        ret = AVERROR(EILSEQ);
        goto end;
    }

    /* top tracks the lead-byte length bit that announces one more
       continuation byte; it moves up together with the accumulated code */
    top = (code & 128) >> 1;

    while (code & top) {
        unsigned int cont;

        if (p >= buf_end) {
            ret = AVERROR(EILSEQ); /* incomplete sequence */
            goto end;
        }

        /* a continuation byte must have the form 10xx-xxxx; the byte is
           consumed even when it turns out to be invalid */
        cont = *p++;
        if ((cont & 0xC0) != 0x80) {
            ret = AVERROR(EILSEQ);
            goto end;
        }
        code = (code << 6) + (cont & 0x3F); /* append the 6 payload bits */
        top <<= 5;
    }

    /* drop the length-marker bits of the lead byte; the arithmetic is
       unsigned, so a wrapped (top << 1) yields an all-ones 32-bit mask */
    code &= (top << 1) - 1;

    /* the shift is done in 64 bits: 1 << 31 on a plain int is undefined */
    if (code >= (uint64_t)1 << 31) {
        ret = AVERROR(EILSEQ); /* out-of-range value */
        goto end;
    }

    *codep = code;

    /* NOTE(review): overlong encodings (e.g. C0 80) are still accepted
       here, although RFC 3629 asks decoders to reject them; consider a
       per-length minimum value check. */

    if (code > 0x10FFFF && !(flags & AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES))
        ret = AVERROR(EILSEQ);
    if (code < 0x20 && code != 0x9 && code != 0xA && code != 0xD &&
        (flags & AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES))
        ret = AVERROR(EILSEQ);
    if (code >= 0xD800 && code <= 0xDFFF &&
        !(flags & AV_UTF8_FLAG_ACCEPT_SURROGATES))
        ret = AVERROR(EILSEQ);
    /* both non-characters are refused unless the caller opted in */
    if ((code == 0xFFFE || code == 0xFFFF) &&
        !(flags & AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS))
        ret = AVERROR(EILSEQ);

end:
    *bufp = p;
    return ret;
}
#ifdef TEST
int
main
(
void
)
...
...
libavutil/avstring.h
View file @
68590650
...
...
@@ -22,6 +22,7 @@
#define AVUTIL_AVSTRING_H
#include <stddef.h>
#include <stdint.h>
#include "attributes.h"
/**
...
...
@@ -295,6 +296,45 @@ enum AVEscapeMode {
int
av_escape
(
char
**
dst
,
const
char
*
src
,
const
char
*
special_chars
,
enum
AVEscapeMode
mode
,
int
flags
);
/* Flags controlling which decoded values av_utf8_decode() accepts. */
#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES          1 ///< accept codepoints over 0x10FFFF
#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS             2 ///< accept non-characters - 0xFFFE and 0xFFFF
#define AV_UTF8_FLAG_ACCEPT_SURROGATES                 4 ///< accept UTF-16 surrogates codes
#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML

/* Union of the three ACCEPT flags. The expansion is parenthesized so that
 * it behaves as a single value next to operators that bind tighter than
 * '|' (for instance "x & AV_UTF8_FLAG_ACCEPT_ALL" or "~AV_UTF8_FLAG_ACCEPT_ALL"). */
#define AV_UTF8_FLAG_ACCEPT_ALL \
    (AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES | AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS | AV_UTF8_FLAG_ACCEPT_SURROGATES)
/**
 * Read and decode one UTF-8 code point (character) starting at *bufp,
 * then advance *bufp to the next byte to decode.
 *
 * If the bytes do not form a valid sequence, *bufp is moved to the byte
 * following the invalid sequence and an error code is returned.
 *
 * Depending on the flags given, the function also fails when the
 * decoded code point does not belong to a valid range.
 *
 * @note Speed-relevant code may be better served by a carefully
 * implemented use of GET_UTF8().
 *
 * @param codep   pointer used to return the parsed code on success.
 *                *codep is also set when only the range check fails.
 * @param bufp    pointer to the address of the first byte of the
 *                sequence to decode; updated by the function to point
 *                to the byte right after the decoded sequence
 * @param buf_end pointer to the end of the buffer, i.e. to the byte
 *                just past the last one in the buffer. It is used to
 *                avoid buffer overreads (for example an unfinished
 *                UTF-8 sequence at the end of the buffer).
 * @param flags   a collection of AV_UTF8_FLAG_* flags
 * @return >= 0 if a sequence was read successfully, a negative value
 *         in case of an invalid sequence
 */
int av_utf8_decode(int32_t *codep, const uint8_t **bufp,
                   const uint8_t *buf_end, unsigned int flags);
/**
* @}
*/
...
...
libavutil/utf8.c
0 → 100644
View file @
68590650
/*
* Copyright (c) 2013 Stefano Sabatini
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdio.h>
#include "libavutil/avstring.h"
#include "libavutil/file.h"
/**
 * Print the first l bytes at p as two-digit uppercase hex values, then
 * emit an empty string with a field width of indent - 2 * l so that
 * whatever is printed next starts at a fixed column.
 */
static void print_sequence(const char *p, int l, int indent)
{
    const char *cursor = p;
    int remaining = l;
    int pad_width;

    while (remaining-- > 0)
        printf("%02X", (uint8_t)*cursor++);

    /* each byte took two columns */
    pad_width = indent - l * 2;
    printf("%*s", pad_width, "");
}
/**
 * UTF-8 decoding test tool.
 *
 * Maps the file named by argv[1] and walks it with av_utf8_decode()
 * (flags = 0). For every step it prints the consumed bytes in hex and
 * then either the decoded value (decimal, hex, byte count) followed by
 * the raw sequence or "invalid code range", or "invalid sequence" when
 * no code point was stored.
 *
 * @return 0 on success, 1 when no file name is given or the file cannot
 *         be mapped
 */
int main(int argc, char **argv)
{
    int ret;
    const char *filename;
    uint8_t *file_buf;
    size_t file_buf_size;
    /* same type as the decoder's output parameter; -1 marks "nothing
       stored", which the decoder cannot produce because it refuses
       values of 2^31 and above */
    int32_t code;
    const int32_t code_unset = -1;
    const uint8_t *p, *endp;

    if (argc < 2) {
        fprintf(stderr, "Usage: utf8 <file>\n");
        return 1;
    }
    filename = argv[1];

    ret = av_file_map(filename, &file_buf, &file_buf_size, 0, NULL);
    if (ret < 0)
        return 1;

    p    = file_buf;
    endp = file_buf + file_buf_size;
    while (p < endp) {
        int l, r;
        const uint8_t *p0 = p;

        code = code_unset;
        r = av_utf8_decode(&code, &p, endp, 0);
        l = (int)(p - p0); /* number of bytes consumed by this step */
        print_sequence((const char *)p0, l, 20);
        if (code != code_unset) {
            printf("%-10d 0x%-10X %-5d ", (int)code, (unsigned int)code, l);
            if (r >= 0) {
                /* show a newline as an escape so the table stays aligned */
                if (*p0 == '\n')
                    printf("\\n\n");
                else
                    printf("%.*s\n", l, (const char *)p0);
            } else {
                printf("invalid code range\n");
            }
        } else {
            printf("invalid sequence\n");
        }
    }

    av_file_unmap(file_buf, file_buf_size);
    return 0;
}
libavutil/version.h
View file @
68590650
...
...
@@ -75,7 +75,7 @@
*/
#define LIBAVUTIL_VERSION_MAJOR 52
#define LIBAVUTIL_VERSION_MINOR 5
3
#define LIBAVUTIL_VERSION_MINOR 5
4
#define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment