Commit 360932f7, authored by Zuxy Meng, committed by Guillaume Poirier

Fix ASF format parser's broken UTF-16 string handling

1. Add a PUT_UTF8 macro to common.h; code borrowed from libavcodec/flacenc.c.
2. Make use of the macro in flacenc.c
Patch by Zuxy Meng % zuxy P meng A gmail P com %
Original thread:
Date: Nov 5, 2006 9:56 AM
Subject: [Ffmpeg-devel] PUT_UTF8 & asf format enhancement

Originally committed as revision 6911 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent f4e31985
@@ -1122,20 +1122,8 @@ static void put_sbits(PutBitContext *pb, int bits, int32_t val)
 static void write_utf8(PutBitContext *pb, uint32_t val)
 {
-    int bytes, shift;
-    if(val < 0x80){
-        put_bits(pb, 8, val);
-        return;
-    }
-    bytes= (av_log2(val)+4) / 5;
-    shift = (bytes - 1) * 6;
-    put_bits(pb, 8, (256 - (256>>bytes)) | (val >> shift));
-    while(shift >= 6){
-        shift -= 6;
-        put_bits(pb, 8, 0x80 | ((val >> shift) & 0x3F));
-    }
+    uint8_t tmp;
+    PUT_UTF8(val, tmp, put_bits(pb, 8, tmp);)
 }
 static void output_frame_header(FlacEncodeContext *s)
......
...@@ -322,7 +322,21 @@ static inline int ff_get_fourcc(const char *s){ ...@@ -322,7 +322,21 @@ static inline int ff_get_fourcc(const char *s){
#define MKTAG(a,b,c,d) (a | (b << 8) | (c << 16) | (d << 24)) #define MKTAG(a,b,c,d) (a | (b << 8) | (c << 16) | (d << 24))
#define MKBETAG(a,b,c,d) (d | (c << 8) | (b << 16) | (a << 24)) #define MKBETAG(a,b,c,d) (d | (c << 8) | (b << 16) | (a << 24))
/*!
* \def PUT_UTF8(val, tmp, PUT_BYTE)
* converts a 32-bit unicode character to its utf-8 encoded form (up to 6 bytes long).
* \param val is an input only argument and should be of type uint32_t. It holds
* a ucs4 encoded unicode character that is to be converted to utf-8. If
* val is given as a function it's executed only once.
* \param tmp is a temporary variable and should be of type uint8_t. It
* represents an intermediate value during conversion that is to be
* outputted by PUT_BYTE.
* \param PUT_BYTE writes the converted utf-8 bytes to any proper destination.
* It could be a function or a statement, and uses tmp as the input byte.
* For example, PUT_BYTE could be "*output++ = tmp;" PUT_BYTE will be
* executed up to 6 times, depending on the length of the converted
* unicode character.
*/
#define GET_UTF8(val, GET_BYTE, ERROR)\ #define GET_UTF8(val, GET_BYTE, ERROR)\
val= GET_BYTE;\ val= GET_BYTE;\
{\ {\
...@@ -338,6 +352,26 @@ static inline int ff_get_fourcc(const char *s){ ...@@ -338,6 +352,26 @@ static inline int ff_get_fourcc(const char *s){
}\ }\
} }
/*
 * PUT_UTF8(val, tmp, PUT_BYTE)
 *
 * Emits the UTF-8 encoding of the code point `val`, one byte at a time.
 *
 *   val       input expression, converted to uint32_t; evaluated exactly once.
 *   tmp       caller-supplied lvalue (uint8_t) that receives each output byte.
 *   PUT_BYTE  statement(s) expanded once per output byte, right after `tmp`
 *             has been assigned, e.g. "*out++ = tmp;".
 *
 * Values below 0x80 produce a single byte. Otherwise the lead byte has its
 * top `bytes` bits set (256 - (256 >> bytes)) and every following byte is
 * 0x80 | <next 6 payload bits>.
 *
 * The macro expands to a bare { } block rather than do { } while (0) so that
 * existing call sites written without a trailing semicolon keep compiling.
 *
 * The internal variables carry a put_utf8_ prefix so that `val` and PUT_BYTE
 * can safely mention caller variables named `in`, `bytes` or `shift`.
 *
 * NOTE(review): assumes av_log2(x) returns floor(log2(x)); under that
 * assumption values above 0x7FFFFFFF would need a shift count >= 32 and are
 * not supported -- confirm against av_log2's definition.
 */
#define PUT_UTF8(val, tmp, PUT_BYTE)\
    {\
        int put_utf8_bytes_, put_utf8_shift_;\
        uint32_t put_utf8_in_ = (val);\
        if (put_utf8_in_ < 0x80) {\
            tmp = put_utf8_in_;\
            PUT_BYTE\
        } else {\
            put_utf8_bytes_ = (av_log2(put_utf8_in_) + 4) / 5;\
            put_utf8_shift_ = (put_utf8_bytes_ - 1) * 6;\
            tmp = (256 - (256 >> put_utf8_bytes_)) | (put_utf8_in_ >> put_utf8_shift_);\
            PUT_BYTE\
            while (put_utf8_shift_ >= 6) {\
                put_utf8_shift_ -= 6;\
                tmp = 0x80 | ((put_utf8_in_ >> put_utf8_shift_) & 0x3f);\
                PUT_BYTE\
            }\
        }\
    }
#if defined(ARCH_X86) || defined(ARCH_POWERPC) #if defined(ARCH_X86) || defined(ARCH_POWERPC)
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
static inline uint64_t read_time(void) static inline uint64_t read_time(void)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment