Commit cac2295b authored by wm4, committed by Michael Niedermayer

matroska: redo seekhead handling

In particular, this reads chained seekheads. This makes seeking faster
in files which have the index indirectly linked through 2 seekheads.

As a side-effect, this warns when reading level-1 (toplevel) elements
multiple times (other than seekheads, clusters, and void/crc). Such
elements are not valid and likely break everything.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent a5c9befb
...@@ -71,6 +71,7 @@ typedef enum { ...@@ -71,6 +71,7 @@ typedef enum {
EBML_UTF8, EBML_UTF8,
EBML_BIN, EBML_BIN,
EBML_NEST, EBML_NEST,
EBML_LEVEL1,
EBML_PASS, EBML_PASS,
EBML_STOP, EBML_STOP,
EBML_SINT, EBML_SINT,
...@@ -252,6 +253,12 @@ typedef struct { ...@@ -252,6 +253,12 @@ typedef struct {
EbmlList blocks; EbmlList blocks;
} MatroskaCluster; } MatroskaCluster;
/* Bookkeeping record for one level-1 (toplevel) element of the segment. */
typedef struct {
/* EBML ID of the element this record tracks. */
uint64_t id;
/* Position taken from a seekhead entry; the demuxer adds segment_start to
 * it before seeking. Stays 0 when the record was not filled from a seekhead. */
uint64_t pos;
/* Non-zero once the element has been parsed (or a parse attempt was made). */
int parsed;
} MatroskaLevel1Element;
typedef struct { typedef struct {
AVFormatContext *ctx; AVFormatContext *ctx;
...@@ -290,6 +297,10 @@ typedef struct { ...@@ -290,6 +297,10 @@ typedef struct {
/* File has a CUES element, but we defer parsing until it is needed. */ /* File has a CUES element, but we defer parsing until it is needed. */
int cues_parsing_deferred; int cues_parsing_deferred;
/* Level1 elements and whether they were read yet */
MatroskaLevel1Element level1_elems[64];
int num_level1_elems;
int current_cluster_num_blocks; int current_cluster_num_blocks;
int64_t current_cluster_pos; int64_t current_cluster_pos;
MatroskaCluster current_cluster; MatroskaCluster current_cluster;
...@@ -551,13 +562,13 @@ static EbmlSyntax matroska_seekhead[] = { ...@@ -551,13 +562,13 @@ static EbmlSyntax matroska_seekhead[] = {
}; };
static EbmlSyntax matroska_segment[] = { static EbmlSyntax matroska_segment[] = {
{ MATROSKA_ID_INFO, EBML_NEST, 0, 0, { .n = matroska_info } }, { MATROSKA_ID_INFO, EBML_LEVEL1, 0, 0, { .n = matroska_info } },
{ MATROSKA_ID_TRACKS, EBML_NEST, 0, 0, { .n = matroska_tracks } }, { MATROSKA_ID_TRACKS, EBML_LEVEL1, 0, 0, { .n = matroska_tracks } },
{ MATROSKA_ID_ATTACHMENTS, EBML_NEST, 0, 0, { .n = matroska_attachments } }, { MATROSKA_ID_ATTACHMENTS, EBML_LEVEL1, 0, 0, { .n = matroska_attachments } },
{ MATROSKA_ID_CHAPTERS, EBML_NEST, 0, 0, { .n = matroska_chapters } }, { MATROSKA_ID_CHAPTERS, EBML_LEVEL1, 0, 0, { .n = matroska_chapters } },
{ MATROSKA_ID_CUES, EBML_NEST, 0, 0, { .n = matroska_index } }, { MATROSKA_ID_CUES, EBML_LEVEL1, 0, 0, { .n = matroska_index } },
{ MATROSKA_ID_TAGS, EBML_NEST, 0, 0, { .n = matroska_tags } }, { MATROSKA_ID_TAGS, EBML_LEVEL1, 0, 0, { .n = matroska_tags } },
{ MATROSKA_ID_SEEKHEAD, EBML_NEST, 0, 0, { .n = matroska_seekhead } }, { MATROSKA_ID_SEEKHEAD, EBML_LEVEL1, 0, 0, { .n = matroska_seekhead } },
{ MATROSKA_ID_CLUSTER, EBML_STOP }, { MATROSKA_ID_CLUSTER, EBML_STOP },
{ 0 } { 0 }
}; };
...@@ -976,6 +987,42 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax, ...@@ -976,6 +987,42 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
return res; return res;
} }
/*
 * Look up the bookkeeping entry for the level1 element with the given ID and
 * return it; when no entry matches, append a new zero-initialized one and
 * return that instead.
 *
 * Returns NULL for clusters (they are never tracked) and when the fixed-size
 * table is already full.
 */
static MatroskaLevel1Element *matroska_find_level1_elem(MatroskaDemuxContext *matroska,
                                                        uint32_t id)
{
    MatroskaLevel1Element *entry;
    int n;

    // Some files link to all clusters; useless.
    if (id == MATROSKA_ID_CLUSTER)
        return NULL;

    // There can be multiple seekheads, so a seekhead never reuses an
    // existing entry; every other ID is searched for first.
    for (n = 0; id != MATROSKA_ID_SEEKHEAD && n < matroska->num_level1_elems; n++) {
        entry = &matroska->level1_elems[n];
        if (entry->id == id)
            return entry;
    }

    // Only a completely broken file would have more elements.
    // The hard limit doubles as a low-effort escape from circular seekheads,
    // because every iteration adds another level1 entry.
    if (matroska->num_level1_elems >= FF_ARRAY_ELEMS(matroska->level1_elems)) {
        av_log(matroska->ctx, AV_LOG_ERROR, "Too many level1 elements or circular seekheads.\n");
        return NULL;
    }

    // Claim the next free slot and reset it to { id, pos = 0, parsed = 0 }.
    entry = &matroska->level1_elems[matroska->num_level1_elems];
    matroska->num_level1_elems++;
    *entry = (MatroskaLevel1Element){ .id = id };

    return entry;
}
static int ebml_parse_elem(MatroskaDemuxContext *matroska, static int ebml_parse_elem(MatroskaDemuxContext *matroska,
EbmlSyntax *syntax, void *data) EbmlSyntax *syntax, void *data)
{ {
...@@ -994,6 +1041,7 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska, ...@@ -994,6 +1041,7 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
uint64_t length; uint64_t length;
int res; int res;
void *newelem; void *newelem;
MatroskaLevel1Element *level1_elem;
data = (char *) data + syntax->data_offset; data = (char *) data + syntax->data_offset;
if (syntax->list_elem_size) { if (syntax->list_elem_size) {
...@@ -1036,11 +1084,20 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska, ...@@ -1036,11 +1084,20 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
case EBML_BIN: case EBML_BIN:
res = ebml_read_binary(pb, length, data); res = ebml_read_binary(pb, length, data);
break; break;
case EBML_LEVEL1:
case EBML_NEST: case EBML_NEST:
if ((res = ebml_read_master(matroska, length)) < 0) if ((res = ebml_read_master(matroska, length)) < 0)
return res; return res;
if (id == MATROSKA_ID_SEGMENT) if (id == MATROSKA_ID_SEGMENT)
matroska->segment_start = avio_tell(matroska->ctx->pb); matroska->segment_start = avio_tell(matroska->ctx->pb);
if (id == MATROSKA_ID_CUES)
matroska->cues_parsing_deferred = 0;
if (syntax->type == EBML_LEVEL1 &&
(level1_elem = matroska_find_level1_elem(matroska, syntax->id))) {
if (level1_elem->parsed)
av_log(matroska->ctx, AV_LOG_ERROR, "Duplicate element\n");
level1_elem->parsed = 1;
}
return ebml_parse_nest(matroska, syntax->def.n, data); return ebml_parse_nest(matroska, syntax->def.n, data);
case EBML_PASS: case EBML_PASS:
return ebml_parse_id(matroska, syntax->def.n, id, data); return ebml_parse_id(matroska, syntax->def.n, id, data);
...@@ -1071,6 +1128,7 @@ static void ebml_free(EbmlSyntax *syntax, void *data) ...@@ -1071,6 +1128,7 @@ static void ebml_free(EbmlSyntax *syntax, void *data)
case EBML_BIN: case EBML_BIN:
av_freep(&((EbmlBin *) data_off)->data); av_freep(&((EbmlBin *) data_off)->data);
break; break;
case EBML_LEVEL1:
case EBML_NEST: case EBML_NEST:
if (syntax[i].list_elem_size) { if (syntax[i].list_elem_size) {
EbmlList *list = data_off; EbmlList *list = data_off;
...@@ -1356,24 +1414,17 @@ static void matroska_convert_tags(AVFormatContext *s) ...@@ -1356,24 +1414,17 @@ static void matroska_convert_tags(AVFormatContext *s)
} }
static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska, static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
int idx) uint64_t pos)
{ {
EbmlList *seekhead_list = &matroska->seekhead;
uint32_t level_up = matroska->level_up; uint32_t level_up = matroska->level_up;
uint32_t saved_id = matroska->current_id; uint32_t saved_id = matroska->current_id;
MatroskaSeekhead *seekhead = seekhead_list->elem;
int64_t before_pos = avio_tell(matroska->ctx->pb); int64_t before_pos = avio_tell(matroska->ctx->pb);
MatroskaLevel level; MatroskaLevel level;
int64_t offset; int64_t offset;
int ret = 0; int ret = 0;
if (idx >= seekhead_list->nb_elem ||
seekhead[idx].id == MATROSKA_ID_SEEKHEAD ||
seekhead[idx].id == MATROSKA_ID_CLUSTER)
return 0;
/* seek */ /* seek */
offset = seekhead[idx].pos + matroska->segment_start; offset = pos + matroska->segment_start;
if (avio_seek(matroska->ctx->pb, offset, SEEK_SET) == offset) { if (avio_seek(matroska->ctx->pb, offset, SEEK_SET) == offset) {
/* We don't want to lose our seekhead level, so we add /* We don't want to lose our seekhead level, so we add
* a dummy. This is a crude hack. */ * a dummy. This is a crude hack. */
...@@ -1410,37 +1461,35 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska, ...@@ -1410,37 +1461,35 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
static void matroska_execute_seekhead(MatroskaDemuxContext *matroska) static void matroska_execute_seekhead(MatroskaDemuxContext *matroska)
{ {
EbmlList *seekhead_list = &matroska->seekhead; EbmlList *seekhead_list = &matroska->seekhead;
int64_t before_pos = avio_tell(matroska->ctx->pb);
int i; int i;
int nb_elem;
// we should not do any seeking in the streaming case // we should not do any seeking in the streaming case
if (!matroska->ctx->pb->seekable || if (!matroska->ctx->pb->seekable ||
(matroska->ctx->flags & AVFMT_FLAG_IGNIDX)) (matroska->ctx->flags & AVFMT_FLAG_IGNIDX))
return; return;
// do not read entries that are added while parsing seekhead entries for (i = 0; i < seekhead_list->nb_elem; i++) {
nb_elem = seekhead_list->nb_elem; MatroskaSeekhead *seekheads = seekhead_list->elem;
uint32_t id = seekheads[i].id;
uint64_t pos = seekheads[i].pos;
for (i = 0; i < nb_elem; i++) { MatroskaLevel1Element *elem = matroska_find_level1_elem(matroska, id);
MatroskaSeekhead *seekhead = seekhead_list->elem; if (!elem || elem->parsed)
if (seekhead[i].pos <= before_pos)
continue; continue;
elem->pos = pos;
// defer cues parsing until we actually need cue data. // defer cues parsing until we actually need cue data.
if (seekhead[i].id == MATROSKA_ID_CUES) { if (id == MATROSKA_ID_CUES)
matroska->cues_parsing_deferred = 1;
continue; continue;
}
if (matroska_parse_seekhead_entry(matroska, i) < 0) { if (matroska_parse_seekhead_entry(matroska, pos) < 0) {
// mark index as broken // mark index as broken
matroska->cues_parsing_deferred = -1; matroska->cues_parsing_deferred = -1;
break; break;
} }
}
if (nb_elem != seekhead_list->nb_elem) { elem->parsed = 1;
avpriv_request_sample(matroska->ctx, "recursive SeekHead elements");
} }
} }
...@@ -1474,17 +1523,18 @@ static void matroska_add_index_entries(MatroskaDemuxContext *matroska) ...@@ -1474,17 +1523,18 @@ static void matroska_add_index_entries(MatroskaDemuxContext *matroska)
} }
static void matroska_parse_cues(MatroskaDemuxContext *matroska) { static void matroska_parse_cues(MatroskaDemuxContext *matroska) {
EbmlList *seekhead_list = &matroska->seekhead;
MatroskaSeekhead *seekhead = seekhead_list->elem;
int i; int i;
for (i = 0; i < seekhead_list->nb_elem; i++) for (i = 0; i < matroska->num_level1_elems; i++) {
if (seekhead[i].id == MATROSKA_ID_CUES) MatroskaLevel1Element *elem = &matroska->level1_elems[i];
if (elem->id == MATROSKA_ID_CUES && !elem->parsed) {
if (matroska_parse_seekhead_entry(matroska, elem->pos) < 0)
matroska->cues_parsing_deferred = -1;
elem->parsed = 1;
break; break;
av_assert1(i <= seekhead_list->nb_elem); }
}
if (matroska_parse_seekhead_entry(matroska, i) < 0)
matroska->cues_parsing_deferred = -1;
matroska_add_index_entries(matroska); matroska_add_index_entries(matroska);
} }
...@@ -2014,6 +2064,7 @@ static int matroska_read_header(AVFormatContext *s) ...@@ -2014,6 +2064,7 @@ static int matroska_read_header(AVFormatContext *s)
int i, j, res; int i, j, res;
matroska->ctx = s; matroska->ctx = s;
matroska->cues_parsing_deferred = 1;
/* First read the EBML header. */ /* First read the EBML header. */
if (ebml_parse(matroska, ebml_syntax, &ebml) || if (ebml_parse(matroska, ebml_syntax, &ebml) ||
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment