📄 text_import.c
字号:
/*
 *			GPAC - Multimedia Framework C SDK
 *
 *			Copyright (c) Jean Le Feuvre 2000-2005
 *					All rights reserved
 *
 *  This file is part of GPAC / Media Tools sub-project
 *
 *  GPAC is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  GPAC is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#include <gpac/constants.h>
#include <gpac/utf.h>
#include <gpac/xml.h>
#include <gpac/token.h>
#include <gpac/internal/media_dev.h>

#ifndef GPAC_READ_ONLY

/*text formats recognized by gf_text_guess_format*/
enum
{
	GF_TEXT_IMPORT_NONE = 0,
	GF_TEXT_IMPORT_SRT,
	GF_TEXT_IMPORT_SUB,
	GF_TEXT_IMPORT_TTXT,
	GF_TEXT_IMPORT_TEXML,
};

/*strips from the end of __str every character that belongs to __sep.
The macro expands to a complete while-statement and is used WITHOUT a trailing semicolon
(including directly after an "if (...)"), so its shape must not be changed*/
#define REM_TRAIL_MARKS(__str, __sep) while (1) {	\
		u32 _len = strlen(__str);		\
		if (!_len) break;	\
		_len--;				\
		if (strchr(__sep, __str[_len])) __str[_len] = 0;	\
		else break;	\
	}

/*
 *	Probes the byte-order mark at the current position of in_src (expected to be the file start)
 *	and leaves the stream positioned on the first byte following the mark.
 *
 *	returns	 3 for a FF FE mark (UTF-16, little endian)
 *		 2 for a FE FF mark (UTF-16, big endian)
 *		 1 for a EF BB BF mark (UTF-8)
 *		 0 when there is no mark and the first byte is below 0x80
 *		-1 for anything else (including marks followed by two zero bytes, treated as UTF-32)
 */
static s32 gf_text_get_utf_type(FILE *in_src)
{
	unsigned char BOM[5];
	size_t nb_read;

	/*zero-fill first: a file shorter than the probe must not be classified from indeterminate bytes*/
	memset(BOM, 0, sizeof(BOM));
	/*byte-wise read so that a short file still delivers the bytes it has*/
	nb_read = fread(BOM, 1, sizeof(BOM), in_src);

	if ((BOM[0]==0xFF) && (BOM[1]==0xFE)) {
		/*UTF32 not supported - only meaningful if the two following bytes were really read*/
		if ((nb_read>=4) && !BOM[2] && !BOM[3]) return -1;
		fseek(in_src, 2, SEEK_SET);
		return 3;
	}
	if ((BOM[0]==0xFE) && (BOM[1]==0xFF)) {
		/*UTF32 not supported - only meaningful if the two following bytes were really read*/
		if ((nb_read>=4) && !BOM[2] && !BOM[3]) return -1;
		fseek(in_src, 2, SEEK_SET);
		return 2;
	} else if ((BOM[0]==0xEF) && (BOM[1]==0xBB) && (BOM[2]==0xBF)) {
		fseek(in_src, 3, SEEK_SET);
		return 1;
	}
	if (BOM[0]<0x80) {
		fseek(in_src, 0, SEEK_SET);
		return 0;
	}
	return -1;
}

/*
 *	Guesses the text format of a file from its first bytes.
 *
 *	filename:	path of the file to probe
 *	fmt:		set to one of the GF_TEXT_IMPORT_* values (GF_TEXT_IMPORT_NONE if nothing matched)
 *
 *	returns GF_URL_ERROR if the file cannot be opened, GF_OK otherwise
 */
static GF_Err gf_text_guess_format(char *filename, u32 *fmt)
{
	char szLine[2048];
	u32 val;
	s32 uni_type;
	FILE *test = fopen(filename, "rb");
	if (!test) return GF_URL_ERROR;
	uni_type = gf_text_get_utf_type(test);

	if (uni_type>1) {
		const u16 *sptr;
		/*16-bit storage keeps the probe correctly aligned for the converter*/
		u16 szUTF[512];
		/*keep one 16-bit unit free for the terminator*/
		u32 read = (u32) fread(szUTF, 1, sizeof(szUTF) - sizeof(u16), test);
		/*a UTF-16 string ends on a 16-bit zero: drop any dangling odd byte and terminate*/
		szUTF[read/2] = 0;
		sptr = szUTF;
		/*NOTE(review): no byte swap is done here, unlike gf_text_get_utf8_line - confirm this is intended*/
		read = (u32) gf_utf8_wcstombs(szLine, sizeof(szLine)-1, &sptr);
		/*treat a failed/oversized conversion as empty - assumes (size_t)-1 signals failure, TODO confirm*/
		if (read >= sizeof(szLine)) read = 0;
		szLine[read] = 0;
	} else {
		val = (u32) fread(szLine, 1, 1024, test);
		szLine[val]=0;
	}
	REM_TRAIL_MARKS(szLine, "\r\n\t ")

	*fmt = GF_TEXT_IMPORT_NONE;
	if ((szLine[0]=='{') && strstr(szLine, "}{")) *fmt = GF_TEXT_IMPORT_SUB;
	else if (!strnicmp(szLine, "<?xml ", 6)) {
		/*the file name may have no extension at all*/
		char *ext = strrchr(filename, '.');
		if (ext && !strnicmp(ext, ".ttxt", 5)) *fmt = GF_TEXT_IMPORT_TTXT;
		/*if the probe stops right after the XML declaration, look at the following line instead*/
		ext = strstr(szLine, "?>");
		if (ext) ext += 2;
		if (ext && !ext[0]) fgets(szLine, 2048, test);
		if (strstr(szLine, "x-quicktime-tx3g") || strstr(szLine, "text3GTrack")) *fmt = GF_TEXT_IMPORT_TEXML;
		else if (strstr(szLine, "TextStream")) *fmt = GF_TEXT_IMPORT_TTXT;
	}
	else if (strstr(szLine, " --> ") ) *fmt = GF_TEXT_IMPORT_SRT;
	fclose(test);
	return GF_OK;
}

#define TTXT_DEFAULT_WIDTH	400
#define TTXT_DEFAULT_HEIGHT	60
#define TTXT_DEFAULT_FONT_SIZE	18

/*
 *	Computes the largest width and height found among the visual info and the track layout info
 *	of all scene and visual tracks of dest. A dimension for which nothing was found is replaced
 *	by TTXT_DEFAULT_WIDTH / TTXT_DEFAULT_HEIGHT.
 */
static void gf_text_get_video_size(GF_ISOFile *dest, u32 *width, u32 *height)
{
	u32 w, h, f_w, f_h, i, count;

	f_w = f_h = 0;
	count = gf_isom_get_track_count(dest);
	for (i=0; i<count; i++) {
		switch (gf_isom_get_media_type(dest, i+1)) {
		case GF_ISOM_MEDIA_SCENE:
		case GF_ISOM_MEDIA_VISUAL:
			/*reset before each query so a getter that fails without writing cannot leak stale values*/
			w = h = 0;
			gf_isom_get_visual_info(dest, i+1, 1, &w, &h);
			if (w > f_w) f_w = w;
			if (h > f_h) f_h = h;
			w = h = 0;
			gf_isom_get_track_layout_info(dest, i+1, &w, &h, NULL, NULL, NULL);
			if (w > f_w) f_w = w;
			if (h > f_h) f_h = h;
			break;
		default:
			break;
		}
	}
	(*width) = f_w ? f_w : TTXT_DEFAULT_WIDTH;
	(*height) = f_h ? f_h : TTXT_DEFAULT_HEIGHT;
}

/*
 *	If the importer carries an ESD with a language descriptor, unpacks the three bytes of its
 *	langCode (most significant first) into a string and sets it as the media language of track.
 */
static void gf_text_import_set_language(GF_MediaImporter *import, u32 track)
{
	if (import->esd && import->esd->langDesc) {
		char lang[4];
		lang[0] = (import->esd->langDesc->langCode>>16) & 0xFF;
		lang[1] = (import->esd->langDesc->langCode>>8) & 0xFF;
		lang[2] = (import->esd->langDesc->langCode) & 0xFF;
		lang[3] = 0;
		gf_isom_set_media_language(import->dest, track, lang);
	}
}

/*
 *	Reads one line from txt_in into szLine and converts it to UTF-8 in place.
 *
 *	szLine:		destination buffer, fully overwritten
 *	lineSize:	size of szLine in bytes
 *	txt_in:		input stream
 *	unicode_type:	value returned by gf_text_get_utf_type for this stream
 *
 *	With unicode_type 0, bytes with the high bit set that are not part of a UTF-8 sequence are
 *	expanded to two-byte UTF-8 sequences. With unicode_type 1 the line is copied as is.
 *	With unicode_type 2 or 3 the line is converted from UTF-16.
 *	The result is truncated if it would not fit in szLine.
 *
 *	returns NULL if no line could be read, a non-NULL pointer otherwise
 */
static char *gf_text_get_utf8_line(char *szLine, u32 lineSize, FILE *txt_in, s32 unicode_type)
{
	u32 i, j, len, limit;
	char *sOK;
	/*sized for the largest line used in this file (2048 bytes) with every byte doubled*/
	char szLineConv[4096];
	unsigned short *sptr;

	if (!szLine || !lineSize) return NULL;

	memset(szLine, 0, sizeof(char)*lineSize);
	sOK = fgets(szLine, lineSize, txt_in);
	if (!sOK) return NULL;

	/*the converted text is copied back into szLine: never produce more than either buffer holds*/
	limit = (u32) sizeof(szLineConv) - 1;
	if (limit > lineSize - 1) limit = lineSize - 1;

	if (unicode_type<=1) {
		j=0;
		len = (u32) strlen(szLine);
		for (i=0; i<len; i++) {
			/*number of bytes emitted before the final generic copy of this iteration*/
			u32 lead = 0;
			Bool is_cp = 0;

			if (!unicode_type && (szLine[i] & 0x80)) {
				/*non UTF8 (likely some win-CP)*/
				if ((szLine[i+1] & 0xc0) != 0x80) {
					is_cp = 1;
					lead = 1;
				}
				/*UTF8 2 bytes char*/
				else if ( (szLine[i] & 0xe0) == 0xc0) lead = 1;
				/*UTF8 3 bytes char*/
				else if ( (szLine[i] & 0xf0) == 0xe0) lead = 2;
				/*UTF8 4 bytes char*/
				else if ( (szLine[i] & 0xf8) == 0xf0) lead = 3;
				/*neither a lead byte nor a lone high byte: drop it together with the next byte*/
				else {
					i+=1;
					continue;
				}
			}
			/*stop before a sequence that would not fit entirely*/
			if (j + lead + 1 > limit) break;

			if (is_cp) {
				szLineConv[j] = (char) (0xc0 | ( (szLine[i] >> 6) & 0x3 ));
				j++;
				szLine[i] &= 0xbf;
			} else {
				/*copy the leading bytes of the sequence, never walking past the end of the line*/
				while (lead && (i<len)) {
					szLineConv[j] = szLine[i];
					i++;
					j++;
					lead--;
				}
			}
			/*i<=len here, so this reads at most the terminator*/
			szLineConv[j] = szLine[i];
			j++;
		}
		szLineConv[j] = 0;
		strcpy(szLine, szLineConv);
		return sOK;
	}

	/*make sure the UTF-16 data ends on a complete 16-bit zero inside the buffer*/
	if (lineSize>=2) {
		u32 last = ((lineSize/2) - 1) * 2;
		szLine[last] = 0;
		szLine[last+1] = 0;
	}

	/*swap bytes when the file byte order differs from the host byte order*/
#ifdef GPAC_BIG_ENDIAN
	if (unicode_type==3) {
#else
	if (unicode_type==2) {
#endif
		i=0;
		while (i+1 < lineSize) {
			char c;
			if (!szLine[i] && !szLine[i+1]) break;
			c = szLine[i+1];
			szLine[i+1] = szLine[i];
			szLine[i] = c;
			i+=2;
		}
	}
	sptr = (u16 *)szLine;
	i = (u32) gf_utf8_wcstombs(szLineConv, limit, (const unsigned short **) &sptr);
	/*a failed conversion yields an empty line instead of an out-of-bounds write
	- assumes (size_t)-1 signals failure, TODO confirm*/
	if (i > limit) i = 0;
	szLineConv[i] = 0;
	strcpy(szLine, szLineConv);
	/*this is ugly indeed: since input is UTF16-LE, there are many chances the fgets never reads the \0 after a \n*/
	if (unicode_type==3) fgetc(txt_in);
	return sOK;
}

static GF_Err
gf_text_import_srt(GF_MediaImporter *import){ FILE *srt_in; Double scale; u32 track, timescale, i, count; GF_TextConfig*cfg; GF_Err e; GF_StyleRecord rec; GF_TextSample * samp; GF_ISOSample *s; u32 sh, sm, ss, sms, eh, em, es, ems, txt_line, char_len, char_line, nb_samp, j, duration, file_size, rem_styles; Bool set_start_char, set_end_char, first_samp; u64 start, end, prev_end; u32 state, curLine, line, len, ID, OCR_ES_ID; s32 unicode_type; char szLine[2048], szText[2048], *ptr; unsigned short uniLine[5000], uniText[5000], *sptr; srt_in = fopen(import->in_name, "rt"); fseek(srt_in, 0, SEEK_END); file_size = ftell(srt_in); fseek(srt_in, 0, SEEK_SET); unicode_type = gf_text_get_utf_type(srt_in); if (unicode_type<0) { fclose(srt_in); return gf_import_message(import, GF_NOT_SUPPORTED, "Unsupported SRT UTF encoding"); } cfg = NULL; if (import->esd) { if (!import->esd->slConfig) { import->esd->slConfig = (GF_SLConfig *) gf_odf_desc_new(GF_ODF_SLC_TAG); import->esd->slConfig->predefined = 2; import->esd->slConfig->timestampResolution = 1000; } timescale = import->esd->slConfig->timestampResolution; if (!timescale) timescale = 1000; /*explicit text config*/ if (import->esd->decoderConfig && import->esd->decoderConfig->decoderSpecificInfo->tag == GF_ODF_TEXT_CFG_TAG) { cfg = (GF_TextConfig *) import->esd->decoderConfig->decoderSpecificInfo; import->esd->decoderConfig->decoderSpecificInfo = NULL; } ID = import->esd->ESID; OCR_ES_ID = import->esd->OCRESID; } else { timescale = 1000; OCR_ES_ID = ID = 0; } if (cfg && cfg->timescale) timescale = cfg->timescale; track = gf_isom_new_track(import->dest, ID, GF_ISOM_MEDIA_TEXT, timescale); if (!track) { fclose(srt_in); return gf_import_message(import, gf_isom_last_error(import->dest), "Error creating text track"); } gf_isom_set_track_enabled(import->dest, track, 1); if (import->esd && !import->esd->ESID) import->esd->ESID = gf_isom_get_track_id(import->dest, track); if (OCR_ES_ID) gf_isom_set_track_reference(import->dest, track, 
GF_ISOM_REF_OCR, OCR_ES_ID); /*setup track*/ if (cfg) { char *firstFont = NULL; /*set track info*/ gf_isom_set_track_layout_info(import->dest, track, cfg->text_width<<16, cfg->text_height<<16, 0, 0, cfg->layer); /*and set sample descriptions*/ count = gf_list_count(cfg->sample_descriptions); for (i=0; i<count; i++) { GF_TextSampleDescriptor *sd= (GF_TextSampleDescriptor *)gf_list_get(cfg->sample_descriptions, i); if (!sd->font_count) { sd->fonts = (GF_FontRecord*)malloc(sizeof(GF_FontRecord)); sd->font_count = 1; sd->fonts[0].fontID = 1; sd->fonts[0].fontName = strdup("Serif"); } if (!sd->default_style.fontID) sd->default_style.fontID = sd->fonts[0].fontID; if (!sd->default_style.font_size) sd->default_style.font_size = 16; if (!sd->default_style.text_color) sd->default_style.text_color = 0xFF000000; /*store attribs*/ if (!i) rec = sd->default_style; gf_isom_new_text_description(import->dest, track, sd, NULL, NULL, &state); if (!firstFont) firstFont = sd->fonts[0].fontName; } gf_import_message(import, GF_OK, "Timed Text (SRT) import - text track %d x %d, font %s (size %d)", cfg->text_width, cfg->text_height, firstFont, rec.font_size); gf_odf_desc_del((GF_Descriptor *)cfg); } else { u32 w, h; GF_TextSampleDescriptor *sd; gf_text_get_video_size(import->dest, &w, &h); /*have to work with default - use max size (if only one video, this means the text region is the entire display, and with bottom alignment things should be fine...*/ gf_isom_set_track_layout_info(import->dest, track, w<<16, h<<16, 0, 0, 0); sd = (GF_TextSampleDescriptor*)gf_odf_desc_new(GF_ODF_TX3G_TAG); sd->fonts = (GF_FontRecord*)malloc(sizeof(GF_FontRecord)); sd->font_count = 1; sd->fonts[0].fontID = 1; sd->fonts[0].fontName = strdup(import->fontName ? import->fontName : "Serif"); sd->back_color = 0x00000000; /*transparent*/ sd->default_style.fontID = 1; sd->default_style.font_size = import->fontSize ? 
import->fontSize : TTXT_DEFAULT_FONT_SIZE; sd->default_style.text_color = 0xFFFFFFFF; /*white*/ sd->default_style.style_flags = 0; sd->horiz_justif = 1; /*center of scene*/ sd->vert_justif = (s8) -1; /*bottom of scene*/ if (import->flags & GF_IMPORT_SKIP_TXT_BOX) { sd->default_pos.top = sd->default_pos.left = sd->default_pos.right = sd->default_pos.bottom = 0; } else { if ((sd->default_pos.bottom==sd->default_pos.top) || (sd->default_pos.right==sd->default_pos.left)) { sd->default_pos.top = sd->default_pos.left = 0; sd->default_pos.right = w; sd->default_pos.bottom = h; } } /*store attribs*/ rec = sd->default_style; gf_isom_new_text_description(import->dest, track, sd, NULL, NULL, &state); gf_import_message(import, GF_OK, "Timed Text (SRT) import - text track %d x %d, font %s (size %d)", w, h, sd->fonts[0].fontName, rec.font_size); gf_odf_desc_del((GF_Descriptor *)sd); } gf_text_import_set_language(import, track); duration = (u32) (((Double) import->duration)*timescale/1000.0); e = GF_OK; state = 0; end = prev_end = 0; curLine = 0; txt_line = 0; set_start_char = set_end_char = 0; char_len = 0; start = 0; nb_samp = 0; samp = gf_isom_new_text_sample(); scale = timescale; scale /= 1000; first_samp = 1; while (1) { char *sOK = gf_text_get_utf8_line(szLine, 2048, srt_in, unicode_type); if (sOK) REM_TRAIL_MARKS(szLine, "\r\n\t ") if (!sOK || !strlen(szLine)) { state = 0; rec.style_flags = 0; rec.startCharOffset = rec.endCharOffset = 0; if (txt_line) { if (prev_end && (start != prev_end)) { GF_TextSample * empty_samp = gf_isom_new_text_sample(); s = gf_isom_text_to_sample(empty_samp); gf_isom_delete_text_sample(empty_samp); s->DTS = (u64) (scale*(s64)prev_end); s->IsRAP = 1; gf_isom_add_sample(import->dest, track, 1, s); gf_isom_sample_del(&s); nb_samp++; } s = gf_isom_text_to_sample(samp); s->DTS = (u64) (scale*(s64) start); s->IsRAP = 1; gf_isom_add_sample(import->dest, track, 1, s); gf_isom_sample_del(&s); nb_samp++; prev_end = end; txt_line = 0; char_len = 0; 
set_start_char = set_end_char = 0; rec.startCharOffset = rec.endCharOffset = 0; gf_isom_text_reset(samp); //gf_import_progress(import, nb_samp, nb_samp+1); gf_set_progress("Importing SRT", ftell(srt_in), file_size); if (duration && (end >= duration)) break; } if (!sOK) break; continue; } switch (state) { case 0: if (sscanf(szLine, "%d", &line) != 1) { e = gf_import_message(import, GF_CORRUPTED_DATA, "Bad SRT formatting - expecting number got \"%s\"", szLine); goto exit; } if (line != curLine + 1) gf_import_message(import, GF_OK, "WARNING: corrupted SRT frame %d after frame %d", line, curLine); curLine = line; state = 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -