diff --git a/frontend/OBSApp.cpp b/frontend/OBSApp.cpp index 36f3177284c75b..a5abdb5821f2e3 100644 --- a/frontend/OBSApp.cpp +++ b/frontend/OBSApp.cpp @@ -1341,8 +1341,10 @@ string GetFormatExt(const char *container) string ext = container; if (ext == "fragmented_mp4") ext = "mp4"; - if (ext == "hybrid_mp4") + else if (ext == "hybrid_mp4") ext = "mp4"; + else if (ext == "hybrid_mov") + ext = "mov"; else if (ext == "fragmented_mov") ext = "mov"; else if (ext == "hls") diff --git a/frontend/data/locale/en-US.ini b/frontend/data/locale/en-US.ini index 572b15f15117fb..c6872d1e601340 100644 --- a/frontend/data/locale/en-US.ini +++ b/frontend/data/locale/en-US.ini @@ -997,6 +997,7 @@ Basic.Settings.Output.Format.MOV="QuickTime (.mov)" Basic.Settings.Output.Format.TS="MPEG-TS (.ts)" Basic.Settings.Output.Format.HLS="HLS (.m3u8 + .ts)" Basic.Settings.Output.Format.hMP4="Hybrid MP4 [BETA] (.mp4)" +Basic.Settings.Output.Format.hMOV="Hybrid MOV [BETA] (.mov)" Basic.Settings.Output.Format.fMP4="Fragmented MP4 (.mp4)" Basic.Settings.Output.Format.fMOV="Fragmented MOV (.mov)" Basic.Settings.Output.Format.TT.fragmented_mov="Fragmented MOV writes the recording in chunks and does not require the same finalization as traditional MOV files.\nThis ensures the file remains playable even if writing to disk is interrupted, for example, as a result of a BSOD or power loss.\n\nThis may not be compatible with all players and editors. Use File → Remux Recordings to convert the file into a more compatible format if necessary." 
diff --git a/frontend/settings/OBSBasicSettings.cpp b/frontend/settings/OBSBasicSettings.cpp index dc7bc73fd916b4..ea232ed9d0e1eb 100644 --- a/frontend/settings/OBSBasicSettings.cpp +++ b/frontend/settings/OBSBasicSettings.cpp @@ -1048,6 +1048,7 @@ void OBSBasicSettings::LoadFormats() ui->simpleOutRecFormat->addItem(FORMAT_STR("MP4"), "mp4"); ui->simpleOutRecFormat->addItem(FORMAT_STR("MOV"), "mov"); ui->simpleOutRecFormat->addItem(FORMAT_STR("hMP4"), "hybrid_mp4"); + ui->simpleOutRecFormat->addItem(FORMAT_STR("hMOV"), "hybrid_mov"); ui->simpleOutRecFormat->addItem(FORMAT_STR("fMP4"), "fragmented_mp4"); ui->simpleOutRecFormat->addItem(FORMAT_STR("fMOV"), "fragmented_mov"); ui->simpleOutRecFormat->addItem(FORMAT_STR("TS"), "mpegts"); @@ -1057,6 +1058,7 @@ void OBSBasicSettings::LoadFormats() ui->advOutRecFormat->addItem(FORMAT_STR("MP4"), "mp4"); ui->advOutRecFormat->addItem(FORMAT_STR("MOV"), "mov"); ui->advOutRecFormat->addItem(FORMAT_STR("hMP4"), "hybrid_mp4"); + ui->advOutRecFormat->addItem(FORMAT_STR("hMOV"), "hybrid_mov"); ui->advOutRecFormat->addItem(FORMAT_STR("fMP4"), "fragmented_mp4"); ui->advOutRecFormat->addItem(FORMAT_STR("fMOV"), "fragmented_mov"); ui->advOutRecFormat->addItem(FORMAT_STR("TS"), "mpegts"); diff --git a/frontend/utility/AdvancedOutput.cpp b/frontend/utility/AdvancedOutput.cpp index 0d927721d1450c..e9097cb836a0e2 100644 --- a/frontend/utility/AdvancedOutput.cpp +++ b/frontend/utility/AdvancedOutput.cpp @@ -106,9 +106,13 @@ AdvancedOutput::AdvancedOutput(OBSBasic *main_) : BasicOutputHandler(main_) replayBufferSaved.Connect(signal, "saved", OBSReplayBufferSaved, this); } - bool native_muxer = strcmp(recFormat, "hybrid_mp4") == 0; - fileOutput = obs_output_create(native_muxer ? 
"mp4_output" : "ffmpeg_muxer", "adv_file_output", nullptr, - nullptr); + const char *mux = "ffmpeg_muxer"; /* default output id; hybrid formats below switch to the native outputs */ + if (strcmp(recFormat, "hybrid_mp4") == 0) + mux = "mp4_output"; + else if (strcmp(recFormat, "hybrid_mov") == 0) + mux = "mov_output"; + + fileOutput = obs_output_create(mux, "adv_file_output", nullptr, nullptr); if (!fileOutput) throw "Failed to create recording output " "(advanced output)"; diff --git a/frontend/utility/FFmpegCodec.cpp b/frontend/utility/FFmpegCodec.cpp index b93edb56b6c087..0e385ae68a17b6 100644 --- a/frontend/utility/FFmpegCodec.cpp +++ b/frontend/utility/FFmpegCodec.cpp @@ -130,6 +130,18 @@ static const unordered_map> codec_compat = { "pcm_s24le", "pcm_f32le", }}, + // Not part of FFmpeg, see obs-outputs module + {"hybrid_mov", + { + "h264", + "hevc", + "prores", + "aac", + "alac", + "pcm_s16le", + "pcm_s24le", + "pcm_f32le", + }}, {"mov", { "h264", diff --git a/frontend/utility/SimpleOutput.cpp b/frontend/utility/SimpleOutput.cpp index 4e783dd7abd5a1..43dd38e2679666 100644 --- a/frontend/utility/SimpleOutput.cpp +++ b/frontend/utility/SimpleOutput.cpp @@ -226,9 +226,13 @@ SimpleOutput::SimpleOutput(OBSBasic *main_) : BasicOutputHandler(main_) replayBufferSaved.Connect(signal, "saved", OBSReplayBufferSaved, this); } - bool use_native = strcmp(recFormat, "hybrid_mp4") == 0; - fileOutput = obs_output_create(use_native ? 
"mp4_output" : "ffmpeg_muxer", "simple_file_output", - nullptr, nullptr); + const char *mux = "ffmpeg_muxer"; /* default output id; hybrid formats below switch to the native outputs */ + if (strcmp(recFormat, "hybrid_mp4") == 0) + mux = "mp4_output"; + else if (strcmp(recFormat, "hybrid_mov") == 0) + mux = "mov_output"; + + fileOutput = obs_output_create(mux, "simple_file_output", nullptr, nullptr); if (!fileOutput) throw "Failed to create recording output " "(simple output)"; diff --git a/frontend/widgets/OBSBasic.cpp b/frontend/widgets/OBSBasic.cpp index b840930ef87a9c..ef085f20dfa37e 100644 --- a/frontend/widgets/OBSBasic.cpp +++ b/frontend/widgets/OBSBasic.cpp @@ -496,13 +496,19 @@ OBSBasic::OBSBasic(QWidget *parent) : OBSMainWindow(parent), undo_s(ui), ui(new static const double scaled_vals[] = {1.0, 1.25, (1.0 / 0.75), 1.5, (1.0 / 0.6), 1.75, 2.0, 2.25, 2.5, 2.75, 3.0, 0.0}; -#ifdef __APPLE__ +#ifdef __APPLE__ // macOS +#if OBS_RELEASE_CANDIDATE == 0 && OBS_BETA == 0 #define DEFAULT_CONTAINER "fragmented_mov" -#elif OBS_RELEASE_CANDIDATE == 0 && OBS_BETA == 0 +#else +#define DEFAULT_CONTAINER "hybrid_mov" +#endif +#else // Windows/Linux +#if OBS_RELEASE_CANDIDATE == 0 && OBS_BETA == 0 #define DEFAULT_CONTAINER "mkv" #else #define DEFAULT_CONTAINER "hybrid_mp4" #endif +#endif bool OBSBasic::InitBasicConfigDefaults() { diff --git a/plugins/obs-outputs/data/locale/en-US.ini b/plugins/obs-outputs/data/locale/en-US.ini index 5eded159fa7a38..411a631051eed8 100644 --- a/plugins/obs-outputs/data/locale/en-US.ini +++ b/plugins/obs-outputs/data/locale/en-US.ini @@ -11,6 +11,7 @@ MP4Output="MP4 File Output" MP4Output.FilePath="File Path" MP4Output.StartChapter="Start" MP4Output.UnnamedChapter="Unnamed" +MOVOutput="MOV File Output" IPFamily="IP Address Family" IPFamily.Both="IPv4 and IPv6 (Default)" diff --git a/plugins/obs-outputs/mp4-mux-internal.h b/plugins/obs-outputs/mp4-mux-internal.h index 85393f2c64f7f0..c0db2e823087f2 100644 --- 
a/plugins/obs-outputs/mp4-mux-internal.h +++ b/plugins/obs-outputs/mp4-mux-internal.h @@ -23,13 +23,6 @@ 
#include #include -/* Flavour for target compatibility */ -enum mp4_flavour { - MP4, /* ISO/IEC 14496-12 */ - MOV, /* Apple QuickTime */ - CMAF, /* ISO/IEC 23000-19 */ -}; - enum mp4_track_type { TRACK_UNKNOWN, TRACK_VIDEO, @@ -44,6 +37,7 @@ enum mp4_codec { CODEC_H264, CODEC_HEVC, CODEC_AV1, + CODEC_PRORES, /* Audio Codecs */ CODEC_AAC, @@ -97,7 +91,7 @@ struct mp4_track { /* Time Base (1/FPS for video, 1/sample rate for audio) */ uint32_t timebase_num; uint32_t timebase_den; - /* Output timescale calculated from time base (Video only) */ + /* Output timescale calculated from time base */ uint32_t timescale; /* First PTS this track has seen (in track timescale) */ @@ -133,7 +127,7 @@ struct mp4_mux { struct serializer *serializer; /* Target format compatibility */ - enum mp4_flavour mode; + enum mp4_flavour flavour; /* Flags */ enum mp4_mux_flags flags; @@ -340,3 +334,83 @@ static const char CHAPTER_PKT_FOOTER[12] = { 0x00, 0x00, 0x01, 0x00 }; /* clang-format on */ + +/** QTFF/MOV specifics **/ + +/* https://developer.apple.com/documentation/quicktime-file-format/sound_sample_description_version_2#LPCM-flag-values */ +enum lpcm_flags { + kAudioFormatFlagIsFloat = (1 << 0), + kAudioFormatFlagIsSignedInteger = (1 << 2), + kAudioFormatFlagIsPacked = (1 << 3), + kLinearPCMFormatFlagIsFloat = kAudioFormatFlagIsFloat, + kLinearPCMFormatFlagIsSignedInteger = kAudioFormatFlagIsSignedInteger, + kLinearPCMFormatFlagIsPacked = kAudioFormatFlagIsPacked, +}; + +static inline uint32_t get_lpcm_flags(enum mp4_codec codec) +{ + if (codec == CODEC_PCM_F32) + return kLinearPCMFormatFlagIsFloat | kLinearPCMFormatFlagIsPacked; + if (codec == CODEC_PCM_I16 || codec == CODEC_PCM_I24) + return kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked; + + return 0; +} + +enum channel_map_bits { + FL = 1 << 0, + FR = 1 << 1, + FC = 1 << 2, + LFE = 1 << 3, + RL = 1 << 4, + RR = 1 << 5, + RC = 1 << 8, + SL = 1 << 9, + SR = 1 << 10, +}; + +static uint32_t 
get_mov_channel_bitmap(enum speaker_layout layout) +{ + switch (layout) { + case SPEAKERS_MONO: + return FC; + case SPEAKERS_STEREO: + return FL | FR; + case SPEAKERS_2POINT1: + return FL | FR | LFE; + case SPEAKERS_4POINT0: + return FL | FR | FC | RC; + case SPEAKERS_4POINT1: + return FL | FR | FC | LFE | RC; + case SPEAKERS_5POINT1: + return FL | FR | FC | LFE | RL | RR; + case SPEAKERS_7POINT1: + return FL | FR | FC | LFE | RL | RR | SL | SR; + case SPEAKERS_UNKNOWN: + break; + } + + return 0; +} + +enum coreaudio_layout { + kAudioChannelLayoutTag_UseChannelBitmap = (1 << 16) | 0, + kAudioChannelLayoutTag_Mono = (100 << 16) | 1, + kAudioChannelLayoutTag_Stereo = (101 << 16) | 2, + kAudioChannelLayoutTag_DVD_4 = (133 << 16) | 3, // 2.1 (AAC Only) +}; + +static enum coreaudio_layout get_mov_channel_layout(enum mp4_codec codec, enum speaker_layout layout) +{ + switch (layout) { + case SPEAKERS_MONO: + return kAudioChannelLayoutTag_Mono; + case SPEAKERS_STEREO: + return kAudioChannelLayoutTag_Stereo; + case SPEAKERS_2POINT1: + /* Only supported for AAC. */ + return codec == CODEC_AAC ? kAudioChannelLayoutTag_DVD_4 : kAudioChannelLayoutTag_UseChannelBitmap; + default: + return kAudioChannelLayoutTag_UseChannelBitmap; + } +} diff --git a/plugins/obs-outputs/mp4-mux.c b/plugins/obs-outputs/mp4-mux.c index 1f253cc41ec0c3..c559fa7608050c 100644 --- a/plugins/obs-outputs/mp4-mux.c +++ b/plugins/obs-outputs/mp4-mux.c @@ -37,8 +37,9 @@ * Standard identifier is included if not referring to ISO/IEC 14496-12. */ -#define do_log(level, format, ...) \ - blog(level, "[mp4 muxer: '%s'] " format, obs_output_get_name(mux->output), ##__VA_ARGS__) +#define do_log(level, format, ...) \ + blog(level, "[%s muxer: '%s'] " format, mux->flavour == FLAVOUR_MOV ? "mov" : "mp4", \ + obs_output_get_name(mux->output), ##__VA_ARGS__) #define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__) #define info(format, ...) 
do_log(LOG_INFO, format, ##__VA_ARGS__) @@ -86,44 +87,49 @@ static size_t mp4_write_ftyp(struct mp4_mux *mux, bool fragmented) write_box(s, 0, "ftyp"); - const char *major_brand = "isom"; - /* Following FFmpeg's example, when using negative CTS the major brand - * needs to be either iso4 or iso6 depending on whether the file is - * currently fragmented. */ - if (mux->flags & MP4_USE_NEGATIVE_CTS) - major_brand = fragmented ? "iso6" : "iso4"; - - s_write(s, major_brand, 4); // major brand - s_wb32(s, 512); // minor version - - // minor brands (first one matches major brand) - s_write(s, major_brand, 4); - - /* Write isom base brand if it's not the major brand */ - if (strcmp(major_brand, "isom") != 0) - s_write(s, "isom", 4); - - /* Avoid adding newer brand (iso6) unless necessary, use "obs1" brand - * as a placeholder to maintain ftyp box size. */ - if (fragmented && strcmp(major_brand, "iso6") != 0) - s_write(s, "iso6", 4); - else - s_write(s, "obs1", 4); - - s_write(s, "iso2", 4); - - /* Include H.264 brand if used */ - for (size_t i = 0; i < mux->tracks.num; i++) { - struct mp4_track *track = &mux->tracks.array[i]; - if (track->type == TRACK_VIDEO) { - if (track->codec == CODEC_H264) - s_write(s, "avc1", 4); - break; + if (mux->flavour == FLAVOUR_MOV) { + /* For MOV the brand is just "qt" followed by two spaces. */ + s_write(s, "qt  ", 4); // major brand + s_wb32(s, 0x20140200); // minor version (BCD YYYYMM00 per QTFF spec) + s_write(s, "qt  ", 4); // minor brand + } else { + const char *major_brand = "isom"; + /* Following FFmpeg's example, when using negative CTS the major brand + * needs to be either iso4 or iso6 depending on whether the file is + * currently fragmented. */ + if (mux->flags & MP4_USE_NEGATIVE_CTS) + major_brand = fragmented ? 
"iso6" : "iso4"; + + s_write(s, major_brand, 4); // major brand + s_wb32(s, 512); // minor version + s_write(s, major_brand, 4); // minor brands (first one matches major brand) + + /* Write isom base brand if it's not the major brand */ + if (strcmp(major_brand, "isom") != 0) + s_write(s, "isom", 4); + + /* Avoid adding newer brand (iso6) unless necessary, use "obs1" brand + * as a placeholder to maintain ftyp box size. */ + if (fragmented && strcmp(major_brand, "iso6") != 0) + s_write(s, "iso6", 4); + else + s_write(s, "obs1", 4); + + s_write(s, "iso2", 4); + + /* Include H.264 brand if used */ + for (size_t i = 0; i < mux->tracks.num; i++) { + struct mp4_track *track = &mux->tracks.array[i]; + if (track->type == TRACK_VIDEO) { + if (track->codec == CODEC_H264) + s_write(s, "avc1", 4); + break; + } } - } - /* General MP4 brannd */ - s_write(s, "mp41", 4); + /* General MP4 brand */ + s_write(s, "mp41", 4); + } return write_box_size(s, start); } @@ -136,7 +142,7 @@ static size_t mp4_write_free(struct mp4_mux *mux) /* Write a 16-byte free box, so it can be replaced with a 64-bit size * box header (u32 + char[4] + u64) */ s_wb32(s, 16); - s_write(s, "free", 4); + s_write(s, mux->flavour == FLAVOUR_MOV ? "wide" : "free", 4); s_wb64(s, 0); return 16; @@ -265,6 +271,11 @@ static size_t mp4_write_mdhd(struct mp4_mux *mux, struct mp4_track *track) /* use 64-bit duration if necessary */ if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) { + if (mux->flavour == FLAVOUR_MOV) { + /* QTFF does not specify how to handle 32-bit overflow for duration/timestamps. */ + warn("Duration too large for MOV, this file may be unplayable in QuickTime!"); + } + size = 44; version = 1; } @@ -283,8 +294,8 @@ static size_t mp4_write_mdhd(struct mp4_mux *mux, struct mp4_track *track) s_wb32(s, (uint32_t)duration); // duration } - s_wb16(s, 21956); // language (undefined) - s_wb16(s, 0); // pre_defined + s_wb16(s, mux->flavour == FLAVOUR_MOV ? 
32767 : 21956); // language (undefined) + s_wb16(s, 0); // pre_defined return size; } @@ -297,10 +308,15 @@ static size_t mp4_write_hdlr(struct mp4_mux *mux, struct mp4_track *track) write_fullbox(s, 0, "hdlr", 0, 0); - s_wb32(s, 0); // pre_defined + if (mux->flavour == FLAVOUR_MOV) + s_write(s, track ? "mhlr" : "dhlr", 4); + else + s_wb32(s, 0); // pre_defined // handler_type - if (track->type == TRACK_VIDEO) + if (!track) + s_write(s, "url ", 4); + else if (track->type == TRACK_VIDEO) s_write(s, "vide", 4); else if (track->type == TRACK_CHAPTERS) s_write(s, "text", 4); @@ -311,13 +327,25 @@ static size_t mp4_write_hdlr(struct mp4_mux *mux, struct mp4_track *track) s_wb32(s, 0); // reserved s_wb32(s, 0); // reserved - // name (utf-8 string, null terminated) - if (track->type == TRACK_VIDEO) - s_write(s, "OBS Video Handler", 18); + const char *handler_name; + if (!track) + handler_name = "OBS Data Handler"; + else if (track->type == TRACK_VIDEO) + handler_name = "OBS Video Handler"; else if (track->type == TRACK_CHAPTERS) - s_write(s, "OBS Chapter Handler", 20); + handler_name = "OBS Chapter Handler"; else - s_write(s, "OBS Audio Handler", 18); + handler_name = "OBS Audio Handler"; + + // name (null terminated for MP4, pascal string for MOV) + size_t handler_len = strlen(handler_name); + if (mux->flavour == FLAVOUR_MOV) { + s_w8(s, (uint8_t)handler_len); + s_write(s, handler_name, handler_len); + } else { + s_write(s, handler_name, handler_len); + s_w8(s, 0); // NULL terminator + } return write_box_size(s, start); } @@ -534,9 +562,16 @@ static inline void mp4_write_visual_sample_entry(struct mp4_mux *mux, obs_encode // VisualSampleEntry Box s_wb16(s, 0); // pre_defined s_wb16(s, 0); // reserved - s_wb32(s, 0); // pre_defined - s_wb32(s, 0); // pre_defined - s_wb32(s, 0); // pre_defined + + if (mux->flavour == FLAVOUR_MOV) { + s_write(s, "OBSS", 4); // vendor + s_wb32(s, 0x200); // temporal quality (codecNormalQuality = 512) + s_wb32(s, 0x200); // spatial quality 
(codecNormalQuality) + } else { + s_wb32(s, 0); // pre_defined + s_wb32(s, 0); // pre_defined + s_wb32(s, 0); // pre_defined + } s_wb16(s, (uint16_t)obs_encoder_get_width(enc)); // width s_wb16(s, (uint16_t)obs_encoder_get_height(enc)); // height @@ -701,6 +736,47 @@ static size_t mp4_write_av01(struct mp4_mux *mux, obs_encoder_t *enc) return write_box_size(s, start); } +/// (QTFF/Apple) Video Sample Description +static size_t mp4_write_prores(struct mp4_mux *mux, obs_encoder_t *enc) +{ + struct serializer *s = mux->serializer; + int64_t start = serializer_get_pos(s); + + /* We get the tag as an int, but need it as a char[4] */ + union tag { + char c[4]; + uint32_t i; + } codec_tag; + + /* Codec tag varies for ProRes depending on configuration, so we need to get it from the encoder. */ + obs_data_t *settings = obs_encoder_get_settings(enc); + codec_tag.i = (uint32_t)obs_data_get_int(settings, "codec_type"); + obs_data_release(settings); + +#if __BYTE_ORDER == __LITTLE_ENDIAN + codec_tag.i = ((codec_tag.i >> 24) & 0x000000FF) | ((codec_tag.i << 8) & 0x00FF0000) | + ((codec_tag.i >> 8) & 0x0000FF00) | ((codec_tag.i << 24) & 0xFF000000); +#endif + + write_box(s, 0, codec_tag.c); + + mp4_write_visual_sample_entry(mux, enc); + + // colr + mp4_write_colr(mux, enc); + + // clli + mp4_write_clli(mux, enc); + + // mdcv + mp4_write_mdcv(mux, enc); + + // pasp + mp4_write_pasp(mux); + + return write_box_size(s, start); +} + static inline void put_descr(struct serializer *s, uint8_t tag, size_t size) { int i = 3; @@ -772,6 +848,8 @@ static size_t mp4_write_esds(struct mp4_mux *mux, struct mp4_track *track) static inline void mp4_write_audio_sample_entry(struct mp4_mux *mux, struct mp4_track *track, uint8_t version) { struct serializer *s = mux->serializer; + bool is_mov = mux->flavour == FLAVOUR_MOV; + bool is_pcm = track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24 || track->codec == CODEC_PCM_F32; // SampleEntry Box s_w8(s, 0); // reserved @@ -784,33 +862,70 @@ 
static inline void mp4_write_audio_sample_entry(struct mp4_mux *mux, struct mp4_ s_wb16(s, 1); // data_reference_index // AudioSampleEntry Box - if (version == 1) { - s_wb16(s, 1); // entry_version - s_wb16(s, 0); // reserved - s_wb16(s, 0); // reserved - s_wb16(s, 0); // reserved - } else { - s_wb32(s, 0); // reserved - s_wb32(s, 0); // reserved - } + s_wb16(s, version); // entry_version + s_wb16(s, 0); // reserved + s_wb16(s, 0); // reserved + s_wb16(s, 0); // reserved audio_t *audio = obs_encoder_audio(track->encoder); - size_t channels = audio_output_get_channels(audio); + uint32_t channels = (uint32_t)audio_output_get_channels(audio); uint32_t sample_rate = track->timescale; bool alac = track->codec == CODEC_ALAC; - s_wb16(s, (uint32_t)channels); // channelcount - - /* OBS FLAC is currently always 16 bit, ALAC always 24, this may change - * in the futrure and should be handled differently then. - * That being said thoes codecs are self-describing so in most cases it - * shouldn't matter either way. */ - s_wb16(s, alac ? 24 : 16); // samplesize + /* MOV specific version: https://developer.apple.com/documentation/quicktime-file-format/sound_sample_description_version_2 */ + if (version == 2) { + // We need to get the raw float bytes, union seems to be the easiest way to do that. + union rate { + uint64_t u; + double f; + } rate; + rate.f = (double)sample_rate; + + s_wb16(s, 3); // always3 + s_wb16(s, 16); // always16 + s_wb16(s, 0xfffe); // alwaysMinus2 + s_wb16(s, 0); // always0 + s_wb32(s, 0x00010000); // always65536 + s_wb32(s, 72); // sizeOfStructOnly (start of containing box to constLPCMFramesPerAudioPacket) + s_wb64(s, rate.u); // audioSampleRate + s_wb32(s, channels); // numAudioChannels + s_wb32(s, 0x7F000000); // always7F000000 + s_wb32(s, is_pcm ? track->sample_size / channels * 8 : 0); // constBitsPerChannel + s_wb32(s, get_lpcm_flags(track->codec)); // formatSpecificFlags + s_wb32(s, is_pcm ? 
track->sample_size : 0); // constBytesPerAudioPacket + s_wb32(s, is_pcm ? 1 : 0); // constLPCMFramesPerAudioPacket + } else { + s_wb16(s, channels); // channelcount + + /* OBS FLAC is currently always 16 bit, ALAC always 24, this may change + * in the future and should be handled differently then. + * That being said those codecs are self-describing so in most cases it + * shouldn't matter either way. */ + s_wb16(s, !is_mov && alac ? 24 : 16); // samplesize + + s_wb16(s, is_mov && !is_pcm ? -2 : 0); // pre_defined (compression ID in MOV) + s_wb16(s, 0); // reserved + + /* The sample rate field is limited to 16-bits. Technically version 1 supports a "srat" box which + * provides 32 bits, but this is not supported by most software (including FFmpeg and Chromium). + * For encoded codecs (AAC etc.), the sample rate can be read from the encoded data itself. + * For PCM FFmpeg will try to use the timescale as sample rate. */ + if (sample_rate > UINT16_MAX) { + warn("Sample rate too high for MP4, file may not play back correctly."); + sample_rate = 0; + } - s_wb16(s, 0); // pre_defined - s_wb16(s, 0); // reserved + s_wb32(s, sample_rate << 16); // samplerate - s_wb32(s, sample_rate << 16); // samplerate + /* MOV-only data: https://developer.apple.com/documentation/quicktime-file-format/sound_sample_description_version_1 */ + if (is_mov && version == 1) { + size_t frame_size = obs_encoder_get_frame_size(track->encoder); + s_wb32(s, is_pcm ? 1 : (uint32_t)frame_size); // frame size + s_wb32(s, is_pcm ? track->sample_size / channels : 0); // bytes per packet + s_wb32(s, is_pcm ? 
track->sample_size : 0); // bytes per frame + s_wb32(s, 2); // bytes per sample, 2 for anything but 8-bit + } + } } /// 12.2.4 Channel layout @@ -1055,6 +1170,100 @@ static size_t mp4_write_Opus(struct mp4_mux *mux, struct mp4_track *track, uint8 return write_box_size(s, start); } +/// (QTFF/Apple) siDecompressionParam Atom ('wave') +static size_t mp4_write_wave(struct mp4_mux *mux, struct mp4_track *track, const char tag[4]) +{ + struct serializer *s = mux->serializer; + int64_t start = serializer_get_pos(s); + + write_box(s, 0, "wave"); + + /* frma atom containing codec tag (again) */ + s_wb32(s, 12); + s_write(s, "frma", 4); + s_write(s, tag, 4); + + if (track->codec == CODEC_AAC) { + mp4_write_esds(mux, track); + } else if (track->codec == CODEC_ALAC) { + uint8_t *extradata; + size_t extradata_size; + + if (obs_encoder_get_extra_data(track->encoder, &extradata, &extradata_size)) { + /* Apple Lossless Magic Cookie */ + s_write(s, extradata, extradata_size); + } + } + + /* Terminator atom */ + s_wb32(s, 8); // size + s_wb32(s, 0); // NULL name + + return write_box_size(s, start); +} + +/// (QTFF/Apple) Audio Channel Layout Atom (‘chan’) +static size_t mp4_write_chan(struct mp4_mux *mux, struct mp4_track *track) +{ + struct serializer *s = mux->serializer; + int64_t start = serializer_get_pos(s); + + audio_t *audio = obs_encoder_audio(track->encoder); + const struct audio_output_info *info = audio_output_get_info(audio); + uint32_t layout = get_mov_channel_layout(track->codec, info->speakers); + uint32_t bitmap = layout == kAudioChannelLayoutTag_UseChannelBitmap ? get_mov_channel_bitmap(info->speakers) + : 0; + if (layout == kAudioChannelLayoutTag_UseChannelBitmap && !bitmap) { + warn("No valid speaker layout found, not writing chan box. 
File may not play back correctly!"); + return 0; + } + + write_fullbox(s, 0, "chan", 0, 0); + /* AudioChannelLayout from CoreAudioTypes.h */ + s_wb32(s, layout); // mChannelLayoutTag + s_wb32(s, bitmap); // mChannelBitmap + s_wb32(s, 0); // mNumberChannelDescriptions + + return write_box_size(s, start); +} + +/// (QTFF/Apple) Sound Sample Description (v1 and v2) +static size_t mp4_write_mov_audio_tag(struct mp4_mux *mux, struct mp4_track *track) +{ + struct serializer *s = mux->serializer; + int64_t start = serializer_get_pos(s); + + const char *tag = NULL; + audio_t *audio = obs_encoder_audio(track->encoder); + uint32_t sample_rate = audio_output_get_sample_rate(audio); + size_t channels = audio_output_get_channels(audio); + /* More than 2 channels or sample rates above 65535 Hz require v2 */ + uint8_t version = (channels > 2 || sample_rate > UINT16_MAX) ? 2 : 1; + + if (track->codec == CODEC_PCM_F32 || track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24) { + tag = "lpcm"; + version = 2; /* lpcm also requires v2 */ + } else if (track->codec == CODEC_AAC) { + tag = "mp4a"; + } else if (track->codec == CODEC_ALAC) { + tag = "alac"; + } + + if (!tag) /* Unsupported/Unknown codec */ + return 0; + + write_box(s, 0, tag); + + mp4_write_audio_sample_entry(mux, track, version); + // wave + if (version == 1) + mp4_write_wave(mux, track, tag); + // chan + mp4_write_chan(mux, track); + + return write_box_size(s, start); +} + /// 8.5.2 Sample Description Box static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track) { @@ -1065,7 +1274,7 @@ static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track) * but in practice that doesn't appear to matter. */ uint8_t version = 0; - if (track->type == TRACK_AUDIO) { + if (track->type == TRACK_AUDIO && mux->flavour != FLAVOUR_MOV) { audio_t *audio = obs_encoder_audio(track->encoder); version = audio_output_get_channels(audio) > 2 ? 
1 : 0; } @@ -1082,18 +1291,24 @@ static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track) mp4_write_hvc1(mux, track->encoder); else if (track->codec == CODEC_AV1) mp4_write_av01(mux, track->encoder); + else if (track->codec == CODEC_PRORES) + mp4_write_prores(mux, track->encoder); } else if (track->type == TRACK_AUDIO) { - if (track->codec == CODEC_AAC) - mp4_write_mp4a(mux, track, version); - else if (track->codec == CODEC_OPUS) - mp4_write_Opus(mux, track, version); - else if (track->codec == CODEC_FLAC) - mp4_write_fLaC(mux, track, version); - else if (track->codec == CODEC_ALAC) - mp4_write_alac(mux, track, version); - else if (track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24 || - track->codec == CODEC_PCM_F32) - mp4_write_xpcm(mux, track, version); + if (mux->flavour == FLAVOUR_MOV) { + mp4_write_mov_audio_tag(mux, track); + } else { + if (track->codec == CODEC_AAC) + mp4_write_mp4a(mux, track, version); + else if (track->codec == CODEC_OPUS) + mp4_write_Opus(mux, track, version); + else if (track->codec == CODEC_FLAC) + mp4_write_fLaC(mux, track, version); + else if (track->codec == CODEC_ALAC) + mp4_write_alac(mux, track, version); + else if (track->codec == CODEC_PCM_I16 || track->codec == CODEC_PCM_I24 || + track->codec == CODEC_PCM_F32) + mp4_write_xpcm(mux, track, version); + } } else if (track->type == TRACK_CHAPTERS) { mp4_write_text(mux); } @@ -1415,8 +1630,8 @@ static size_t mp4_write_stbl(struct mp4_mux *mux, struct mp4_track *track, bool // stts mp4_write_stts(mux, track, fragmented); - // stss (non-fragmented only) - if (track->type == TRACK_VIDEO && !fragmented) + // stss (non-fragmented/non-prores only) + if (track->type == TRACK_VIDEO && !fragmented && track->codec != CODEC_PRORES) mp4_write_stss(mux, track); // ctts (non-fragmented only) @@ -1506,6 +1721,10 @@ static size_t mp4_write_minf(struct mp4_mux *mux, struct mp4_track *track, bool else mp4_write_smhd(mux); + // hdlr for dinf, required in MOV only + if 
(mux->flavour == FLAVOUR_MOV) + mp4_write_hdlr(mux, NULL); + // dinf, unnecessary but mandatory mp4_write_dinf(mux); @@ -1760,6 +1979,22 @@ static size_t mp4_write_data_atom(struct mp4_mux *mux, const char *data) return size; } +/// (QTFF/Apple) String atom +static size_t mp4_write_string_data_atom(struct mp4_mux *mux, const char name[4], const char *data) +{ + struct serializer *s = mux->serializer; + int64_t start = serializer_get_pos(s); + + uint16_t len = (uint16_t)strlen(data); + + write_box(s, 0, name); + s_wb16(s, len); // String length + s_write(s, "\x55\xC4", 2); // language code, just using undefined + s_write(s, data, len); // Note: No NULL terminator + + return write_box_size(s, start); +} + /// (QTFF/Apple) Metadata item atom static size_t mp4_write_ilst_item_atom(struct mp4_mux *mux, const char name[4], const char *value) { @@ -1935,8 +2170,21 @@ static size_t mp4_write_udta(struct mp4_mux *mux) /* Normally metadata would be directly in the moov, but since this is * Apple/QTFF format metadata it is inside udta. 
*/ - // meta - mp4_write_meta(mux); + if (mux->flavour == FLAVOUR_MOV && !(mux->flags & MP4_USE_MDTA_KEY_VALUE)) { + // keys directly in udta atom + struct dstr value = {0}; + + /* Encoder name */ + dstr_cat(&value, "OBS Studio ("); + dstr_cat(&value, obs_get_version_string()); + dstr_cat(&value, ")"); + mp4_write_string_data_atom(mux, "\251swr", value.array); + + dstr_free(&value); + } else { + // meta + mp4_write_meta(mux); + } return write_box_size(s, start); } @@ -2485,6 +2733,8 @@ static inline enum mp4_codec get_codec(obs_encoder_t *enc) return CODEC_HEVC; if (strcmp(codec, "av1") == 0) return CODEC_AV1; + if (strcmp(codec, "prores") == 0) + return CODEC_PRORES; if (strcmp(codec, "aac") == 0) return CODEC_AAC; if (strcmp(codec, "opus") == 0) @@ -2520,11 +2770,6 @@ static inline void add_track(struct mp4_mux *mux, obs_encoder_t *enc) track->timebase_den = info->fps_num; track->timescale = track->timebase_den; - /* FFmpeg does this to compensate for non-monotonic timestamps, - * we probably don't need it, but let's stick to what they do - * for maximum compatibility. */ - while (track->timescale < 10000) - track->timescale *= 2; } else { uint32_t sample_rate = obs_encoder_get_sample_rate(enc); /* Opus is always 48 kHz */ @@ -2583,16 +2828,24 @@ static inline void free_track(struct mp4_track *track) /* ===========================================================================*/ /* API */ -struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags) +struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags, + enum mp4_flavour flavour) { struct mp4_mux *mux = bzalloc(sizeof(struct mp4_mux)); mux->output = output; mux->serializer = serializer; mux->flags = flags; + mux->flavour = flavour; /* Timestamp is based on 1904 rather than 1970. 
*/ mux->creation_time = time(NULL) + 0x7C25B080; + if (flavour == FLAVOUR_MOV && mux->creation_time > UINT32_MAX) { + /* This will only happen in 2040 but better safe than sorry! */ + warn("Creation time too large for MOV, setting to 0 (unset)."); + mux->creation_time = 0; + } + for (size_t i = 0; i < MAX_OUTPUT_VIDEO_ENCODERS; i++) { obs_encoder_t *enc = obs_output_get_video_encoder2(output, i); if (!enc) @@ -2658,6 +2911,8 @@ bool mp4_mux_submit_packet(struct mp4_mux *mux, struct encoder_packet *pkt) obs_parse_hevc_packet(&parsed_packet, pkt); else if (track->codec == CODEC_AV1) obs_parse_av1_packet(&parsed_packet, pkt); + else if (track->codec == CODEC_PRORES) + obs_encoder_packet_ref(&parsed_packet, pkt); /* Set fragmentation PTS if packet is keyframe and PTS > 0 */ if (parsed_packet.keyframe && parsed_packet.pts > 0) { @@ -2706,7 +2961,7 @@ bool mp4_mux_finalise(struct mp4_mux *mux) info("Number of fragments: %u", mux->fragments_written); if (mux->flags & MP4_SKIP_FINALISATION) { - warn("Skipping MP4 finalization!"); + warn("Skipping finalization!"); return true; } diff --git a/plugins/obs-outputs/mp4-mux.h b/plugins/obs-outputs/mp4-mux.h index ccb7b7f4b31d93..e176f2f53d3f1e 100644 --- a/plugins/obs-outputs/mp4-mux.h +++ b/plugins/obs-outputs/mp4-mux.h @@ -22,6 +22,13 @@ struct mp4_mux; +/* Flavour for target compatibility */ +enum mp4_flavour { + FLAVOUR_MP4, /* ISO/IEC 14496-12 */ + FLAVOUR_MOV, /* Apple QuickTime */ + FLAVOUR_CMAF, /* ISO/IEC 23000-19 (not yet implemented) */ +}; + enum mp4_mux_flags { /* Uses mdta key/value list for metadata instead of QuickTime keys */ MP4_USE_MDTA_KEY_VALUE = 1 << 0, @@ -33,7 +40,8 @@ enum mp4_mux_flags { MP4_USE_NEGATIVE_CTS = 1 << 3, }; -struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags); +struct mp4_mux *mp4_mux_create(obs_output_t *output, struct serializer *serializer, enum mp4_mux_flags flags, + enum mp4_flavour flavour); void mp4_mux_destroy(struct mp4_mux 
*mux); bool mp4_mux_submit_packet(struct mp4_mux *mux, struct encoder_packet *pkt); bool mp4_mux_add_chapter(struct mp4_mux *mux, int64_t dts_usec, const char *name); diff --git a/plugins/obs-outputs/mp4-output.c b/plugins/obs-outputs/mp4-output.c index 130ca01baaaea5..fce76389f7ba8b 100644 --- a/plugins/obs-outputs/mp4-output.c +++ b/plugins/obs-outputs/mp4-output.c @@ -27,8 +27,9 @@ #include -#define do_log(level, format, ...) \ - blog(level, "[mp4 output: '%s'] " format, obs_output_get_name(out->output), ##__VA_ARGS__) +#define do_log(level, format, ...) \ + blog(level, "[%s output: '%s'] " format, out->muxer_flavour == FLAVOUR_MOV ? "mov" : "mp4", \ + obs_output_get_name(out->output), ##__VA_ARGS__) #define warn(format, ...) do_log(LOG_WARNING, format, ##__VA_ARGS__) #define info(format, ...) do_log(LOG_INFO, format, ##__VA_ARGS__) @@ -54,6 +55,7 @@ struct mp4_output { pthread_mutex_t mutex; struct mp4_mux *muxer; + enum mp4_flavour muxer_flavour; int flags; int64_t last_dts_usec; @@ -136,6 +138,12 @@ static const char *mp4_output_name(void *unused) return obs_module_text("MP4Output"); } +static const char *mov_output_name(void *unused) +{ + UNUSED_PARAMETER(unused); + return obs_module_text("MOVOutput"); +} + static void mp4_output_destory(void *data) { struct mp4_output *out = data; @@ -187,10 +195,11 @@ static void split_file_proc(void *data, calldata_t *cd) os_atomic_set_bool(&out->manual_split, true); } -static void *mp4_output_create(obs_data_t *settings, obs_output_t *output) +static void *mp4_output_create_internal(obs_data_t *settings, obs_output_t *output, enum mp4_flavour flavour) { struct mp4_output *out = bzalloc(sizeof(struct mp4_output)); out->output = output; + out->muxer_flavour = flavour; pthread_mutex_init(&out->mutex, NULL); signal_handler_t *sh = obs_output_get_signal_handler(output); @@ -204,6 +213,16 @@ static void *mp4_output_create(obs_data_t *settings, obs_output_t *output) return out; } +static void *mp4_output_create(obs_data_t 
*settings, obs_output_t *output) +{ + return mp4_output_create_internal(settings, output, FLAVOUR_MP4); +} + +static void *mov_output_create(obs_data_t *settings, obs_output_t *output) +{ + return mp4_output_create_internal(settings, output, FLAVOUR_MOV); +} + static inline void apply_flag(int *flags, const char *value, int flag_value) { if (atoi(value)) @@ -268,16 +287,16 @@ static bool mp4_output_start(void *data) obs_data_release(settings); if (!buffered_file_serializer_init_defaults(&out->serializer, out->path.array)) { - warn("Unable to open MP4 file '%s'", out->path.array); + warn("Unable to open file '%s'", out->path.array); return false; } /* Initialise muxer and start capture */ - out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags); + out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags, out->muxer_flavour); os_atomic_set_bool(&out->active, true); obs_output_begin_data_capture(out->output, 0); - info("Writing Hybrid MP4 file '%s'...", out->path.array); + info("Writing Hybrid MP4/MOV file '%s'...", out->path.array); return true; } @@ -385,18 +404,18 @@ static bool change_file(struct mp4_output *out, struct encoder_packet *pkt) da_clear(out->chapters); - info("MP4 file split complete. Finalization took %" PRIu64 " ms.", (os_gettime_ns() - start_time) / 1000000); + info("File split complete. 
Finalization took %" PRIu64 " ms.", (os_gettime_ns() - start_time) / 1000000); /* open new file */ generate_filename(out, &out->path, out->allow_overwrite); info("Changing output file to '%s'", out->path.array); if (!buffered_file_serializer_init_defaults(&out->serializer, out->path.array)) { - warn("Unable to open MP4 file '%s'", out->path.array); + warn("Unable to open file '%s'", out->path.array); return false; } - out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags); + out->muxer = mp4_mux_create(out->output, &out->serializer, out->flags, out->muxer_flavour); calldata_t cd = {0}; signal_handler_t *sh = obs_output_get_signal_handler(out->output); @@ -456,7 +475,7 @@ static void mp4_output_actual_stop(struct mp4_output *out, int code) da_clear(out->chapters); - info("MP4 file output complete. Finalization took %" PRIu64 " ms.", (os_gettime_ns() - start_time) / 1000000); + info("File output complete. Finalization took %" PRIu64 " ms.", (os_gettime_ns() - start_time) / 1000000); } static void push_back_packet(struct mp4_output *out, struct encoder_packet *packet) @@ -584,7 +603,7 @@ struct obs_output_info mp4_output_info = { .id = "mp4_output", .flags = OBS_OUTPUT_AV | OBS_OUTPUT_ENCODED | OBS_OUTPUT_MULTI_TRACK_AV | OBS_OUTPUT_CAN_PAUSE, .encoded_video_codecs = "h264;hevc;av1", - .encoded_audio_codecs = "aac", + .encoded_audio_codecs = "aac;alac;flac;opus", .get_name = mp4_output_name, .create = mp4_output_create, .destroy = mp4_output_destory, @@ -594,3 +613,18 @@ struct obs_output_info mp4_output_info = { .get_properties = mp4_output_properties, .get_total_bytes = mp4_output_total_bytes, }; + +struct obs_output_info mov_output_info = { + .id = "mov_output", + .flags = OBS_OUTPUT_AV | OBS_OUTPUT_ENCODED | OBS_OUTPUT_MULTI_TRACK_AV | OBS_OUTPUT_CAN_PAUSE, + .encoded_video_codecs = "h264;hevc;prores", + .encoded_audio_codecs = "aac;alac", + .get_name = mov_output_name, + .create = mov_output_create, + .destroy = mp4_output_destory, + .start = 
mp4_output_start, + .stop = mp4_output_stop, + .encoded_packet = mp4_output_packet, + .get_properties = mp4_output_properties, + .get_total_bytes = mp4_output_total_bytes, +}; diff --git a/plugins/obs-outputs/obs-outputs.c b/plugins/obs-outputs/obs-outputs.c index c3adc5d2f3cd32..02371260c912e4 100644 --- a/plugins/obs-outputs/obs-outputs.c +++ b/plugins/obs-outputs/obs-outputs.c @@ -16,6 +16,7 @@ extern struct obs_output_info rtmp_output_info; extern struct obs_output_info null_output_info; extern struct obs_output_info flv_output_info; extern struct obs_output_info mp4_output_info; +extern struct obs_output_info mov_output_info; #if defined(_WIN32) && defined(MBEDTLS_THREADING_ALT) void mbed_mutex_init(mbedtls_threading_mutex_t *m) @@ -63,6 +64,7 @@ bool obs_module_load(void) obs_register_output(&null_output_info); obs_register_output(&flv_output_info); obs_register_output(&mp4_output_info); + obs_register_output(&mov_output_info); return true; }