# makevideo.py

import argparse
import glob
import os
import pickle
import re
import shlex
import shutil
import subprocess
import sys
import zipfile

import fitz
from PIL import Image, ImageDraw


def main(args):
    """Build the narrated videos for one paper from its packaged assets.

    Unpacks ``<paperid>_files/zipfile.zip`` into ``<paperid>_files/output``
    (that directory is deleted first if it exists) and then writes, inside it:

    * ``output.mp4``       -- one chunk per line of ``mp3_list.txt``
    * ``output_short.mp4`` -- only when ``shorts_mp3_list.txt`` is present
    * ``output_qa.mp4``    -- only when ``qa_mp3_list.txt`` is present

    External tools are started without a shell, so paths containing spaces
    or shell metacharacters are passed through literally.  A tool failure is
    reported on stderr and processing continues (the previous ``os.system``
    calls ignored failures as well).

    Args:
        args: Namespace with the string attributes ``paperid``, ``ffmpeg``,
            ``ffprobe`` and ``gs``.  The three tool values are split with
            shell-like rules, so a value such as ``"nice ffmpeg"`` still works.

    Returns:
        None.  Returns immediately when the zip archive does not exist.

    Raises:
        ValueError: if a name in ``mp3_list.txt`` carries no ``page<N>``
            marker, or ``qa_mp3_list.txt`` lists an answer before any question.
    """
    work_root = f"{args.paperid}_files"
    files = glob.glob(os.path.join(work_root, "zipfile.zip"))
    if not files:
        _warn(f"nothing to do: no zipfile.zip under {work_root!r}")
        return
    zip_name = max(files, key=os.path.getmtime)
    dr = os.path.join(work_root, "output")

    # Always start from a clean extraction directory.
    if os.path.exists(dr):
        shutil.rmtree(dr)
    with zipfile.ZipFile(zip_name, 'r') as zipf:
        zipf.extractall(dr)

    gs = shlex.split(args.gs)
    ffmpeg = shlex.split(args.ffmpeg)
    ffprobe = shlex.split(args.ffprobe)

    _make_full_video(dr, gs, ffmpeg, ffprobe)

    # =============== SHORT VIDEO ====================
    if os.path.exists(os.path.join(dr, "shorts_mp3_list.txt")):
        _make_short_video(dr, gs, ffmpeg, ffprobe)

    # =============== QA VIDEO ====================
    if os.path.exists(os.path.join(dr, "qa_mp3_list.txt")):
        _make_qa_video(dr, gs, ffmpeg, ffprobe)


def _warn(message):
    """Report a non-fatal problem on stderr."""
    print(f"makevideo: {message}", file=sys.stderr)


def _run(cmd, quiet=False):
    """Run *cmd* (an argument list, no shell) and return its exit status.

    With ``quiet=True`` the child's stdout and stderr are discarded.
    Failures are reported through ``_warn`` and never raised.
    """
    sink = subprocess.DEVNULL if quiet else None
    try:
        status = subprocess.run(cmd, stdout=sink, stderr=sink, check=False).returncode
    except OSError as err:
        # Typically a missing executable; carry on like os.system used to.
        _warn(f"could not run {cmd[0]!r}: {err}")
        return 127
    if status != 0:
        _warn(f"exit status {status} from: {' '.join(cmd)}")
    return status


def _load_pickle(path):
    """Load one pickle file and close the handle afterwards."""
    # NOTE(review): unpickling can execute arbitrary code -- the archive must
    # come from a trusted producer.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _listed_name(line):
    """Return the file name on a list line (its second field), or None."""
    fields = line.split()
    if len(fields) < 2:
        if fields:
            _warn(f"ignoring malformed list line: {line.strip()!r}")
        return None
    # The commands used to go through a shell, which removed quotes around
    # the name (ffmpeg-concat style ``file 'x.mp3'``).  No shell is involved
    # any more, so drop them explicitly.
    return fields[1].strip("'\"")


def _extract_page(gs, main_pdf, page_index, out_pdf):
    """Write zero-based page *page_index* of *main_pdf* to *out_pdf*."""
    gs_page = page_index + 1  # -dFirstPage / -dLastPage count from 1
    _run(gs + ['-sDEVICE=pdfwrite', '-dNOPAUSE', '-dBATCH',
               f'-dFirstPage={gs_page}', f'-dLastPage={gs_page}',
               f'-sOutputFile={out_pdf}', main_pdf], quiet=True)


def _render_png(gs, pdf_path, png_path, dpi):
    """Rasterise *pdf_path* to *png_path* at *dpi* dots per inch."""
    _run(gs + ['-sDEVICE=png16m', f'-r{dpi}', '-o', png_path, pdf_path])


def _find_block_coords(gptpagemap, block_coords, page_num, block_num):
    """Return the rectangle of (page, block) from the page map, or None."""
    for entry in gptpagemap:
        if isinstance(entry, list) and entry[1] == page_num and entry[2] == block_num:
            return block_coords[entry[0]][block_num]
    return None


def _highlight_block(page_pdf, page_png, coords):
    """Draw a green frame and a pointer around *coords* on *page_png*.

    *coords* is ``(x0, y0, x1, y1)``; it is rescaled by the ratio between the
    PNG size and the size of the first page of *page_pdf*.
    """
    doc = fitz.open(page_pdf)
    try:
        page_rect = doc[0].rect
        page_width, page_height = page_rect.width, page_rect.height
    finally:
        doc.close()

    with Image.open(page_png) as image:
        scale_x = image.width / page_width
        scale_y = image.height / page_height

        x0, y0, x1, y1 = coords
        # A block whose right edge passes the page middle gets the pointer on
        # its right; otherwise the pointer sits on its left.
        on_right_side = x1 > page_width / 2
        x0 *= scale_x
        y0 *= scale_y
        x1 *= scale_x
        y1 *= scale_y

        pointer_file = 'imgs/pointertoleft.png' if on_right_side else 'imgs/pointertoright.png'
        with Image.open(pointer_file) as raw_pointer:
            pointer = raw_pointer.convert("RGBA")

        draw = ImageDraw.Draw(image)
        thickness = 5
        # Nested one-pixel rectangles simulate a thick outline.
        for i in range(thickness):
            draw.rectangle([x0 - i - 5, y0 - i - 5, x1 + i + 5, y1 + i + 5], outline="green")

        # Scale the pointer to 1/20 of the page height, keeping its aspect ratio.
        desired_height = image.height / 20
        new_width = int(pointer.width / pointer.height * desired_height)
        pointer = pointer.resize((new_width, int(desired_height)))

        if on_right_side:
            pointer_x0 = x1 + 20
        else:
            pointer_x0 = x0 - 20 - new_width
        pointer_y0 = (y0 + y1) / 2 - pointer.height / 2

        # The pointer doubles as its own mask so transparency is preserved.
        image.paste(pointer, (int(pointer_x0), int(pointer_y0)), pointer)
        image.save(page_png)


def _audio_seconds(ffprobe, audio_mp3):
    """Return the audio length rounded up to whole seconds (floor + 1), or None."""
    try:
        probe = subprocess.run(
            ffprobe + ['-i', audio_mp3, '-show_entries', 'format=duration',
                       '-v', 'quiet', '-of', 'csv=p=0'],
            stdout=subprocess.PIPE, universal_newlines=True, check=False)
        return int(float(probe.stdout.strip())) + 1
    except (OSError, ValueError):
        return None


def _make_chunk(ffmpeg, ffprobe, still_png, audio_mp3, video_base):
    """Encode a still image plus an audio file into ``<video_base>_final.mp4``."""
    raw_mp4 = f'{video_base}.mp4'
    _run(ffmpeg + ['-loop', '1', '-i', still_png, '-i', audio_mp3,
                   '-vf', 'scale=1920:-2', '-c:v', 'libx264', '-tune', 'stillimage',
                   '-y', '-c:a', 'aac', '-b:a', '128k', '-pix_fmt', 'yuv420p',
                   '-shortest', raw_mp4])

    # Trim the chunk to the audio length so no silence trails the narration.
    trim = ffmpeg + ['-i', raw_mp4]
    seconds = _audio_seconds(ffprobe, audio_mp3)
    if seconds is None:
        # Without a duration, keep the whole chunk instead of guessing a cut.
        _warn(f"could not read the duration of {audio_mp3!r}; chunk left untrimmed")
    else:
        trim += ['-t', str(seconds)]
    _run(trim + ['-y', '-c', 'copy', f'{video_base}_final.mp4'])


def _concat_chunks(ffmpeg, list_path, out_path):
    """Join the chunks named in *list_path* into *out_path* without re-encoding."""
    _run(ffmpeg + ['-f', 'concat', '-i', list_path, '-y', '-c', 'copy', out_path])


def _make_full_video(dr, gs, ffmpeg, ffprobe):
    """Create ``output.mp4``: one highlighted page image per audio file."""
    with open(os.path.join(dr, "mp3_list.txt"), "r") as f:
        lines = f.readlines()

    block_coords = _load_pickle(os.path.join(dr, 'block_coords.pkl'))
    gptpagemap = _load_pickle(os.path.join(dr, 'gptpagemap.pkl'))

    main_pdf = os.path.join(dr, "main.pdf")
    list_path = os.path.join(dr, 'mp4_list.txt')
    extracted_pages = set()  # single-page PDFs already produced

    with open(list_path, 'w') as outvideo:
        for line in lines:
            name = _listed_name(line)
            if name is None:
                continue

            audio = name.replace('.mp3', '')
            video = audio.replace('-', '')

            page_match = re.search(r'page(\d+)', name)
            if page_match is None:
                raise ValueError(f"no page number in {name!r} (mp3_list.txt)")
            page_num = int(page_match.group(1))

            page_base = os.path.join(dr, str(page_num))
            page_pdf = f'{page_base}.pdf'
            page_png = f'{page_base}.png'

            # The single-page PDF never changes, so extract it once per page.
            # The PNG is rendered again every time because the previous
            # line's highlight was drawn onto it.
            if page_num not in extracted_pages:
                _extract_page(gs, main_pdf, page_num, page_pdf)
                extracted_pages.add(page_num)
            _render_png(gs, page_pdf, page_png, 300)

            if 'summary' not in name:
                # Look the rectangle up afresh for every line so a missing
                # entry can never reuse the previous line's coordinates.
                coords = None
                block_match = re.search(r'block(\d+)', name)
                if block_match is not None:
                    coords = _find_block_coords(gptpagemap, block_coords, page_num,
                                                int(block_match.group(1)))
                if coords is None:
                    _warn(f"no block coordinates for {name!r}; page shown without highlight")
                else:
                    _highlight_block(page_pdf, page_png, coords)

            _make_chunk(ffmpeg, ffprobe, page_png,
                        f'{os.path.join(dr, audio)}.mp3', os.path.join(dr, video))
            outvideo.write(f"file '{video}_final.mp4'\n")

    _concat_chunks(ffmpeg, list_path, os.path.join(dr, "output.mp4"))


def _make_short_video(dr, gs, ffmpeg, ffprobe):
    """Create ``output_short.mp4`` from the slide PDFs and the short audio list."""
    with open(os.path.join(dr, "shorts_mp3_list.txt"), "r") as f:
        lines = f.readlines()

    list_path = os.path.join(dr, 'short_mp4_list.txt')
    with open(list_path, 'w') as outvideo:
        for page_num, line in enumerate(lines):
            name = _listed_name(line)
            if name is None:
                continue

            audio = name.replace('.mp3', '')
            video = audio.replace('-', '')

            # Line 0 uses <dr>/0.pdf; every later line uses its own slide.
            if page_num == 0:
                input_path = os.path.join(dr, str(page_num))
            else:
                input_path = os.path.join(dr, 'slides', f'slide_{page_num}')

            page_png = f'{os.path.join(dr, str(page_num))}.png'
            _render_png(gs, f'{input_path}.pdf', page_png, 500)

            _make_chunk(ffmpeg, ffprobe, page_png,
                        f'{os.path.join(dr, audio)}.mp3', os.path.join(dr, video))
            outvideo.write(f"file '{video}_final.mp4'\n")

    _concat_chunks(ffmpeg, list_path, os.path.join(dr, "output_short.mp4"))


def _make_qa_video(dr, gs, ffmpeg, ffprobe):
    """Create ``output_qa.mp4``: question slides followed by their answer pages."""
    with open(os.path.join(dr, "qa_mp3_list.txt"), "r") as f:
        lines = f.readlines()

    qa_pages = _load_pickle(os.path.join(dr, 'qa_pages.pkl'))
    main_pdf = os.path.join(dr, "main.pdf")
    qa_png = os.path.join(dr, 'qa_page.png')
    list_path = os.path.join(dr, 'qa_mp4_list.txt')

    turn = -1         # index of the current question
    answer_index = 0  # position inside qa_pages[turn]
    with open(list_path, 'w') as outvideo:
        for line in lines:
            name = _listed_name(line)
            if name is None:
                continue

            audio = name.replace('.mp3', '')
            video = audio.replace('-', '')

            if 'question' in audio:
                # Question: show the prepared question slide.
                turn += 1
                answer_index = 0
                input_path = os.path.join(dr, 'questions', f'question_{turn}')
            else:
                # Answer: show the next paper page recorded for this question.
                if turn < 0:
                    raise ValueError(f"answer {name!r} listed before any question (qa_mp3_list.txt)")
                p_num = qa_pages[turn][answer_index]
                input_path = os.path.join(dr, f'{p_num}')
                _extract_page(gs, main_pdf, p_num, f'{input_path}.pdf')
                answer_index += 1

            _render_png(gs, f'{input_path}.pdf', qa_png, 500)

            _make_chunk(ffmpeg, ffprobe, qa_png,
                        f'{os.path.join(dr, audio)}.mp3', os.path.join(dr, video))
            outvideo.write(f"file '{video}_final.mp4'\n")

    _concat_chunks(ffmpeg, list_path, os.path.join(dr, "output_qa.mp4"))

if __name__ == "__main__":
    # Command-line entry point: every option is a plain string with a default.
    cli = argparse.ArgumentParser(description='Arguments')
    option_defaults = (
        ("--paperid", ''),          # prefix of the "<paperid>_files" directory
        ("--ffmpeg", 'ffmpeg'),     # ffmpeg executable
        ("--ffprobe", 'ffprobe'),   # ffprobe executable
        ("--gs", 'gs'),             # Ghostscript executable
    )
    for flag, fallback in option_defaults:
        cli.add_argument(flag, type=str, default=fallback)

    main(cli.parse_args())