In [1]:
import os
import ffmpeg
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# --------------------------
# Adjustable Global Settings
# --------------------------
# All paths are passed unchanged to main(); relative paths are resolved against
# the current working directory.
INPUT_VIDEO_PATH       = "test.mp4"                # Path to your input video
OUTPUT_AUDIO_PATH      = "extracted_audio.mp3"     # Where to save extracted audio
ASCII_FRAMES_DIR       = "ascii_framesbest"            # Directory that receives the ASCII PNG frames
OUTPUT_VIDEO_SILENT    = "ascii_silentBest.mp4"        # Silent ASCII video output
FINAL_OUTPUT_VIDEO     = "ascii_final_with_audioBest.mp4"  # Silent video merged with the extracted audio

# Path to a TrueType font, ideally monospaced, used to render the characters.
# Example: "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf"
# When None (or when the file cannot be loaded) PIL's built-in default font is used.
FONT_PATH              = None  
FONT_SIZE             = 12  # Point size; only applies to a font loaded from FONT_PATH

# Character ramp ordered from the sparsest glyph to the densest one.
# frame_to_ascii_image maps dark pixels to the first characters and bright
# pixels to the last ones, and draws white text on a black background.
ASCII_CHARS           = " .:-=+*#%@"

# Number of ASCII columns per frame; keep small to reduce memory usage
ASCII_WIDTH           = 240  
# Maximum number of ASCII rows (to avoid huge images if aspect ratio is extreme)
ASCII_MAX_HEIGHT      = 180

# Target frame rate for the final ASCII video; lower values mean fewer frames to render
TARGET_FPS            = 23.5


# ----------------------------------------------------------------------------
# 1. Extract Audio from the Original Video
# ----------------------------------------------------------------------------
def extract_audio_ffmpeg(input_video_path, output_audio_path):
    """
    Extract the audio of a video and save it as an MP3 file.

    Args:
        input_video_path: Path of the video to read.
        output_audio_path: Path of the MP3 file to write; an existing file is
            overwritten.

    Failures are reported on stdout instead of being raised, so the caller
    keeps running even when the extraction does not succeed.
    """
    try:
        (
            ffmpeg
            .input(input_video_path)
            .output(output_audio_path, format="mp3", acodec="mp3")
            .run(overwrite_output=True, quiet=True)
        )
    except ffmpeg.Error as e:
        print(f"[ERROR] FFmpeg audio extraction failed: {e}")
        # quiet=True captures FFmpeg's stderr and str(e) only points at it,
        # so print the captured text to make the failure diagnosable.
        stderr = getattr(e, "stderr", None)
        if isinstance(stderr, bytes):
            stderr = stderr.decode("utf-8", errors="replace")
        if stderr and stderr.strip():
            print(stderr.strip())
    else:
        print(f"[INFO] Audio extracted to: {output_audio_path}")


# ----------------------------------------------------------------------------
# 2. Convert a Single Grayscale Frame to a PIL ASCII Image
# ----------------------------------------------------------------------------
# Fonts already loaded by _load_ascii_font, keyed by (font_path, font_size).
_ASCII_FONT_CACHE = {}


def _load_ascii_font(font_path, font_size):
    """Return the font for (font_path, font_size), loading it at most once."""
    key = (font_path, font_size)
    font = _ASCII_FONT_CACHE.get(key)
    if font is None:
        if font_path is not None:
            try:
                font = ImageFont.truetype(font_path, font_size)
            except IOError:
                print("[WARN] Could not load custom font. Using default.")
                font = ImageFont.load_default()
        else:
            font = ImageFont.load_default()
        _ASCII_FONT_CACHE[key] = font
    return font


def _measure_char_cell(font):
    """Return the (width, height) in pixels reserved for one character."""
    if hasattr(font, "getbbox"):
        # getsize() was removed in Pillow 10. The right/bottom edges of the
        # bounding box give the same extent that getsize() used to report.
        _, _, right, bottom = font.getbbox("A")
        return int(right), int(bottom)
    # Older Pillow releases without getbbox().
    return font.getsize("A")


def frame_to_ascii_image(gray_frame, font_path=None, font_size=12,
                         target_width=80, max_height=60):
    """
    Convert a single grayscale frame (NumPy array) to a PIL Image of ASCII text.

    Args:
        gray_frame: 2-D array of pixel intensities (rows x cols).
        font_path: Optional path to a TrueType font; PIL's default font is
            used when it is None or cannot be loaded.
        font_size: Point size used when loading the font from font_path.
        target_width: Number of ASCII columns in the output.
        max_height: Upper bound on the number of ASCII rows.

    Returns:
        An RGB PIL image with white characters on a black background, one
        character from ASCII_CHARS per resized pixel.
    """
    rows, cols = gray_frame.shape

    # Estimate the row count from the aspect ratio. Dividing by ~2 accounts
    # for character cells being roughly twice as tall as they are wide.
    new_height = int(rows * target_width / (cols * 2))
    # Keep at least one row: very wide frames would otherwise round down to
    # zero, which cv2.resize rejects.
    new_height = max(1, min(new_height, max_height))

    # Shrink the frame so that one pixel corresponds to one character.
    resized = cv2.resize(gray_frame, (target_width, new_height), interpolation=cv2.INTER_AREA)

    # Map each pixel to a character with a lookup table instead of calling a
    # Python function per pixel.
    num_ascii_chars = len(ASCII_CHARS)
    step = max(1, 256 // num_ascii_chars)
    palette = np.array(list(ASCII_CHARS))
    # Widen before dividing so the arithmetic does not depend on the frame's
    # integer width; clamp because 255 // step can exceed the last index.
    indices = np.minimum(resized.astype(np.intp) // step, num_ascii_chars - 1)
    ascii_array = palette[indices]

    font = _load_ascii_font(font_path, font_size)

    # Size the canvas from the cell of a typical character.
    char_width, char_height = _measure_char_cell(font)
    image_width = target_width * char_width
    image_height = new_height * char_height

    image = Image.new("RGB", (image_width, image_height), "black")
    draw = ImageDraw.Draw(image)

    # Draw one text line per row of characters.
    for row_idx, row_val in enumerate(ascii_array):
        row_str = "".join(row_val)
        y_pos = row_idx * char_height
        draw.text((0, y_pos), row_str, font=font, fill="white")

    return image


# ----------------------------------------------------------------------------
# 3. Convert Video Frames to ASCII Images
# ----------------------------------------------------------------------------
def video_to_ascii_frames(video_path,
                          output_dir,
                          font_path=None,
                          font_size=12,
                          target_width=80,
                          max_height=60,
                          fps=10):
    """
    Read frames from a video, convert each kept frame to ASCII, and save it as
    a PNG named frame_NNNNN.png in output_dir.

    Frames are selected by timestamp so that playing the saved frames at the
    returned rate lasts as long as the source. An integer "keep every Nth
    frame" rule cannot do that for ratios such as 23.98 -> 23.5.

    Args:
        video_path: Path of the video to read.
        output_dir: Directory for the PNG frames; created if missing.
        font_path: Optional TrueType font path forwarded to frame_to_ascii_image.
        font_size: Font size forwarded to frame_to_ascii_image.
        target_width: Number of ASCII columns per frame.
        max_height: Maximum number of ASCII rows per frame.
        fps: Desired frame rate of the saved sequence; must be positive.

    Returns:
        The frame rate the saved frames should be played at. This is `fps`
        when the source is faster than `fps` or its rate is unknown, and the
        source rate when the source is slower (frames are never duplicated).

    Raises:
        ValueError: If fps is not positive.
        IOError: If the video cannot be opened.
    """
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps}")

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise IOError(f"Cannot open video file: {video_path}")

    saved_count = 0
    try:
        original_fps = cap.get(cv2.CAP_PROP_FPS)
        if original_fps > fps:
            # Downsample: one output frame every `source_per_output` source frames.
            source_per_output = original_fps / fps
            effective_fps = fps
        else:
            # Source is not faster than the target, or its rate is not
            # reported (0 / NaN): keep every frame.
            source_per_output = 1.0
            effective_fps = original_fps if original_fps > 0 else fps
        print(f"[INFO] Original FPS: {original_fps:.2f}, Target FPS: {fps}, "
              f"source frames per output frame: {source_per_output:.3f}")

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_index = frame_count
            frame_count += 1

            # Output frame k is taken from source frame floor(k * ratio);
            # skip source frames that come before the next one due.
            if frame_index < int(saved_count * source_per_output):
                continue

            # Convert to grayscale
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Convert grayscale to ASCII image
            ascii_img = frame_to_ascii_image(
                gray,
                font_path=font_path,
                font_size=font_size,
                target_width=target_width,
                max_height=max_height
            )

            # Save the ASCII image as PNG
            output_path = os.path.join(output_dir, f"frame_{saved_count:05d}.png")
            ascii_img.save(output_path)
            saved_count += 1

            if saved_count % 50 == 0:
                print(f"[INFO] Processed {saved_count} ASCII frames...")
    finally:
        # Release the capture even if a frame fails to convert or save.
        cap.release()

    print(f"[INFO] Total frames saved: {saved_count}")
    return float(effective_fps)


# ----------------------------------------------------------------------------
# 4. Create a Silent Video from ASCII Frames
# ----------------------------------------------------------------------------
def create_silent_video_from_frames(frames_dir, output_video_path, fps=10):
    """
    Read the PNG frames in frames_dir in sorted filename order and write them
    to a silent MP4 video with OpenCV.

    Args:
        frames_dir: Directory containing the '.png' frames.
        output_video_path: Path of the video file to write.
        fps: Frame rate of the written video.

    Raises:
        ValueError: If frames_dir contains no PNG files.
        IOError: If the first frame cannot be read or the video writer cannot
            be opened.
    """
    frame_files = sorted(f for f in os.listdir(frames_dir) if f.endswith('.png'))
    if not frame_files:
        raise ValueError(f"No PNG frames found in {frames_dir}")

    # The first frame defines the dimensions of the whole video.
    first_frame_path = os.path.join(frames_dir, frame_files[0])
    first_frame = cv2.imread(first_frame_path)
    if first_frame is None:
        raise IOError(f"Could not read the first frame: {first_frame_path}")
    height, width = first_frame.shape[:2]

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    # A writer that failed to open accepts write() calls without producing a
    # usable file, so fail loudly instead of reporting success.
    if not video_writer.isOpened():
        video_writer.release()
        raise IOError(f"Could not open video writer for: {output_video_path}")

    try:
        for frame_file in frame_files:
            frame_path = os.path.join(frames_dir, frame_file)
            frame = cv2.imread(frame_path)
            if frame is None:
                print(f"[WARN] Skipping unreadable frame: {frame_file}")
                continue
            # The writer requires every frame to match the size it was opened
            # with; bring mismatched frames to that size before writing.
            if frame.shape[:2] != (height, width):
                frame = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
            video_writer.write(frame)
    finally:
        # Finalize the file even if reading or writing a frame fails.
        video_writer.release()

    print(f"[INFO] Silent ASCII video saved to: {output_video_path}")


# ----------------------------------------------------------------------------
# 5. Merge Silent Video and Audio
# ----------------------------------------------------------------------------
def merge_video_audio(video_path, audio_path, output_final_video):
    """
    Merge a silent video file and an audio file into a single MP4 (H.264 + AAC).

    Args:
        video_path: Path of the silent video.
        audio_path: Path of the audio file.
        output_final_video: Path of the merged file; an existing file is
            overwritten.

    Failures are reported on stdout instead of being raised.
    """
    try:
        (
            ffmpeg
            .output(ffmpeg.input(video_path), ffmpeg.input(audio_path),
                    output_final_video,
                    vcodec="libx264",    # H.264
                    acodec="aac",        # AAC
                    strict="experimental",
                    # libx264 with 4:2:0 chroma rejects odd frame sizes, which
                    # multiples of a glyph cell can easily produce; pad the
                    # right/bottom edge by at most one pixel to make them even.
                    vf="pad=ceil(iw/2)*2:ceil(ih/2)*2",
            )
            .run(overwrite_output=True, quiet=True)
        )
    except ffmpeg.Error as e:
        print(f"[ERROR] FFmpeg merge failed: {e}")
        # quiet=True captures FFmpeg's stderr and str(e) only points at it,
        # so print the captured text to make the failure diagnosable.
        stderr = getattr(e, "stderr", None)
        if isinstance(stderr, bytes):
            stderr = stderr.decode("utf-8", errors="replace")
        if stderr and stderr.strip():
            print(stderr.strip())
    else:
        print(f"[INFO] Final video with audio saved to: {output_final_video}")


# ----------------------------------------------------------------------------
# MAIN EXECUTION
# ----------------------------------------------------------------------------
def main():
    """
    Run the full pipeline: extract the audio, render the ASCII frames, build
    the silent video, and merge it with the audio.

    All inputs and outputs come from the module-level settings.
    """
    # 1) Extract audio from the original video
    extract_audio_ffmpeg(INPUT_VIDEO_PATH, OUTPUT_AUDIO_PATH)

    # 2) Convert video -> ASCII frames (PNG)
    sampled_fps = video_to_ascii_frames(
        video_path=INPUT_VIDEO_PATH,
        output_dir=ASCII_FRAMES_DIR,
        font_path=FONT_PATH,
        font_size=FONT_SIZE,
        target_width=ASCII_WIDTH,
        max_height=ASCII_MAX_HEIGHT,
        fps=TARGET_FPS
    )

    # The silent video must play at the rate the frames were sampled at,
    # otherwise it drifts against the audio. Use the rate reported by the
    # conversion step when it returns one; fall back to TARGET_FPS otherwise.
    playback_fps = sampled_fps or TARGET_FPS

    # 3) Create a silent video from the ASCII frames
    create_silent_video_from_frames(
        frames_dir=ASCII_FRAMES_DIR,
        output_video_path=OUTPUT_VIDEO_SILENT,
        fps=playback_fps
    )

    # 4) Merge the silent video and extracted audio
    merge_video_audio(OUTPUT_VIDEO_SILENT, OUTPUT_AUDIO_PATH, FINAL_OUTPUT_VIDEO)

    print("[DONE] Finished! Your final ASCII video is at:", FINAL_OUTPUT_VIDEO)


if __name__ == "__main__":
    main()


[INFO] Audio extracted to: extracted_audio.mp3
[INFO] Original FPS: 23.98, Target FPS: 23.5, frame_skip: 1
[INFO] Processed 50 ASCII frames...
[INFO] Processed 100 ASCII frames...
[INFO] Processed 150 ASCII frames...
[INFO] Processed 200 ASCII frames...
[INFO] Processed 250 ASCII frames...
[INFO] Processed 300 ASCII frames...
[INFO] Processed 350 ASCII frames...
[INFO] Processed 400 ASCII frames...
[INFO] Processed 450 ASCII frames...
[INFO] Processed 500 ASCII frames...
[INFO] Processed 550 ASCII frames...
[INFO] Processed 600 ASCII frames...
[INFO] Processed 650 ASCII frames...
[INFO] Processed 700 ASCII frames...
[INFO] Processed 750 ASCII frames...
[INFO] Processed 800 ASCII frames...
[INFO] Processed 850 ASCII frames...
[INFO] Processed 900 ASCII frames...
[INFO] Processed 950 ASCII frames...
[INFO] Processed 1000 ASCII frames...
[INFO] Processed 1050 ASCII frames...
[INFO] Processed 1100 ASCII frames...
[INFO] Processed 1150 ASCII frames...
[INFO] Processed 1200 ASCII frames...
[