Source code for yt_framework.utils.archive

#!/usr/bin/env python3
"""
YT Vanilla Job Script to untar archive and upload files to YT build folder.

This script is executed as a standalone YT vanilla job (not imported as a module).

This script:
1. Locates the archive already placed in the job sandbox (see ARCHIVE_LOCAL_NAME)
2. Extracts it to local filesystem
3. Uploads extracted files to YT build folder

Environment variables required:
- YT_BUILD_FOLDER: YT path to build folder
- YT_ARCHIVE_PATH: YT path to archive file
- ARCHIVE_LOCAL_NAME: Local filename of archive in sandbox (default: code.tar.gz)
"""

import tarfile
import tempfile
import os
import sys
from pathlib import Path
import yt.wrapper as yt


def _extract_archive(archive_path: Path, dest: Path) -> None:
    """Extract the gzip-compressed tar at *archive_path* into *dest*.

    Extraction is restricted to *dest*: on interpreters that support tarfile
    extraction filters the ``"data"`` filter is used; otherwise every member
    path (and link target) is checked before anything is written.

    Raises:
        tarfile.TarError: If the archive is malformed or contains a member
            that would be written outside *dest*.
        OSError: If the archive cannot be read or files cannot be written.
    """
    with tarfile.open(archive_path, "r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            # Explicit filter: blocks path traversal / unsafe links and avoids
            # the DeprecationWarning raised when no filter is given.
            tar.extractall(dest, filter="data")
            return

        # Fallback for interpreters without extraction filters.
        dest_root = dest.resolve()

        def _inside(path: Path) -> bool:
            return path == dest_root or dest_root in path.parents

        for member in tar.getmembers():
            target = (dest_root / member.name).resolve()
            if not _inside(target):
                raise tarfile.TarError(f"Unsafe path in archive: {member.name}")
            if member.issym():
                # Symlink targets are relative to the link's own directory.
                link_target = (target.parent / member.linkname).resolve()
            elif member.islnk():
                # Hard-link targets are relative to the archive root.
                link_target = (dest_root / member.linkname).resolve()
            else:
                continue
            if not _inside(link_target):
                raise tarfile.TarError(
                    f"Unsafe link in archive: {member.name} -> {member.linkname}"
                )
        tar.extractall(dest)


def _upload_tree(local_root: Path, yt_root: str) -> int:
    """Upload every file under *local_root* to *yt_root*, mirroring the layout.

    Returns:
        The number of files uploaded.
    """
    # Ensure the destination folder exists even if the archive has no files.
    yt.create("map_node", yt_root, recursive=True, ignore_existing=True)
    # Remember which map nodes were already created so each parent directory
    # is created once instead of once per file.
    created_nodes = {yt_root}
    uploaded_count = 0

    for root, _dirs, files in os.walk(local_root):
        for file_name in files:
            local_file = Path(root) / file_name
            rel_path = local_file.relative_to(local_root)
            yt_path = f"{yt_root}/{rel_path}".replace("\\", "/")

            parent = yt_path.rsplit("/", 1)[0]
            if parent and parent not in created_nodes:
                yt.create("map_node", parent, recursive=True, ignore_existing=True)
                created_nodes.add(parent)

            with open(local_file, "rb") as f:
                yt.write_file(yt_path, f, force_create=True, compute_md5=True)
            uploaded_count += 1

    return uploaded_count


def main() -> None:
    """
    Main entry point for archive extraction and upload script.

    This function is executed as a standalone YT vanilla job. It:
    1. Locates the archive in the current working directory (job sandbox)
    2. Extracts it to a temporary local directory
    3. Uploads the extracted files to the YT build folder

    Environment variables:
        YT_BUILD_FOLDER: YT path to build folder (required).
        YT_ARCHIVE_PATH: YT path to archive file (required; only validated
            here, the archive itself is read from the local sandbox file).
        ARCHIVE_LOCAL_NAME: Local filename of archive in sandbox
            (default: code.tar.gz).

    Returns:
        None (the process exits with code 0 on success, 1 on failure).

    Raises:
        SystemExit: Always; code 1 if required environment variables are
            missing, the archive is absent, or extraction/upload fails,
            code 0 after a successful upload.
    """
    yt_build_folder = os.environ.get("YT_BUILD_FOLDER")
    yt_archive_path = os.environ.get("YT_ARCHIVE_PATH")
    archive_local_name = os.environ.get("ARCHIVE_LOCAL_NAME", "code.tar.gz")

    if not yt_build_folder or not yt_archive_path:
        error_msg = f"Missing environment variables: YT_BUILD_FOLDER={yt_build_folder}, YT_ARCHIVE_PATH={yt_archive_path}"
        print(error_msg, file=sys.stderr)
        sys.exit(1)

    # Code is always extracted into the build folder.
    yt_upload_folder = yt_build_folder

    # The archive is expected to already be in the sandbox under this name.
    archive_local_path = Path(archive_local_name)
    if not archive_local_path.exists():
        error_msg = f"Archive file not found in sandbox: {archive_local_path}"
        print(error_msg, file=sys.stderr)
        sys.exit(1)

    with tempfile.TemporaryDirectory() as tmpdir:
        extract_dir = Path(tmpdir) / "extracted_code"
        extract_dir.mkdir()

        print(
            f"Using archive from sandbox: {archive_local_path} ({archive_local_path.stat().st_size / (1024*1024):.2f} MB)",
            file=sys.stderr,
        )
        print(f"Extracting archive to {extract_dir}", file=sys.stderr)

        # Top-level boundary of the script: report any failure and exit 1.
        try:
            _extract_archive(archive_local_path, extract_dir)
        except Exception as e:
            error_msg = f"Failed to extract archive: {e}"
            print(error_msg, file=sys.stderr)
            sys.exit(1)
        print("Extracted archive", file=sys.stderr)

        print(
            f"Uploading extracted files to YT folder: {yt_upload_folder}",
            file=sys.stderr,
        )
        try:
            uploaded_count = _upload_tree(extract_dir, yt_upload_folder)
        except Exception as e:
            error_msg = f"Failed to upload files: {e}"
            print(error_msg, file=sys.stderr)
            sys.exit(1)

        print(
            f"Successfully uploaded {uploaded_count} files to {yt_upload_folder} (build folder)",
            file=sys.stderr,
        )
        sys.exit(0)
# Run the job only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()