#!/usr/bin/env python3
"""
webarchive_to_mochi.py

Convert Safari webarchive files from Athena/Ellucian class lists 
into Mochi flashcard decks for learning student names.

Card format:
  - Front: Student photo
  - Back: Student name + class identifier

Dependencies:
  - Python 3.9+ (uses only the standard library; the built-in generic type hints such as tuple[dict, dict] require 3.9)
  - plistlib (built-in)
  - zipfile (built-in)
  - json (built-in)
  - re (built-in)

Usage:
  python webarchive_to_mochi.py class1.webarchive:4250 class2.webarchive:6250 -o student_names.mochi

  Each argument is a webarchive file followed by a colon and a label (e.g., course number).
  The label appears on the back of each card below the student's name.

Author: Generated with Claude
"""

import argparse
import html
import json
import plistlib
import random
import re
import string
import zipfile
from pathlib import Path


# Matches: <img ... src="...bannerId=XXXXX..." ... alt="Name, Name N." ...>
# (the src attribute must come before alt inside the same tag).
_STUDENT_IMG_RE = re.compile(
    r'<img[^>]+src="[^"]*bannerId=(\d+)[^"]*"[^>]+alt="([^"]+)"'
)
# Pulls the numeric banner ID out of a subresource URL.
_BANNER_ID_RE = re.compile(r'bannerId=(\d+)')


def extract_students(webarchive_path: Path) -> tuple[dict, dict]:
    """
    Extract student names and photos from a Safari webarchive file.

    Names are read from the ``alt`` text of ``<img>`` tags in the main HTML
    resource; photos are read from the archive's JPEG subresources. Both are
    keyed by the ``bannerId`` value found in the image URL.

    Args:
        webarchive_path: Path to the .webarchive (property list) file.

    Returns:
        student_map: dict mapping bannerId -> name as written in the alt
            text (e.g. "Last, First M."), with HTML entities decoded
        photos: dict mapping bannerId -> JPEG bytes

    Raises:
        OSError: If the file cannot be opened.
        plistlib.InvalidFileException: If the file is not a valid plist.
        KeyError: If the archive lacks the main resource data, or a matching
            subresource lacks its data.
    """
    with open(webarchive_path, 'rb') as f:
        archive = plistlib.load(f)

    page = archive['WebMainResource']['WebResourceData'].decode('utf-8', errors='replace')

    # Attribute values are HTML-escaped in the page source (e.g. "O&#39;Brien"),
    # so decode entities to get the name as it should be displayed.
    # If a banner ID appears more than once, the last occurrence wins.
    student_map = {
        banner_id: html.unescape(raw_name)
        for banner_id, raw_name in _STUDENT_IMG_RE.findall(page)
    }

    # Extract JPEG images from subresources
    photos = {}
    for sub in archive.get('WebSubresources', []):
        if 'image/jpeg' not in sub.get('WebResourceMIMEType', ''):
            continue
        match = _BANNER_ID_RE.search(sub.get('WebResourceURL', ''))
        if match:
            photos[match.group(1)] = sub['WebResourceData']

    return student_map, photos


def format_name(name: str) -> str:
    """Turn 'Last, First M.' into 'First M. Last'.

    Only names containing exactly one ', ' separator are rearranged; any
    other input is returned unchanged.
    """
    if name.count(', ') != 1:
        return name
    last, _, first = name.partition(', ')
    return first + ' ' + last


def make_id(length: int = 8) -> str:
    """Return a random Mochi-style ID made of `length` ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)


def _position_keys(count: int) -> list[str]:
    """Return `count` distinct position strings that sort in index order."""
    # Characters are listed in ascending ASCII order, so comparing equal-length
    # strings character by character agrees with comparing the numbers they encode.
    alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    base = len(alphabet)

    # Smallest fixed width able to represent every index. Up to 62 cards this
    # is 1, which yields the same single-character positions as before.
    width = 1
    while base ** width < count:
        width += 1

    keys = []
    for index in range(count):
        digits = []
        value = index
        for _ in range(width):
            value, remainder = divmod(value, base)
            digits.append(alphabet[remainder])
        keys.append(''.join(reversed(digits)))
    return keys


def create_mochi_deck(
    class_files: list[tuple[Path, str]],
    output_path: Path,
    deck_name: str = "Student Names"
) -> int:
    """
    Create a .mochi flashcard deck from webarchive files.

    One card is produced for every student that has both a name and a photo
    in its webarchive; students without a photo are reported and skipped.
    The output is a zip archive holding ``data.json`` plus one ``<bannerId>.jpg``
    per photo. A photo shared by several classes is stored once.

    Args:
        class_files: List of (webarchive_path, class_label) tuples
        output_path: Path for the output .mochi file
        deck_name: Name for the deck in Mochi

    Returns:
        Number of cards created
    """
    deck_id = make_id()
    entries = []  # (card_id, content) in deck order
    media_files = {}  # filename -> bytes

    for webarchive_path, class_label in class_files:
        print(f"Processing {webarchive_path}...")
        student_map, photos = extract_students(webarchive_path)

        found = 0
        missing = 0
        for banner_id, name in student_map.items():
            if banner_id not in photos:
                missing += 1
                print(f"  Warning: No photo for {name}")
                continue

            img_filename = f"{banner_id}.jpg"
            media_files[img_filename] = photos[banner_id]

            # Front (photo) and back (name + label) are separated by "---".
            formatted_name = format_name(name)
            content = f"![](@media/{img_filename})\n---\n{formatted_name}\n{class_label}"

            entries.append((make_id(), content))
            found += 1

        print(f"  Found {found} students with photos" + (f" ({missing} without)" if missing else ""))

    # Positions are assigned once the total is known so every card gets a
    # unique key; cycling through single characters repeated keys after 62 cards.
    cards = [
        {
            "~:content": content,
            "~:deck-id": f"~:{deck_id}",
            "~:id": f"~:{card_id}",
            "~:pos": pos,
            "~:reviews": [],
            "~:tags": {"~#set": []},
            "~:references": {"~#set": []}
        }
        for (card_id, content), pos in zip(entries, _position_keys(len(entries)))
    ]

    # Build Transit JSON structure
    data = {
        "~:version": 2,
        "~:decks": [{
            "~:name": deck_name,
            "~:id": f"~:{deck_id}",
            "~:cards": {"~#list": cards}
        }],
        "~:templates": {"~#list": []}
    }

    # Create .mochi zip file
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        zf.writestr('data.json', json.dumps(data, ensure_ascii=False))
        for filename, img_bytes in media_files.items():
            zf.writestr(filename, img_bytes)

    print(f"\nCreated {output_path} with {len(cards)} cards")
    return len(cards)


def main():
    """
    Command-line entry point.

    Parses FILE:LABEL arguments, validates that each file exists, and writes
    the combined deck via create_mochi_deck(). Exits through parser.error()
    (usage message, non-zero status) when an input file is missing.
    """
    parser = argparse.ArgumentParser(
        description="Convert Athena/Ellucian webarchive class lists to Mochi flashcard decks.",
        epilog="Example: %(prog)s ClassList4250.webarchive:4250 ClassList6250.webarchive:6250 -o students.mochi"
    )
    parser.add_argument(
        'inputs',
        nargs='+',
        metavar='FILE:LABEL',
        help="Webarchive file and class label (e.g., ClassList.webarchive:MATH4250)"
    )
    parser.add_argument(
        '-o', '--output',
        type=Path,
        default=Path('student_names.mochi'),
        help="Output .mochi file (default: student_names.mochi)"
    )
    parser.add_argument(
        '-n', '--name',
        default="Student Names",
        help="Deck name in Mochi (default: 'Student Names')"
    )

    args = parser.parse_args()

    # Parse input files
    class_files = []
    for inp in args.inputs:
        # Split on the LAST colon so colons earlier in the path are preserved.
        path_str, sep, label = inp.rpartition(':')
        if not sep:
            path_str = inp

        path = Path(path_str)
        # is_file() also rejects directories, which would otherwise pass an
        # existence check and then fail with a traceback when opened.
        if not path.is_file():
            parser.error(f"File not found: {path}")

        # No label given (or an empty one, as in "file.webarchive:"):
        # use the filename as label so the card back is never blank.
        if not sep or not label.strip():
            label = path.stem

        class_files.append((path, label))

    create_mochi_deck(class_files, args.output, args.name)

# Run the command-line interface only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
