feat(main): commit
This commit is contained in:
52
core/management/commands/cache_upcoming.py
Normal file
52
core/management/commands/cache_upcoming.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""
|
||||
management command: cache_upcoming
|
||||
|
||||
Delegates to core.services.cache.run_cache() — the same logic exposed
|
||||
by the API endpoint, so CLI and web UI behavior are always in sync.
|
||||
|
||||
Usage:
|
||||
python manage.py cache_upcoming # default: next 24 hours
|
||||
python manage.py cache_upcoming --hours 48
|
||||
python manage.py cache_upcoming --prune-only
|
||||
"""
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from core.services.cache import run_cache
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """CLI wrapper around core.services.cache.run_cache()."""

    help = "Download YouTube videos for upcoming airings and prune old cache files."

    def add_arguments(self, parser):
        # Flags mirror the run_cache() keyword arguments one-to-one.
        parser.add_argument(
            "--hours",
            type=int,
            default=24,
            help="How many hours ahead to scan for upcoming airings (default: 24).",
        )
        parser.add_argument(
            "--prune-only",
            action="store_true",
            default=False,
            help="Only delete expired cache files; do not download anything new.",
        )

    def handle(self, *args, **options):
        window_hours = options["hours"]
        prune_flag = options["prune_only"]

        self.stdout.write(f"▶ Running cache worker (window: {window_hours}h, prune-only: {prune_flag})")
        summary = run_cache(hours=window_hours, prune_only=prune_flag)

        # Aggregate counters first…
        self.stdout.write(self.style.SUCCESS(f" 🗑 Pruned: {summary['pruned']}"))
        self.stdout.write(self.style.SUCCESS(f" ↓ Downloaded: {summary['downloaded']}"))
        self.stdout.write(self.style.SUCCESS(f" ✓ Already cached: {summary['already_cached']}"))
        if summary["failed"]:
            self.stderr.write(self.style.ERROR(f" ✗ Failed: {summary['failed']}"))

        # …then a per-item breakdown with a status glyph per line.
        glyphs = {"downloaded": "↓", "cached": "✓", "failed": "✗"}
        for entry in summary["items"]:
            marker = glyphs.get(entry["status"], "?")
            detail = f" {marker} [{entry['status']:10}] {entry['title'][:70]}"
            if entry.get("error"):
                detail += f" — {entry['error']}"
            self.stdout.write(detail)
|
||||
@@ -1,16 +1,29 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from core.models import AppUser, Library, Channel, MediaItem, Airing, ScheduleTemplate
|
||||
from core.services.scheduler import ScheduleGenerator
|
||||
from django.utils import timezone
|
||||
from datetime import timedelta
|
||||
from datetime import timedelta, date
|
||||
import textwrap
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = "Displays a beautifully formatted terminal dashboard of the current backend state."
|
||||
|
||||
def add_arguments(self, parser):
    """Register the dashboard command's CLI options."""
    # --channel narrows output to one channel; --test-generate optionally
    # regenerates today's schedule for that channel before inspecting it.
    parser.add_argument(
        '--channel', type=int, help='Inspect specific channel schedule'
    )
    parser.add_argument(
        '--test-generate', action='store_true',
        help='Trigger generation for today if inspecting a channel'
    )
|
||||
|
||||
def get_color(self, text, code):
    """Wrap *text* in ANSI SGR escape sequences for terminal color *code*."""
    start = f"\033[{code}m"
    reset = "\033[0m"
    return f"{start}{text}{reset}"
|
||||
|
||||
def handle(self, *args, **options):
|
||||
channel_id = options.get('channel')
|
||||
test_generate = options.get('test_generate')
|
||||
|
||||
if channel_id:
|
||||
self.inspect_channel(channel_id, test_generate)
|
||||
return
|
||||
|
||||
# 1. Gather Aggregate Metrics
|
||||
total_users = AppUser.objects.count()
|
||||
total_libraries = Library.objects.count()
|
||||
@@ -46,7 +59,7 @@ class Command(BaseCommand):
|
||||
for c in channels:
|
||||
status_color = "1;32" if c.is_active else "1;31"
|
||||
status_text = "ACTIVE" if c.is_active else "INACTIVE"
|
||||
self.stdout.write(f"\n 📺 [{c.channel_number or '-'}] {c.name} ({self.get_color(status_text, status_color)})")
|
||||
self.stdout.write(f"\n 📺 [{c.id}] {c.name} (Ch {c.channel_number or '-'}) ({self.get_color(status_text, status_color)})")
|
||||
|
||||
# Show templates
|
||||
templates = c.scheduletemplate_set.filter(is_active=True).order_by('-priority')
|
||||
@@ -57,4 +70,40 @@ class Command(BaseCommand):
|
||||
blocks_count = t.scheduleblock_set.count()
|
||||
self.stdout.write(f" 📄 Template: {t.name} (Priority {t.priority}) -> {blocks_count} Blocks")
|
||||
|
||||
self.stdout.write(f"\nUse {self.get_color('--channel <id>', '1;37')} to inspect detailed schedule.\n")
|
||||
|
||||
def inspect_channel(self, channel_id, test_generate):
    """Print the next-12h schedule for one channel; optionally regenerate it first."""
    try:
        channel = Channel.objects.get(id=channel_id)
    except Channel.DoesNotExist:
        self.stdout.write(self.get_color(f"Error: Channel {channel_id} not found.", "1;31"))
        return

    if test_generate:
        self.stdout.write(self.get_color(f"\nTriggering schedule generation for {channel.name}...", "1;33"))
        created = ScheduleGenerator(channel).generate_for_date(date.today())
        self.stdout.write(f"Done. Created {self.get_color(str(created), '1;32')} new airings.")

    now = timezone.now()
    horizon = now + timedelta(hours=12)

    # Anything overlapping [now, now+12h): still-playing or upcoming airings.
    upcoming = (
        Airing.objects
        .filter(channel=channel, ends_at__gt=now, starts_at__lt=horizon)
        .select_related('media_item')
        .order_by('starts_at')
    )

    self.stdout.write(self.get_color(f"\n=== Schedule for {channel.name} (Next 12h) ===", "1;34"))

    if not upcoming:
        self.stdout.write(self.get_color(" (No airings scheduled in this window)", "1;33"))
    else:
        for airing in upcoming:
            window = f"{airing.starts_at.strftime('%H:%M')} - {airing.ends_at.strftime('%H:%M')}"
            # Highlight whichever airing contains "now".
            if airing.starts_at <= now <= airing.ends_at:
                self.stdout.write(f" {self.get_color('▶ ON AIR', '1;32')} {self.get_color(window, '1;37')} | {airing.media_item.title}")
            else:
                self.stdout.write(f" {window} | {airing.media_item.title}")

    self.stdout.write("\n")
|
||||
|
||||
45
core/migrations/0002_mediaitem_cache_expires_at_and_more.py
Normal file
45
core/migrations/0002_mediaitem_cache_expires_at_and_more.py
Normal file
@@ -0,0 +1,45 @@
|
||||
# Generated by Django 6.0.3 on 2026-03-08 15:34
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add YouTube caching fields to MediaItem and extend MediaSource types."""

    dependencies = [
        ("core", "0001_initial"),
    ]

    operations = [
        # Fields used by the cache_upcoming download/prune workflow.
        migrations.AddField(
            model_name="mediaitem",
            name="cache_expires_at",
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="mediaitem",
            name="cached_file_path",
            field=models.TextField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="mediaitem",
            name="youtube_video_id",
            field=models.CharField(blank=True, db_index=True, max_length=64, null=True),
        ),
        # New youtube_channel / youtube_playlist choices on MediaSource.
        migrations.AlterField(
            model_name="mediasource",
            name="source_type",
            field=models.CharField(
                choices=[
                    ("local_directory", "Local Directory"),
                    ("network_share", "Network Share"),
                    ("manual_import", "Manual Import"),
                    ("playlist", "Playlist"),
                    ("stream", "Stream"),
                    ("api_feed", "API Feed"),
                    ("youtube_channel", "YouTube Channel"),
                    ("youtube_playlist", "YouTube Playlist"),
                ],
                max_length=32,
            ),
        ),
    ]
|
||||
@@ -85,6 +85,8 @@ class MediaSource(models.Model):
|
||||
PLAYLIST = 'playlist', 'Playlist'
|
||||
STREAM = 'stream', 'Stream'
|
||||
API_FEED = 'api_feed', 'API Feed'
|
||||
YOUTUBE_CHANNEL = 'youtube_channel', 'YouTube Channel'
|
||||
YOUTUBE_PLAYLIST = 'youtube_playlist', 'YouTube Playlist'
|
||||
|
||||
source_type = models.CharField(max_length=32, choices=SourceType.choices)
|
||||
uri = models.TextField()
|
||||
@@ -152,6 +154,11 @@ class MediaItem(models.Model):
|
||||
is_active = models.BooleanField(default=True)
|
||||
date_added_at = models.DateTimeField(auto_now_add=True)
|
||||
metadata_json = models.JSONField(default=dict)
|
||||
# YouTube-specific: the video ID from yt-dlp
|
||||
youtube_video_id = models.CharField(max_length=64, blank=True, null=True, db_index=True)
|
||||
# Local cache path for downloaded YouTube videos (distinct from file_path which holds source URI)
|
||||
cached_file_path = models.TextField(blank=True, null=True)
|
||||
cache_expires_at = models.DateTimeField(blank=True, null=True)
|
||||
|
||||
genres = models.ManyToManyField(Genre, related_name="media_items", blank=True)
|
||||
|
||||
|
||||
140
core/services/cache.py
Normal file
140
core/services/cache.py
Normal file
@@ -0,0 +1,140 @@
|
||||
"""
|
||||
Cache service — reusable download/prune logic used by both:
|
||||
- python manage.py cache_upcoming
|
||||
- POST /api/sources/cache-upcoming
|
||||
"""
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
from datetime import timedelta
|
||||
|
||||
from django.utils import timezone
|
||||
|
||||
from core.models import Airing, MediaItem, MediaSource
|
||||
from core.services.youtube import download_for_airing, YOUTUBE_SOURCE_TYPES
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_cache(hours: int = 24, prune_only: bool = False) -> dict:
    """
    Scan Airings in the next `hours` hours, download any uncached YouTube
    videos, and prune stale local files.

    Returns a summary dict suitable for JSON serialization.
    """
    now = timezone.now()
    window_end = now + timedelta(hours=hours)

    # Prune first so freed disk space is available before new downloads.
    pruned = _prune(now)

    if prune_only:
        return {"pruned": pruned, "downloaded": 0, "already_cached": 0, "failed": 0, "items": []}

    # Collect distinct YouTube-backed MediaItems that are upcoming or
    # currently playing inside the window (keyed by pk to dedupe).
    airings = (
        Airing.objects
        .filter(ends_at__gt=now, starts_at__lte=window_end)
        .select_related("media_item__media_source")
    )
    targets: dict[int, MediaItem] = {}
    for airing in airings:
        media = airing.media_item
        source = media.media_source
        if source and source.source_type in YOUTUBE_SOURCE_TYPES:
            targets[media.pk] = media

    downloaded = 0
    already_cached = 0
    failed = 0
    report: list[dict] = []

    for media in targets.values():
        # Cache hit: file already on disk, nothing to do.
        if media.cached_file_path and pathlib.Path(media.cached_file_path).exists():
            already_cached += 1
            report.append({
                "id": media.pk,
                "title": media.title,
                "status": "cached",
                "path": media.cached_file_path,
            })
            continue

        try:
            local_path = download_for_airing(media)
        except Exception as exc:
            failed += 1
            report.append({
                "id": media.pk,
                "title": media.title,
                "status": "failed",
                "error": str(exc),
            })
            logger.error("download_for_airing(%s) failed: %s", media.pk, exc)
        else:
            downloaded += 1
            report.append({
                "id": media.pk,
                "title": media.title,
                "status": "downloaded",
                "path": str(local_path),
            })

    logger.info(
        "run_cache(hours=%d): pruned=%d downloaded=%d cached=%d failed=%d",
        hours, pruned, downloaded, already_cached, failed,
    )
    return {
        "pruned": pruned,
        "downloaded": downloaded,
        "already_cached": already_cached,
        "failed": failed,
        "items": report,
    }
|
||||
|
||||
|
||||
def _prune(now) -> int:
    """
    Delete local cache files whose airings have all ended.

    Returns the number of files actually removed from disk.

    A MediaItem's cache fields (`cached_file_path`, `cache_expires_at`)
    are cleared only once the file is confirmed gone — either unlinked
    here or already absent. If `unlink()` fails we keep the DB pointer
    so the next prune pass can retry; the previous behavior cleared the
    pointer unconditionally, which permanently orphaned the file on disk.
    """
    pruned = 0
    # Items with a cached file but no airing ending at/after `now`.
    stale = MediaItem.objects.filter(cached_file_path__isnull=False).exclude(
        airing__ends_at__gte=now
    )
    for item in stale:
        p = pathlib.Path(item.cached_file_path)
        if p.exists():
            try:
                p.unlink()
                pruned += 1
            except OSError as exc:
                # Deletion failed (permissions, busy file, …): keep the DB
                # record intact so a later run can retry the delete.
                logger.warning("Could not delete %s: %s", p, exc)
                continue
        # File is gone (deleted above, or was already missing) — safe to
        # forget it in the database.
        item.cached_file_path = None
        item.cache_expires_at = None
        item.save(update_fields=["cached_file_path", "cache_expires_at"])
    return pruned
|
||||
|
||||
|
||||
def get_download_status() -> dict:
    """
    Return a snapshot of all YouTube MediaItems and their cache status,
    useful for rendering the Downloads UI.
    """
    qs = (
        MediaItem.objects
        .filter(media_source__source_type__in=YOUTUBE_SOURCE_TYPES)
        .select_related("media_source")
        .order_by("media_source__name", "title")
    )

    rows = []
    for media in qs:
        # "Cached" means the DB points at a file that actually exists.
        is_cached = bool(media.cached_file_path) and pathlib.Path(media.cached_file_path).exists()
        rows.append({
            "id": media.pk,
            "title": media.title,
            "source_name": media.media_source.name,
            "source_id": media.media_source.id,
            "youtube_video_id": media.youtube_video_id,
            "runtime_seconds": media.runtime_seconds,
            "cached": is_cached,
            "cached_path": media.cached_file_path if is_cached else None,
        })

    cached_count = sum(1 for row in rows if row["cached"])
    return {"items": rows, "total": len(rows), "cached": cached_count}
|
||||
@@ -1,108 +1,210 @@
|
||||
from datetime import datetime, timedelta, date, time, timezone
|
||||
from core.models import Channel, ScheduleTemplate, ScheduleBlock, Airing, MediaItem
|
||||
"""
|
||||
Schedule generator — respects ChannelSourceRule assignments.
|
||||
|
||||
Source selection priority:
|
||||
1. If any rules with rule_mode='prefer' exist, items from those sources
|
||||
are weighted much more heavily.
|
||||
2. Items from rule_mode='allow' sources fill the rest.
|
||||
3. Items from rule_mode='avoid' sources are only used as a last resort
|
||||
(weight × 0.1).
|
||||
4. Items from rule_mode='block' sources are NEVER scheduled.
|
||||
5. If NO ChannelSourceRule rows exist for this channel, falls back to
|
||||
the old behaviour (all items in the channel's library).
|
||||
"""
|
||||
|
||||
import random
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, date, timezone
|
||||
|
||||
from core.models import (
|
||||
Channel, ChannelSourceRule, ScheduleTemplate,
|
||||
ScheduleBlock, Airing, MediaItem,
|
||||
)
|
||||
|
||||
|
||||
class ScheduleGenerator:
    """
    Reads ScheduleTemplate + ScheduleBlocks for a channel and fills the day
    with concrete Airing rows, picking MediaItems according to the channel's
    ChannelSourceRule assignments.
    """

    def __init__(self, channel: Channel):
        self.channel = channel

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def generate_for_date(self, target_date: date) -> int:
        """
        Idempotent generation of airings for `target_date`.
        Returns the number of new Airing rows created.
        """
        template = self._get_template()
        if not template:
            return 0

        # Day-of-week bitmask: bit 0 = Monday … bit 6 = Sunday, matching
        # Python's date.weekday().
        weekday_bit = 1 << target_date.weekday()

        created_total = 0
        for block in template.scheduleblock_set.all():
            if not (block.day_of_week_mask & weekday_bit):
                continue  # block does not run on this weekday

            # Block-local times are mapped straight onto the UTC timeline
            # (no per-channel timezone handling yet).
            start_dt = datetime.combine(target_date, block.start_local_time, tzinfo=timezone.utc)
            end_dt = datetime.combine(target_date, block.end_local_time, tzinfo=timezone.utc)

            # Midnight-wrap support (e.g. 23:00–02:00)
            if end_dt <= start_dt:
                end_dt += timedelta(days=1)

            # Clear existing airings in this window (idempotency)
            Airing.objects.filter(
                channel=self.channel,
                starts_at__gte=start_dt,
                starts_at__lt=end_dt,
            ).delete()

            pool = self._get_weighted_items(block)
            if not pool:
                continue

            created_total += self._fill_block(template, block, start_dt, end_dt, pool)

        return created_total

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _get_template(self):
        """Pick the highest-priority active ScheduleTemplate for this channel."""
        return (
            ScheduleTemplate.objects
            .filter(channel=self.channel, is_active=True)
            .order_by('-priority')
            .first()
        )

    def _get_weighted_items(self, block: ScheduleBlock) -> list:
        """
        Build a weighted pool of MediaItems respecting ChannelSourceRule.

        Returns a flat list with items duplicated according to their effective
        weight (rounded to nearest int, min 1) so a uniform random pick over
        the list yields the right probability distribution without numpy.
        """
        rules = list(
            ChannelSourceRule.objects.filter(channel=self.channel)
            .select_related('media_source')
        )

        source_weights: dict[int, float] = {}

        if rules:
            # Rules exist: partition sources by mode and assign weights.
            allowed_ids: set[int] = set()   # allow + prefer
            blocked_ids: set[int] = set()   # block — never scheduled
            avoided_ids: set[int] = set()   # avoid — last resort only

            for rule in rules:
                sid = rule.media_source_id
                base_weight = float(rule.weight or 1.0)

                if rule.rule_mode == 'block':
                    blocked_ids.add(sid)
                elif rule.rule_mode == 'avoid':
                    avoided_ids.add(sid)
                    source_weights[sid] = base_weight * 0.1  # heavily discounted
                elif rule.rule_mode == 'prefer':
                    allowed_ids.add(sid)
                    source_weights[sid] = base_weight * 3.0  # boosted
                else:  # 'allow'
                    allowed_ids.add(sid)
                    source_weights[sid] = base_weight

            # Eligible = everything allowed or avoided, minus blocked.
            eligible_ids = (allowed_ids | avoided_ids) - blocked_ids
            if not eligible_ids:
                return []

            base_qs = MediaItem.objects.filter(
                media_source_id__in=eligible_ids,
                is_active=True,
            ).exclude(item_kind='bumper').select_related('media_source')
        else:
            # No rules: fall back to the channel's whole library (old behaviour).
            base_qs = MediaItem.objects.filter(
                media_source__library=self.channel.library,
                is_active=True,
            ).exclude(item_kind='bumper')

        # Optional genre narrowing from the block itself.
        if block.default_genre:
            base_qs = base_qs.filter(genres=block.default_genre)

        pool = list(base_qs)
        if not pool:
            return []

        if not source_weights:
            # No weight information — plain shuffle.
            random.shuffle(pool)
            return pool

        # Duplicate each item round(weight) times (min 1), then shuffle.
        weighted: list[MediaItem] = []
        for item in pool:
            copies = max(1, round(source_weights.get(item.media_source_id, 1.0)))
            weighted.extend([item] * copies)

        random.shuffle(weighted)
        return weighted

    def _fill_block(
        self,
        template: ScheduleTemplate,
        block: ScheduleBlock,
        start_dt: datetime,
        end_dt: datetime,
        items: list,
    ) -> int:
        """Fill start_dt→end_dt with sequential Airings, cycling through items."""
        cursor = start_dt
        next_idx = 0
        created = 0
        batch_id = uuid.uuid4()

        while cursor < end_dt:
            item = items[next_idx % len(items)]
            next_idx += 1

            # runtime_seconds may be None/0; floor at 1s to guarantee progress.
            duration = timedelta(seconds=max(item.runtime_seconds or 1800, 1))

            # Don't let a single item overshoot the block end by more than 1h.
            if cursor + duration > end_dt + timedelta(hours=1):
                break

            Airing.objects.create(
                channel=self.channel,
                schedule_template=template,
                schedule_block=block,
                media_item=item,
                starts_at=cursor,
                ends_at=cursor + duration,
                slot_kind="program",
                status="scheduled",
                source_reason="template",
                generation_batch_uuid=batch_id,
            )

            cursor += duration
            created += 1

        return created
|
||||
|
||||
244
core/services/youtube.py
Normal file
244
core/services/youtube.py
Normal file
@@ -0,0 +1,244 @@
|
||||
"""
|
||||
YouTube source sync service.
|
||||
|
||||
Two-phase design:
|
||||
Phase 1 — METADATA ONLY (sync_source):
|
||||
Crawls a YouTube channel or playlist and upserts MediaItem rows with
|
||||
title, duration, thumbnail etc. No video files are downloaded.
|
||||
A max_videos cap keeps this fast for large channels.
|
||||
|
||||
Phase 2 — DOWNLOAD ON DEMAND (download_for_airing):
|
||||
Called only by `python manage.py cache_upcoming` immediately before
|
||||
a scheduled Airing. Downloads only the specific video needed.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import yt_dlp
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
from core.models import MediaItem, MediaSource
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
YOUTUBE_SOURCE_TYPES = {
|
||||
MediaSource.SourceType.YOUTUBE_CHANNEL,
|
||||
MediaSource.SourceType.YOUTUBE_PLAYLIST,
|
||||
}
|
||||
|
||||
|
||||
def _cache_dir() -> Path:
    """Return (and create) the directory where downloaded videos are stored."""
    # MEDIA_ROOT when configured; otherwise a throwaway tmp location.
    cache_root = Path(getattr(settings, "MEDIA_ROOT", "/tmp/pytv_cache"))
    cache_root.mkdir(parents=True, exist_ok=True)
    return cache_root
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# metadata extraction (no download)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _extract_playlist_info(url: str, max_videos: int | None = None) -> list[dict]:
    """
    Use yt-dlp to extract metadata for up to `max_videos` videos in a
    channel/playlist without downloading any files.

    `extract_flat=True` is crucial — it fetches only a lightweight index
    (title, id, duration) rather than resolving full stream URLs, which
    makes crawling large channels orders of magnitude faster.

    Returns a list of yt-dlp info dicts (most-recent first for channels).
    """
    opts = {
        "quiet": True,
        "no_warnings": True,
        "extract_flat": True,  # metadata only — NO stream/download URLs
        "ignoreerrors": True,
    }
    if max_videos is not None:
        # yt-dlp playlist indices are 1-based; playlistend caps how many
        # entries are fetched from the source before returning.
        opts["playlistend"] = max_videos

    with yt_dlp.YoutubeDL(opts) as ydl:
        info = ydl.extract_info(url, download=False)

    if info is None:
        return []

    # Both channels and playlists wrap entries in an "entries" key; channels
    # nest playlists one level deeper, so flatten one extra level.
    results: list[dict] = []
    for entry in (info.get("entries") or []):
        if entry is None:
            continue
        if "entries" in entry:  # nested playlist
            results.extend(e for e in entry["entries"] if e)
        else:
            results.append(entry)
    return results
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def sync_source(media_source: MediaSource, max_videos: int | None = None) -> dict:
    """
    Phase 1: Metadata-only sync.

    Crawls a YouTube channel/playlist and upserts MediaItem rows for each
    discovered video. No video files are ever downloaded here.

    Args:
        media_source: The MediaSource to sync.
        max_videos: Maximum number of videos to import. When None the
            defaults are applied:
                - youtube_channel  → 50 (channels can have 10k+ videos)
                - youtube_playlist → 200 (playlists are usually curated)

    Returns:
        {"created": int, "updated": int, "skipped": int}
    """
    if media_source.source_type not in YOUTUBE_SOURCE_TYPES:
        raise ValueError(f"MediaSource {media_source.id} is not a YouTube source.")

    # Sensible per-source-type defaults when no explicit cap was given.
    if max_videos is None:
        if media_source.source_type == MediaSource.SourceType.YOUTUBE_CHANNEL:
            max_videos = 50
        else:
            max_videos = 200

    created = updated = skipped = 0

    for entry in _extract_playlist_info(media_source.uri, max_videos=max_videos):
        video_id = entry.get("id")
        if not video_id:
            skipped += 1
            continue

        # Best-effort metadata; every field has a safe fallback.
        title = entry.get("title") or f"YouTube Video {video_id}"
        duration = entry.get("duration") or 0  # seconds, may be None for live
        release_year = None
        upload_date = entry.get("upload_date")  # "YYYYMMDD"
        if upload_date and len(upload_date) >= 4:
            try:
                release_year = int(upload_date[:4])
            except ValueError:
                pass

        # Store the YouTube watch URL in file_path so the scheduler can
        # reference it. The ACTUAL video file will only be downloaded when
        # `cache_upcoming` runs before the airing.
        video_url = entry.get("url") or f"https://www.youtube.com/watch?v={video_id}"

        _, was_created = MediaItem.objects.update_or_create(
            media_source=media_source,
            youtube_video_id=video_id,
            defaults={
                "title": title,
                "item_kind": MediaItem.ItemKind.MOVIE,
                "runtime_seconds": max(int(duration), 1),
                "file_path": video_url,
                "thumbnail_path": entry.get("thumbnail") or "",
                "description": entry.get("description") or "",
                "release_year": release_year,
                "metadata_json": {
                    "yt_id": video_id,
                    "yt_url": video_url,
                    "uploader": entry.get("uploader", ""),
                },
                "is_active": True,
            },
        )
        if was_created:
            created += 1
        else:
            updated += 1

    # Record that we scanned this source.
    media_source.last_scanned_at = timezone.now()
    media_source.save(update_fields=["last_scanned_at"])

    logger.info(
        "sync_source(%s): created=%d updated=%d skipped=%d (limit=%s)",
        media_source.id, created, updated, skipped, max_videos,
    )
    return {"created": created, "updated": updated, "skipped": skipped}
|
||||
|
||||
|
||||
def download_for_airing(media_item: MediaItem) -> Path:
    """
    Download a YouTube video to the local cache so it can be served
    directly without network dependency at airing time.

    Returns the local Path of the downloaded file.
    Raises RuntimeError if the download fails.
    """
    video_id = media_item.youtube_video_id
    if not video_id:
        raise ValueError(f"MediaItem {media_item.id} has no youtube_video_id.")

    # Name files by video id so already-cached files are cheap to detect.
    # (_cache_dir also ensures the directory exists.)
    output_template = str(_cache_dir() / f"{video_id}.%(ext)s")

    # Fast path: reuse an existing cached file.
    if media_item.cached_file_path:
        cached = Path(media_item.cached_file_path)
        if cached.exists():
            logger.info("cache hit: %s already at %s", video_id, cached)
            return cached

    ydl_opts = {
        "quiet": True,
        "no_warnings": True,
        "outtmpl": output_template,
        # Only request pre-muxed (progressive) formats — no separate video+audio
        # streams that would require ffmpeg to merge. Falls back through:
        #   1. Best pre-muxed mp4 up to 1080p
        #   2. Any pre-muxed mp4
        #   3. Any pre-muxed webm
        #   4. Anything pre-muxed (no merger needed)
        "format": "best[ext=mp4][height<=1080]/best[ext=mp4]/best[ext=webm]/best",
    }

    url = media_item.file_path  # watch URL stored here by sync_source
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        if info is None:
            raise RuntimeError(f"yt-dlp returned no info for {url}")
        result_path = Path(ydl.prepare_filename(info))

    if not result_path.exists():
        # yt-dlp may have merged to .mp4 even if the template said otherwise
        mp4_candidate = result_path.with_suffix(".mp4")
        if not mp4_candidate.exists():
            raise RuntimeError(f"Expected download at {result_path} but file not found.")
        result_path = mp4_candidate

    # Persist the cache location on the model
    media_item.cached_file_path = str(result_path)
    media_item.save(update_fields=["cached_file_path"])

    logger.info("downloaded %s -> %s", video_id, result_path)
    return result_path
|
||||
Reference in New Issue
Block a user