# import-comments.py

"""
This is a script to import comments or generate comments for testing purposes.

Usage:
    python import-comments.py <input_file>
        input_file: metadata.info.json file generated by yt-dlp containing comments.
"""

import os
import sys
import re
import json
from datetime import datetime
from dotenv import load_dotenv
from elasticsearch import Elasticsearch, helpers
import logging

# Turn on debug-level output for the "elasticsearch" logger.
es_logger = logging.getLogger("elasticsearch")
es_logger.setLevel(logging.DEBUG)

# Runs before the ES_* variables are read below; presumably this loads them
# from a .env file (python-dotenv) -- confirm against the deployment setup.
load_dotenv()

_es_hosts = [os.getenv("ES_HOST")]
_es_credentials = (os.getenv("ES_USER"), os.getenv("ES_PASSWORD"))

# ignore_status=404: per the client option name, a 404 response (e.g. deleting
# a document that does not exist) is not treated as an error.
es = Elasticsearch(_es_hosts, basic_auth=_es_credentials).options(
    ignore_status=404
)


def format_comments(comments_raw):
    """Convert raw comments into the indexing format.

    Every entry of ``comments_raw`` is passed through ``clean_comment``;
    entries for which it returns a falsy value are left out. A falsy
    ``comments_raw`` (``None``, empty list, ...) yields an empty list.
    """
    if not comments_raw:
        return []

    candidates = (clean_comment(raw) for raw in comments_raw)
    return [entry for entry in candidates if entry]


def clean_comment(comment):
    """Parse metadata from a raw comment dict for indexing.

    Args:
        comment: Raw comment dict. ``id``, ``timestamp``, ``author_id``,
            ``author_thumbnail`` and ``parent`` are read with plain indexing
            and are therefore required once the comment has text;
            ``author``, ``like_count``, ``is_favorited`` and
            ``author_is_uploader`` are optional. The dict is not modified.

    Returns:
        A dict with ``comment_*`` keys, or ``False`` when ``text`` is missing
        or empty.

    Raises:
        KeyError: If a required key is missing from a comment that has text.
    """
    # Local import: the file's top-level import only brings in `datetime`.
    from datetime import timezone

    if not comment.get("text"):
        return False

    # Use an aware UTC datetime: calling .timestamp() on a naive datetime
    # interprets it as local time, which would shift the value by the
    # machine's UTC offset.
    time_text = datetime.fromtimestamp(
        comment["timestamp"], tz=timezone.utc
    ).timestamp()

    # Fall back to the author ID when no display name is present, without
    # writing the fallback back into the caller's dict.
    author = comment.get("author") or comment.get("author_id", "Unknown")

    cleaned_comment = {
        "comment_id": comment["id"],
        # Drop non-breaking spaces from the comment text.
        "comment_text": comment["text"].replace("\xa0", ""),
        "comment_timestamp": comment["timestamp"],
        "comment_time_text": time_text,
        "comment_likecount": comment.get("like_count", None),
        "comment_is_favorited": comment.get("is_favorited", False),
        "comment_author": author,
        "comment_author_id": comment["author_id"],
        "comment_author_thumbnail": comment["author_thumbnail"],
        "comment_author_is_uploader": comment.get("author_is_uploader", False),
        "comment_parent": comment["parent"],
    }

    return cleaned_comment


def extract_video_id(filename):
    """Return the 11-character ID found between square brackets, or None.

    The last extension of ``filename`` is stripped first; the remainder
    (directories included) is searched and the first bracketed match wins.
    """
    stem = os.path.splitext(filename)[0]
    match = re.search(r"\[([a-zA-Z0-9_-]{11})\]", stem)
    return match.group(1) if match else None


def main():
    """Replace the stored comments of one video with those from an input file.

    Takes the input path from the first command-line argument, extracts the
    video ID from the bracketed part of that path, and reads ``channel_id``
    and ``comments`` from the JSON content. After an interactive confirmation
    it deletes the existing ``ta_comment`` document for the video and indexes
    a new one holding the formatted comments.

    Exits with status 1 on a usage error, when the video or channel ID cannot
    be determined, when the user declines, or when the import reports errors.
    """
    if len(sys.argv) < 2:
        print(f"Usage: python {os.path.basename(__file__)} <input_file>")
        sys.exit(1)

    filename = sys.argv[1]
    youtube_id = extract_video_id(filename)

    if not youtube_id:
        print(f"Could not extract video ID from {filename}")
        sys.exit(1)

    channel_id = None
    formatted_comments = []
    with open(filename, "r", encoding="utf-8") as f:
        comments_json = json.load(f)

        channel_id = comments_json.get("channel_id")
        if not channel_id:
            print(f"Could not extract channel ID from {filename}. Bad JSON?")
            sys.exit(1)

        comments_data = comments_json["comments"]
        formatted_comments = format_comments(comments_data)

    # uncomment me for testing
    # for i in range(100000):
    #    formatted_comments.append(
    #        {
    #            "comment_id": f"comment_id_{i}",
    #            "comment_text": f"comment_text_{i}",
    #            "comment_timestamp": i,
    #            "comment_time_text": i,
    #            "comment_likecount": i,
    #            "comment_is_favorited": False,
    #            "comment_author": f"comment_author_{i}",
    #            "comment_author_id": f"comment_author_id_{i}",
    #            "comment_author_thumbnail": f"comment_author_thumbnail_{i}",
    #            "comment_author_is_uploader": False,
    #            "comment_parent": f"comment_parent_{i}",
    #        }
    #    )

    print(
        f"Importing comments will delete all comments for video {youtube_id} and replace them with the new comments."
    )
    confirm = input("Continue? (y/n): ")
    if confirm.lower() != "y":
        print("Aborting")
        sys.exit(1)

    # Delete the existing comment document; refresh=True is passed so the
    # deletion is refreshed before the new document is indexed.
    es.delete(index="ta_comment", id=youtube_id, refresh=True)

    print(f"Deleted comments for video {youtube_id}")

    # All comments of a video live in a single document keyed by the video ID.
    actions = [
        {
            "_index": "ta_comment",
            "_id": youtube_id,
            "_source": {
                "youtube_id": youtube_id,
                "comment_last_refresh": int(datetime.now().timestamp()),
                "comment_channel_id": channel_id,
                "comment_comments": formatted_comments,
            },
        }
    ]

    print("Importing comments now")

    # raise_on_error=False: by default helpers.bulk raises on item errors,
    # which would make the error report below unreachable.
    success, errors = helpers.bulk(es, actions, raise_on_error=False)
    if success == 1:
        print(f"Successfully imported comments for video {youtube_id}")

    if errors:
        print("Errors encountered during import!")
        print(errors)
        sys.exit(1)

# Run the import only when executed as a script, not when imported.
if __name__ == "__main__":
    main()