#!/usr/bin/env python3
"""Split an audiobook file into per-chapter tracks using Google Books timestamps.

Chapter names and timestamps are either scraped from a Google Play Books page
or read from a local metadata file; ``ffmpeg`` then cuts the input audio file
into one output file per chapter.

Provenance: ``timestamper.py`` as added by commit
de7139ab77796ed728825cac0911314210d0fe84 ("Inital commit", Markus Pawlata,
2020-12-09).  The same commit adds an empty ``README.md`` and a
``requirements.txt`` pinning ``requests==2.25.0`` and ``slimit==0.8.1``.
"""
import argparse
import html
import os
import re
import subprocess
import sys
from typing import Optional, Tuple
from urllib import parse

GOOGLE_LINK = "https://play.google.com/books/listen?id={id}"
metadata = {}

parser = argparse.ArgumentParser(
    description=(
        "Automatically download chapter-timestamps from Google Books and separate an "
        "input-file by those timestamps. Title- and track-attributes will be set. "
        "Google Books url/id (or local file) and the audio-input file are required. "
        "PLEASE NOTE: This will not download any copyrighted material whatsoever, you "
        "have to provide the audiofile yourself."
    ),
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
    metavar="AUDIOFILE",
    dest="audio_file",
    action="store",
    type=str,
    help="the audiofile that tracks will be exported from",
)
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument(
    "-gi",
    "--google-id",
    metavar="GOOGLE-BOOKS-ID",
    dest="google_id",
    action="store",
    type=str,
    help="the id for the audiobook to download the track-data for (e.g. AQAAAEBsuD74QM)",
)
group.add_argument(
    "-gl",
    "--google-link",
    metavar="GOOGLE-LINK",
    dest="google_link",
    action="store",
    type=str,
    help=(
        "the link for the audiobook to download the track-data for (e.g. "
        "https://play.google.com/books/listen?id=AQAAAEBsuD74QM)"
    ),
)
group.add_argument(
    "-mf",
    "--metadata_file",
    metavar="METADATA_FILE",
    dest="metadata_file",
    action="store",
    type=str,
    help="the location of the file that contains the metadata for the tracks",
)

parser.add_argument(
    "-o",
    "--output-folder",
    metavar="OUTPUT_FOLDER",
    type=str,
    help="the folder that the tracks will be written to, created if it does not exist",
)
parser.add_argument(
    "-f",
    "--format",
    metavar="FORMAT",
    default="mp3",
    type=str,
    help=(
        "used as file-extension, ffmpeg will take this and try to derive the audio "
        "codec"
    ),
)
parser.add_argument(
    "-cs",
    "--chapter-separator",
    default="|",
    help="what separates the chapter/part names from the timestamps in the file",
)
parser.add_argument(
    "-bc",
    "--banned-characters",
    nargs="+",
    default="/",
    help="characters that may not show up in generated filenames",
)
parser.add_argument(
    "-e",
    "--export-only",
    type=int,
    default=0,
    help="limit the number of files that will be exported per run (0 means export all)",
)
parser.add_argument(
    "-dpp",
    "--dont-prepend-partnames",
    action="store_true",
    help=(
        "do not prepend the part-names to the chapter names (only works with "
        "--google-id or --google-link)"
    ),
)
parser.add_argument(
    "-of",
    "--override-output",
    action="store_true",
    help="override existing output-files",
)
parser.add_argument(
    "-ds",
    "--dont-skip-existing",
    action="store_true",
    help=(
        "do not skip existing output-files during export, combine with "
        "--override-output to actually override output-files"
    ),
)
parser.add_argument(
    "-d", "--debug", action="store_true", help="adds some debug messages to the output"
)
parser.add_argument(
    "-nm",
    "--no-metadata",
    action="store_true",
    help="do not save metadata to exported tracks (ffmpeg will still copy some on its own)",
)
parser.add_argument(
    "-nr",
    "--no-resume",
    action="store_true",
    help="ignore local metadata (only works with --google-id or --google-link)",
)

# The command line is parsed in main() so that importing this module has no
# side effects.  Until then the helpers below see the parser's own defaults
# for the options they read.
args = argparse.Namespace(
    chapter_separator=parser.get_default("chapter_separator"),
    banned_characters=parser.get_default("banned_characters"),
    dont_prepend_partnames=parser.get_default("dont_prepend_partnames"),
)


def die(message: str = "", value: int = 1):
    """
    Prints the optional message then halts the script with the given return value.

    :param message: Message to be printed, defaults to ""
    :param value: Return value on exit, defaults to 1
    :raises SystemExit: always
    """
    if message:
        print(message)
    # sys.exit instead of the bare ``exit`` helper, which only exists when the
    # ``site`` module has been loaded.
    sys.exit(value)


def milli(value: str) -> Optional[float]:
    """
    Tries to convert a given string into 1/1000

    :param value: Value to be converted.
    :return: 1/1000 of the input on success, None if it is not an integer.
    """
    try:
        return int(value) / 1000
    except (TypeError, ValueError):
        return None


def _sanitize(name: str) -> str:
    """Return ``name`` without banned characters and surrounding whitespace."""
    # banned_characters is either the default string or a list of strings
    # (nargs="+"); joining handles both and treats every character on its own.
    banned = set("".join(args.banned_characters))
    return "".join(char for char in name if char not in banned).strip()


def _ensure_folder(folder: str):
    """Create ``folder`` if only its last path component is missing, else exit."""
    if os.path.exists(folder):
        return
    if os.path.exists(os.path.dirname(folder)):
        os.makedirs(folder)
    else:
        die(f'OUTPUT_FOLDER: "{folder}" does not exist.')


def save_metadata_to_file(
    tracks: list, destination: os.PathLike, metadata: dict = None, source: str = None
):
    """
    Save the passed tracks into the destination file, optionally noting the source.

    Every track becomes one line ``name<sep>start<sep>end`` where ``<sep>`` is
    ``args.chapter_separator``; metadata entries become ``# @key<sep>value``
    comment lines.

    :param tracks: A list of tracks you want to save, a list item should look like:
        {"name": "Chapter Name", "start": 10.0030, "end": 128.1020}
    :param destination: Path for the file that will be written to
    :param metadata: Key/value pairs stored as "# @" lines, defaults to None
    :param source: A source-reference, embedded in the file as a comment, defaults to None
    """
    separator = args.chapter_separator
    lines = ["# Automatically created using timestamper\n"]
    if source:
        lines.append(f"# Source: {source}\n")
    if metadata:
        lines.extend(f"# @{key}{separator}{value}\n" for key, value in metadata.items())
    # The same separator is used between all three fields; the loader splits
    # on args.chapter_separator, so a hard-coded "|" would break custom ones.
    lines.extend(
        f"{track.get('name')}{separator}{track.get('start')}{separator}{track.get('end')}\n"
        for track in tracks
    )
    with open(destination, "w", encoding="utf-8") as fp:
        fp.writelines(lines)


def load_metadata_from_file(path: os.PathLike) -> Tuple[list, dict]:
    """
    Load the track-metadata from a file

    Lines starting with "# @" are read as ``key<sep>value`` metadata, other
    lines starting with "#" and blank lines are skipped, every remaining line
    must look like ``name<sep>start<sep>end``.  The script exits if the file
    cannot be read or a track line is malformed.

    :param path: Path to the file the metadata will be read from.
    :return: A tuple of the track list (dicts with "name", "start" and "end"
        as strings) and the metadata dict
    """
    separator = args.chapter_separator
    prefix = "# @"
    metadata = {}
    tracks = []
    try:
        with open(path, "r", encoding="utf-8") as fp:
            for line in fp:
                if line.startswith(prefix):
                    # Slice instead of lstrip(): lstrip strips a character
                    # set and would also eat leading "#", "@" or " " of the key.
                    key, found, value = line[len(prefix):].partition(separator)
                    if not found:
                        print(f"Ignored malformed line: {line.rstrip()}")
                    else:
                        metadata[key] = value.strip()
                    # Metadata lines are never track lines.
                    continue

                if line.startswith("#") or not line.strip():
                    continue

                # Split from the right so a chapter name may itself contain
                # the separator.
                parts = line.rsplit(separator, 2)
                if len(parts) != 3:
                    die(
                        f'Line "{line.rstrip()}" does not contain the chapter-name/time '
                        f'separator "{separator}" at least twice '
                        f"(expected: name{separator}start{separator}end)."
                    )
                name, start, end = (part.strip() for part in parts)
                tracks.append({"name": name, "start": start, "end": end})
    except FileNotFoundError:
        die(f"metadata file: {path} does not exist")
    except PermissionError:
        die(f"metadata file: no permission to read {path}")
    return tracks, metadata


def get_tracks_from_google(url: str) -> Tuple[list, dict]:
    """
    Load metadata from google.

    The chapter list is taken from the ``_OC_contentInfo`` JavaScript literal
    embedded in the page, the book title from the page's title element.

    :param url: Metadata will be scraped from the page identified by this URL.
    :return: A tuple of the track list (dicts with "name", "start" and "end";
        the last track has an empty "end") and a metadata dict that contains
        "title" when one was found
    """
    # Imported lazily so that --metadata_file mode (and importing this module)
    # works without the scraping dependencies installed.
    import requests
    from slimit.parser import Parser

    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as err:
        die(f"Could not get page from google: {url} ({err})")
    if response.status_code != 200:
        die(f"Could not get page from google: {url}")
    page = response.text

    matches = re.findall(r"_OC_contentInfo = ([^;]*)", page)
    if not matches:
        die("No chapters found in the google-page, maybe the markup has changed?")

    tree = Parser(yacc_optimize=False, lex_optimize=False).parse(matches[0])
    try:
        chapters = tree.children()[0].children()[0].children()[0].children()[0].children()
    except (AttributeError, IndexError):
        die("Unexpected chapter data in the google-page, maybe the markup has changed?")

    metadata = {}
    # Anchor on the opening tag so the capture holds the title text only.
    title_match = re.search(r"<title[^>]*>\s*(.*?) - Google Play</title>", page, re.S)
    if title_match:
        metadata["title"] = html.unescape(title_match.group(1))

    tracks = []
    current_part = ""
    last_timestamp = 0
    last_name = ""

    for chapter in chapters:
        item_number = len(chapter.items)
        values = [item.value.strip('"') for item in chapter.items[:2]]
        name = values[0] if values else ""

        if item_number == 1:
            # A name without timestamp only (re)names the pending track.
            last_name = name
            continue
        if item_number not in (2, 3):
            continue

        timestamp = milli(values[1])
        if timestamp != last_timestamp:
            # A new timestamp closes the pending track.
            tracks.append({"name": last_name, "start": last_timestamp, "end": timestamp})

        if item_number == 2:
            # Two items: remembered as the current part, whose name is
            # prepended to the three-item entries that follow.
            current_part = name
            last_name = name
        elif current_part and not args.dont_prepend_partnames:
            last_name = f"{current_part}: {name}"
        else:
            last_name = name
        last_timestamp = timestamp

    # The last track has no end and runs to the end of the audio file.
    tracks.append({"name": last_name, "start": last_timestamp, "end": ""})
    return tracks, metadata


def main():
    """
    Parse the command line, obtain the track list and export every track with ffmpeg.

    Track data comes from Google (cached as ``<id>.txt`` in the export folder
    and reused on later runs unless --no-resume is given) or from
    --metadata_file.  Exits via die() on invalid input or when ffmpeg fails.
    """
    global args
    args = parser.parse_args()

    tracks = []
    if not os.path.exists(args.audio_file):
        die("AUDIO_FILE: {} doesn't exist.".format(args.audio_file))

    export_folder = os.path.abspath(args.output_folder or os.getcwd())
    _ensure_folder(export_folder)

    if args.google_id:
        args.google_link = GOOGLE_LINK.format(id=args.google_id)

    if args.google_link:
        parsed = parse.urlparse(args.google_link)
        ids = parse.parse_qs(parsed.query).get("id")
        if not ids:
            die(f"This does not look right: {args.google_link}")
        book_id = ids[0]
        destination = os.path.join(export_folder, f"{book_id}.txt")
        if os.path.exists(destination) and not args.no_resume:
            print(f"Using local metadata: {destination}")
            tracks, book_metadata = load_metadata_from_file(destination)
        else:
            tracks, book_metadata = get_tracks_from_google(args.google_link)
            title = book_metadata.get("title")
            # Only fall back to a folder named after the book when the user
            # did not choose an output folder explicitly.
            if title and not args.output_folder:
                export_folder = os.path.join(os.getcwd(), _sanitize(title))
                destination = os.path.join(export_folder, f"{book_id}.txt")
                _ensure_folder(export_folder)
            save_metadata_to_file(
                tracks, destination, metadata=book_metadata, source=args.google_link
            )

    if args.metadata_file:
        tracks, book_metadata = load_metadata_from_file(args.metadata_file)

    if not tracks:
        separator = args.chapter_separator
        die(
            f'No tracks found, check format: "Chapter name{separator}start{separator}end" '
            "(end may be empty for the last chapter)."
        )

    total = len(tracks)
    width = len(str(total))
    print(f"{total} tracks to export to {export_folder}.")
    if args.export_only > 0:
        print(f"We are only exporting {args.export_only}.")
    exported = 0
    skipped = 0
    try:
        for number, track in enumerate(tracks, start=1):
            meta_title = _sanitize(track["name"])
            chapter_name = f"{str(number).zfill(width)}. {meta_title}"
            destination = os.path.join(export_folder, f"{chapter_name}.{args.format}")
            if (not args.dont_skip_existing) and os.path.exists(destination):
                print(f'Skipped "{chapter_name}" ({number}/{total})')
                skipped += 1
                continue

            command = [
                "ffmpeg",
                "-y" if args.override_output else "-n",
                "-ss",
                str(track["start"]),
                "-i",
                args.audio_file,
            ]
            if track["end"]:
                command += ["-to", str(track["end"]), "-copyts"]
            if not args.no_metadata:
                command += ["-metadata", f"title={meta_title}", "-metadata", f"track={number}"]
            command.append(destination)

            if args.debug:
                print(" ".join(command))
            try:
                run_result = subprocess.run(command, capture_output=True)
            except FileNotFoundError:
                die("ffmpeg was not found, please install it and add it to your PATH.")
            if run_result.returncode != 0:
                print(f"command was: '{' '.join(command)}'")
                die(
                    "ffmpeg did not terminate normally: "
                    f"{run_result.stderr.decode(errors='replace')}"
                )

            print(f'Exported "{chapter_name}" ({number}/{total})')
            exported += 1
            if args.export_only > 0 and args.export_only == exported:
                break

    except KeyboardInterrupt:
        print("\nKeyboardInterrupt, aborted processing.\n")
    print(f"Exported {exported} tracks to {export_folder}.")
    if skipped:
        print(f"Skipped {skipped} files that already existed.")


if __name__ == "__main__":
    main()