#!/usr/bin/env python3
"""
Split an audiobook file into chapter tracks.

Chapter timestamps are either scraped from a Google Play Books page or read
from a local metadata file; ffmpeg is then invoked once per chapter to cut the
track out of the input audio file.
"""
import argparse
import html
import os
import re
import subprocess
import sys
from typing import Dict, Iterable, List, Optional, Tuple
from urllib import parse

GOOGLE_LINK = "https://play.google.com/books/listen?id={id}"

# Separator used when neither the caller nor the command line supplies one.
DEFAULT_SEPARATOR = "|"

# Prefix that marks a "key<separator>value" metadata line in a metadata file.
METADATA_PREFIX = "# @"

metadata = {}

parser = argparse.ArgumentParser(
    description=(
        "Automatically download chapter-timestamps from Google Books and separate an "
        "input-file by those timestamps. Title- and track-attributes will be set. "
        "Google Books url/id (or local file) and the audio-input file are required. "
        "PLEASE NOTE: This will not download any copyrighted material whatsoever, you "
        "have to provide the audiofile yourself."
    ),
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
    metavar="AUDIOFILE",
    dest="audio_file",
    action="store",
    type=str,
    help="the audiofile that tracks will be exported from",
)
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument(
    "-gi",
    "--google-id",
    metavar="GOOGLE-BOOKS-ID",
    dest="google_id",
    action="store",
    type=str,
    help="the id for the audiobook to download the track-data for (e.g. AQAAAEBsuD74QM)",
)
group.add_argument(
    "-gl",
    "--google-link",
    metavar="GOOGLE-LINK",
    dest="google_link",
    action="store",
    type=str,
    help=(
        "the link for the audiobook to download the track-data for (e.g. "
        "https://play.google.com/books/listen?id=AQAAAEBsuD74QM)"
    ),
)
group.add_argument(
    "-mf",
    "--metadata_file",
    metavar="METADATA_FILE",
    dest="metadata_file",
    action="store",
    type=str,
    help="the location of the file that contains the metadata for the tracks",
)
parser.add_argument(
    "-o",
    "--output-folder",
    metavar="OUTPUT_FOLDER",
    type=str,
    help="the folder that the tracks will be written to, created if it does not exist",
)
parser.add_argument(
    "-f",
    "--format",
    metavar="FORMAT",
    default="mp3",
    type=str,
    help=(
        "used as file-extension, ffmpeg will take this and try to derive the audio "
        "codec"
    ),
)
parser.add_argument(
    "-cs",
    "--chapter-separator",
    default=DEFAULT_SEPARATOR,
    help="what separates the chapter/part names from the timestamps in the file",
)
parser.add_argument(
    "-bc",
    "--banned-characters",
    nargs="+",
    # A list, so the default has the same type as user-supplied values.
    default=["/"],
    help="characters that may not show up in generated filenames",
)
parser.add_argument(
    "-e",
    "--export-only",
    type=int,
    default=0,
    help="limit the number of files that will be exported per run (0 means export all)",
)
parser.add_argument(
    "-dpp",
    "--dont-prepend-partnames",
    action="store_true",
    help="do not prepend the part-names to the chapter names (only works with --google-id or --google-link)",
)
parser.add_argument(
    "-of",
    "--override-output",
    action="store_true",
    help="override existing output-files",
)
parser.add_argument(
    "-ds",
    "--dont-skip-existing",
    action="store_true",
    help="do not skip existing output-files during export, combine with --override-output to actually override output-files",
)
parser.add_argument(
    "-d", "--debug", action="store_true", help="adds some debug messages to the output"
)
parser.add_argument(
    "-nm",
    "--no-metadata",
    action="store_true",
    help="do not save metadata to exported tracks (ffmpeg will still copy some on it's own)",
)
parser.add_argument(
    "-nr",
    "--no-resume",
    action="store_true",
    help="ignore local metadata (only works with --google-id or --google-link)",
)

# Parsed command line; populated by main(). Kept as a module global because the
# helper functions below read their defaults from it. Parsing is deliberately
# NOT done at import time so the module can be imported without side effects.
args: Optional[argparse.Namespace] = None


def die(message: str = "", value=1):
    """
    Prints the optional message then halts the script with non-zero return value.

    :param message: Message to be printed, defaults to ""
    :param value: Return value on exit, defaults to 1
    :raises SystemExit: always
    """
    if message:
        print(message)
    # sys.exit instead of the site-provided exit(), which is not guaranteed to
    # exist (e.g. when running with ``python -S`` or from a frozen binary).
    sys.exit(value)


def milli(value: str) -> Optional[float]:
    """
    Tries to convert a given string into 1/1000

    :param value: Value to be converted.
    :return: 1/1000 of the input on success, None on error.
    """
    try:
        return int(value) / 1000
    except (TypeError, ValueError):
        return None


def _separator(explicit: Optional[str] = None) -> str:
    """Return the explicit separator, else the CLI one, else the default."""
    if explicit is not None:
        return explicit
    # getattr with a default also covers ``args is None`` (main() not run yet).
    return getattr(args, "chapter_separator", DEFAULT_SEPARATOR)


def _sanitize(name: str) -> str:
    """Remove every banned character from a name used in a file path."""
    banned = getattr(args, "banned_characters", None) or ["/"]
    # Flatten to single characters so that both ``-bc / :`` and ``-bc "/:"``
    # (and a plain string) ban the characters the user typed.
    banned_chars = set("".join(banned))
    return "".join(char for char in name if char not in banned_chars)


def save_metadata_to_file(
    tracks: list,
    destination: os.PathLike,
    metadata: dict = None,
    source: str = None,
    separator: str = None,
):
    """
    Save the passed tracks into the destination file, optionally noting the source.

    :param tracks: A list of tracks you want to save, a list item should look like:
        {"name": "Chapter Name", "start": 10.0030, "end": 128.1020}
    :param destination: Path for the file that will be written to
    :param metadata: Key/value pairs written as "# @key<separator>value" lines,
        defaults to None
    :param source: A source-reference, embedded in the file as a comment, defaults
        to None
    :param separator: Field separator; defaults to the --chapter-separator value
    """
    sep = _separator(separator)

    def field(value) -> str:
        # A missing timestamp is stored as an empty field, not the text "None".
        return "" if value is None else str(value)

    lines = ["# Automatically created using timestamper\n"]
    if source:
        lines.append(f"# Source: {source}\n")
    if metadata:
        lines.extend(f"{METADATA_PREFIX}{k}{sep}{v}\n" for k, v in metadata.items())
    # The same separator is used between ALL fields so that the file can be read
    # back by load_metadata_from_file regardless of which separator is chosen.
    lines.extend(
        f"{t.get('name')}{sep}{field(t.get('start'))}{sep}{field(t.get('end'))}\n"
        for t in tracks
    )
    with open(destination, "w", encoding="utf-8") as fp:
        fp.writelines(lines)


def load_metadata_from_file(
    path: os.PathLike, separator: str = None
) -> Tuple[List[dict], Dict[str, str]]:
    """
    Load the track-metadata from a file

    Lines starting with "# @" carry "key<separator>value" metadata, other lines
    starting with "#" and blank lines are ignored, every remaining line must be
    "name<separator>start<separator>end".

    :param path: Path to the file the metadata will be read from.
    :param separator: Field separator; defaults to the --chapter-separator value
    :return: A tuple of (list of track dicts, metadata dict)
    :raises SystemExit: if the file is missing, unreadable or malformed
    """
    sep = _separator(separator)
    try:
        with open(path, "r", encoding="utf-8") as fp:
            track_lines = fp.readlines()
    except FileNotFoundError:
        die(f"track_file: {path} does not exist")
    except PermissionError:
        die("track_file: no permission to read {}".format(path))

    tracks = []
    file_metadata = {}
    for line in track_lines:
        if line.startswith(METADATA_PREFIX):
            # Slice the prefix off; str.lstrip would strip a *character set* and
            # could eat leading characters of the key.
            parts = line[len(METADATA_PREFIX):].split(sep, 1)
            if len(parts) != 2:
                print(f"Ignored malformed line: {line}")
            else:
                file_metadata[parts[0].strip()] = parts[1].strip()
            # Metadata lines are never tracks.
            continue
        if line.startswith("#") or not line.strip():
            continue
        # Split from the right so a chapter name may itself contain the separator.
        parts = line.rsplit(sep, 2)
        if len(parts) != 3:
            die(
                f'Line "{line}" does not contain the chapter-name/time '
                f'separator "{sep}" twice (expected name{sep}start{sep}end).'
            )
        tracks.append(
            {
                "name": parts[0].strip(),
                "start": parts[1].strip(),
                "end": parts[2].strip(),
            }
        )
    return (tracks, file_metadata)


def _build_tracks(
    entries: Iterable[Tuple[int, str, str]], prepend_parts: bool = True
) -> List[dict]:
    """
    Turn scraped chapter entries into track dicts.

    :param entries: (item_count, first_value, second_value) per scraped entry.
        One item is a bare name, two items are a part (name, milliseconds),
        three items are a chapter (name, milliseconds, ...).
    :param prepend_parts: Prefix chapter names with "<part name>: "
    :return: Track dicts with "name", "start" and "end"; the last track has an
        empty "end" because its end is the end of the audio file.
    """
    tracks = []
    current_part = ""
    last_timestamp = 0
    last_name = ""
    for item_count, first, second in entries:
        if item_count == 1:
            last_name = first
            continue
        if item_count not in (2, 3):
            continue
        timestamp = milli(second)
        # A new timestamp closes the track that was running until now; entries
        # sharing a timestamp (e.g. a part and its first chapter) only rename it.
        if timestamp != last_timestamp:
            tracks.append({"name": last_name, "start": last_timestamp, "end": timestamp})
        if item_count == 2:
            current_part = first
            last_name = first
        elif current_part and prepend_parts:
            last_name = f"{current_part}: " + first
        else:
            last_name = first
        last_timestamp = timestamp
    tracks.append({"name": last_name, "start": last_timestamp, "end": ""})
    return tracks


def get_tracks_from_google(url: str) -> Tuple[List[dict], Dict[str, str]]:
    """
    Load metadata from google.

    :param url: Metadata will be scraped from the page identified by this URL.
    :return: A tuple of (list of track dicts, metadata dict); the metadata dict
        holds "title" when the page title could be extracted.
    :raises SystemExit: if the page cannot be fetched or parsed
    """
    # Imported lazily: only the scraping path needs these third-party packages,
    # so --metadata_file keeps working without them being installed.
    import requests
    from slimit.parser import Parser

    try:
        # Without a timeout a stalled connection would hang the script forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as error:
        die(f"Could not get page from google: {url} ({error})")
    if response.status_code != 200:
        die(f"Could not get page from google: {url}")
    page = response.text

    matches = re.findall("_OC_contentInfo = ([^;]*)", page)
    if not matches:
        die("No chapters found in the google-page, maybe the markup has changed?")

    js_parser = Parser(yacc_optimize=False, lex_optimize=False)
    tree = js_parser.parse(matches[0])
    try:
        chapters = (
            tree.children()[0].children()[0].children()[0].children()[0].children()
        )
    except (IndexError, AttributeError):
        die("No chapters found in the google-page, maybe the markup has changed?")

    scraped_metadata = {}
    title_matches = re.findall(r'(.*) - Google Play<\/title>', page)
    if title_matches:
        scraped_metadata["title"] = html.unescape(title_matches[0])

    entries = []
    for chapter in chapters:
        # Only the first two items are read; later items are not needed.
        values = [item.value.strip('"') for item in chapter.items[:2]]
        values += [""] * (2 - len(values))
        entries.append((len(chapter.items), values[0], values[1]))

    prepend_parts = not getattr(args, "dont_prepend_partnames", False)
    return _build_tracks(entries, prepend_parts), scraped_metadata


def _ensure_folder(folder: str):
    """Create the folder if only its last path component is missing, else die."""
    if os.path.exists(folder):
        return
    if os.path.exists(os.path.dirname(folder)):
        os.makedirs(folder)
    else:
        die(f'OUTPUT_FOLDER: "{folder}" does not exist.')


def _tracks_for_google_link(link: str, export_folder: str) -> Tuple[List[dict], str]:
    """Return (tracks, export folder) for a Google link, using a local copy if present."""
    ids = parse.parse_qs(parse.urlparse(link).query).get("id")
    if not ids:
        die(f"This does not look right: {link}")
    book_id = ids[0]

    destination = os.path.join(export_folder, f"{book_id}.txt")
    if os.path.exists(destination) and not args.no_resume:
        print(f"Using local metadata: {destination}")
        tracks, _ = load_metadata_from_file(destination)
        return tracks, export_folder

    tracks, book_metadata = get_tracks_from_google(link)
    title = book_metadata.get("title")
    if title:
        # NOTE(review): a scraped title replaces --output-folder with
        # "<cwd>/<title>"; kept as-is, confirm this is intended.
        export_folder = os.path.join(os.getcwd(), _sanitize(title))
        destination = os.path.join(export_folder, f"{book_id}.txt")
    _ensure_folder(export_folder)
    save_metadata_to_file(tracks, destination, metadata=book_metadata, source=link)
    return tracks, export_folder


def _ffmpeg_command(track: dict, number: int, title: str, destination: str) -> List[str]:
    """Build the ffmpeg argument list that cuts one track out of the audio file."""
    command = [
        "ffmpeg",
        "-y" if args.override_output else "-n",
        "-ss",
        str(track["start"]),
        "-i",
        args.audio_file,
    ]
    # The last track has no end: ffmpeg then reads until the end of the input.
    if track["end"]:
        command += ["-to", str(track["end"]), "-copyts"]
    if not args.no_metadata:
        command += ["-metadata", f"title={title}", "-metadata", f"track={number}"]
    command.append(destination)
    return command


def main(argv=None):
    """
    Parse the command line, collect the tracks and export them with ffmpeg.

    :param argv: Argument list to parse, defaults to None (use sys.argv)
    :raises SystemExit: on invalid input or when ffmpeg fails
    """
    global args
    args = parser.parse_args(argv)

    tracks = []
    if not os.path.exists(args.audio_file):
        die("AUDIO_FILE: {} doesn't exist.".format(args.audio_file))

    if not args.output_folder:
        args.output_folder = os.getcwd()
    export_folder = os.path.abspath(args.output_folder)
    _ensure_folder(export_folder)

    if args.google_id:
        args.google_link = GOOGLE_LINK.format(id=args.google_id)
    if args.google_link:
        tracks, export_folder = _tracks_for_google_link(args.google_link, export_folder)
    if args.metadata_file:
        tracks, _ = load_metadata_from_file(args.metadata_file)

    if not tracks:
        sep = args.chapter_separator
        die(
            f'No tracks found, check format: "Chapter name{sep}start{sep}end", '
            "one track per line."
        )

    total = len(tracks)
    print(f"{total} tracks to export to {export_folder}.")
    if args.export_only > 0:
        print(f"We are only exporting {args.export_only}.")

    exported = 0
    skipped = 0
    number_width = len(str(total))
    try:
        for number, track in enumerate(tracks, start=1):
            meta_title = _sanitize(track["name"]).strip()
            chapter_name = f"{str(number).zfill(number_width)}. {meta_title}"
            destination = os.path.join(export_folder, f"{chapter_name}.{args.format}")

            if (not args.dont_skip_existing) and os.path.exists(destination):
                print(f'Skipped "{chapter_name}" ({number}/{total})')
                skipped += 1
                continue

            command = _ffmpeg_command(track, number, meta_title, destination)
            if args.debug:
                print(" ".join(command))
            try:
                run_result = subprocess.run(command, capture_output=True)
            except FileNotFoundError:
                die("ffmpeg was not found, please install it and add it to PATH.")
            if run_result.returncode != 0:
                print(f"command was: '{' '.join(command)}'")
                stderr = run_result.stderr.decode(errors="replace")
                die(f"ffmpeg did not terminate normally: {stderr}")

            print(f'Exported "{chapter_name}" ({number}/{total})')
            exported += 1
            if args.export_only > 0 and args.export_only == exported:
                break
    except KeyboardInterrupt:
        print("\nKeyboardInterrupt, aborted processing.\n")

    print(f"Exported {exported} tracks to {export_folder}.")
    if skipped:
        print(f"Skipped {skipped} files that already existed.")


if __name__ == "__main__":
    main()