Initial commit
This commit is contained in:
		
							
								
								
									
										2
									
								
								requirements.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								requirements.txt
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,2 @@
 | 
			
		||||
requests==2.25.0
 | 
			
		||||
slimit==0.8.1
 | 
			
		||||
							
								
								
									
										452
									
								
								timestamper.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										452
									
								
								timestamper.py
									
									
									
									
									
										Executable file
									
								
							@ -0,0 +1,452 @@
 | 
			
		||||
#!/usr/bin/env python3
# timestamper -- split an audiobook file into per-chapter tracks using
# timestamps scraped from Google Books (or read from a local metadata file).
# ffmpeg must be available on PATH for the actual audio export.

import argparse
import os
import re
import requests
import subprocess
import html

from slimit.parser import Parser
from urllib import parse

# URL template for a Google Books "listen" page; {id} is the book id.
GOOGLE_LINK = "https://play.google.com/books/listen?id={id}"
# NOTE(review): module-level metadata dict; every function below shadows it
# with a local of the same name, so it appears unused at module scope.
metadata = {}

# Command-line interface. The resulting `args` namespace is read as a global
# by the functions in this module.
parser = argparse.ArgumentParser(
    description=(
        "Automatically download chapter-timestamps from Google Books and separate an "
        "input-file by those timestamps. Title- and track-attributes will be set. "
        "Google Books url/id (or local file) and the audio-input file are required. "
        "PLEASE NOTE: This will not download any copyrighted material whatsoever, you "
        "have to provide the audiofile yourself."
    ),
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
# Required positional argument: the source audio file to split.
parser.add_argument(
    metavar="AUDIOFILE",
    dest="audio_file",
    action="store",
    type=str,
    help="the audiofile that tracks will be exported from",
)
# Exactly one metadata source must be given: Google id, Google link, or file.
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument(
    "-gi",
    "--google-id",
    metavar="GOOGLE-BOOKS-ID",
    dest="google_id",
    action="store",
    type=str,
    help="the id for the audiobook to download the track-data for (e.g. AQAAAEBsuD74QM)",
)
group.add_argument(
    "-gl",
    "--google-link",
    metavar="GOOGLE-LINK",
    dest="google_link",
    action="store",
    type=str,
    help="the link for the audiobook to download the track-data for (e.g. https://play.google.com/books/listen?id=AQAAAEBsuD74QM)",
)
group.add_argument(
    "-mf",
    "--metadata_file",
    metavar="METADATA_FILE",
    dest="metadata_file",
    action="store",
    type=str,
    help="the location of the file that contains the metadata for the tracks",
)

# Output and formatting options.
parser.add_argument(
    "-o",
    "--output-folder",
    metavar="OUTPUT_FOLDER",
    type=str,
    help="the folder that the tracks will be written to, created if it does not exist",
)
parser.add_argument(
    "-f",
    "--format",
    metavar="FORMAT",
    default="mp3",
    type=str,
    help=(
        "used as file-extension, ffmpeg will take this and try to derive the audio "
        "codec"
    ),
)
parser.add_argument(
    "-cs",
    "--chapter-separator",
    default="|",
    help="what separates the chapter/part names from the timestamps in the file",
)
# NOTE(review): the default is the string "/" rather than a list; membership
# tests (`c in args.banned_characters`) work for both forms.
parser.add_argument(
    "-bc",
    "--banned-characters",
    nargs="+",
    default="/",
    help="characters that may not show up in generated filenames",
)
parser.add_argument(
    "-e",
    "--export-only",
    type=int,
    default=0,
    help="limit the number of files that will be exported per run (0 means export all)",
)
parser.add_argument(
    "-dpp",
    "--dont-prepend-partnames",
    action="store_true",
    help="do not prepend the part-names to the chapter names (only works with --google-id or --google-link)",
)
parser.add_argument(
    "-of",
    "--override-output",
    action="store_true",
    help="override existing output-files",
)
parser.add_argument(
    "-ds",
    "--dont-skip-existing",
    action="store_true",
    help="do not skip existing output-files during export, combine with --override-outputs to actually override output-files",
)
parser.add_argument(
    "-d", "--debug", action="store_true", help="adds some debug messages to the output"
)
parser.add_argument(
    "-nm",
    "--no-metadata",
    action="store_true",
    help="do not save metadata to exported tracks (ffmpeg will still copy some on it's own)",
)
parser.add_argument(
    "-nr",
    "--no-resume",
    action="store_true",
    help="ignore local metadata (only works with --google-id or --google-link)",
)
args = parser.parse_args()
			
		||||
def die(message: str = "", value: int = 1):
    """
    Print the optional message, then halt the script with a non-zero code.

    :param message: Message to be printed, defaults to ""
    :type message: str, optional
    :param value: Return value on exit, defaults to 1
    :type value: int, optional
    """
    if message:
        print(message)
    # Raise SystemExit directly instead of calling the site-injected exit()
    # builtin, which is not guaranteed to exist (e.g. when run with
    # `python -S`). Behavior is identical: exit(value) raises SystemExit too.
    raise SystemExit(value)
 | 
			
		||||
def milli(value: str) -> "float | None":
    """
    Convert a millisecond string into seconds (1/1000 of the numeric value).

    :param value: Value to be converted.
    :type value: str
    :return: 1/1000 of the input on success, None on error.
    :rtype: float or None
    """
    try:
        return int(value) / 1000
    except ValueError:
        # Non-numeric input (e.g. an empty string) yields None rather than
        # raising, so callers can treat "no timestamp" uniformly.
        # BUG FIX: the return annotation previously claimed a plain float,
        # hiding this None path from type checkers.
        return None
			
		||||
 | 
			
		||||
def save_metadata_to_file(
    tracks: list, destination: os.PathLike, metadata: dict = None, source: str = None
):
    """
    Save the passed tracks into the destination file, optionally noting the source.

    :param tracks: A list of tracks you want to save, a list item should look like: {"name": "Chapter Name", "start": 10.0030, "end": 128.1020}
    :type tracks: list
    :param destination: Path for the file that will be written to
    :type destination: os.PathLike
    :param metadata: Extra key/value pairs written as "# @key<sep>value" comment lines, defaults to None
    :type metadata: dict, optional
    :param source: A source-reference, embedded in the file as a comment, defaults to None
    :type source: str, optional
    """
    sep = args.chapter_separator
    with open(destination, "w") as fp:
        fp.writelines(
            [
                "# Automatically created using timestamper\n",
                f"# Source: {source}\n" if source else "",
            ]
            + ([f"# @{k}{sep}{v}\n" for k, v in metadata.items()] if metadata else [])
            + [
                # BUG FIX: the start/end separator was hard-coded to "|",
                # but load_metadata_from_file() requires the configured
                # separator to appear exactly twice per track line, so files
                # written with a non-default -cs could never be read back.
                f"{t.get('name')}{sep}{t.get('start')}{sep}{t.get('end')}\n"
                for t in tracks
            ]
        )
 | 
			
		||||
 | 
			
		||||
def load_metadata_from_file(path: os.PathLike) -> tuple:
    """
    Load the track-metadata from a file.

    :param path: Path to the file the metadata will be read from.
    :type path: os.PathLike
    :return: A tuple (tracks, metadata): tracks is a list of dicts with
        "name"/"start"/"end" keys, metadata is a dict built from "# @" lines.
        Both are empty if the file does not exist (previously None was
        returned, which crashed callers that unpack the result).
    :rtype: tuple
    """
    metadata = {}
    tracks = []
    if os.path.exists(path):
        try:
            with open(path, "r") as fp:
                for line in fp.readlines():
                    if line.startswith("# @"):
                        # Embedded metadata line: "# @key<sep>value".
                        parts = line.lstrip("# @").split(args.chapter_separator)
                        if len(parts) != 2:
                            print(f"Ignored malformed line: {line}")
                            continue
                        metadata[parts[0]] = parts[1].strip()

                    elif line.startswith("#") or not len(line.strip()):
                        # Plain comment or blank line.
                        continue

                    elif line.count(args.chapter_separator) != 2:
                        # Track lines must be "name<sep>start<sep>end".
                        # BUG FIX: the message previously said "(only) once"
                        # although the check requires exactly two separators.
                        die(
                            f'Line "{line}" does not contain the chapter-name/time '
                            f'separator "{args.chapter_separator}" exactly twice.'
                        )

                    else:
                        parts = line.split(args.chapter_separator)
                        tracks.append(
                            {
                                "name": parts[0].strip(),
                                "start": parts[1].strip(),
                                "end": parts[2].strip(),
                            }
                        )
        except PermissionError:
            # BUG FIX: previously formatted non-existent args.track_file,
            # which raised AttributeError inside the error handler.
            die("metadata file: no permission to read {}".format(path))
    return (tracks, metadata)
			
		||||
 | 
			
		||||
def get_tracks_from_google(url: str) -> tuple:
    """
    Scrape chapter timestamps and the book title from a Google Books page.

    :param url: Metadata will be scraped from the page identified by this URL.
    :type url: str
    :return: A tuple (tracks, metadata): tracks is a list of dicts with
        "name"/"start"/"end" keys (start/end in seconds), metadata may
        contain the scraped "title".
    :rtype: tuple
    """
    metadata = {}
    req = requests.get(url)
    if req.status_code == 200:
        _html = req.text
    else:
        die(f"Could not get page from google: {url}")

    # The chapter data lives in a JavaScript assignment embedded in the page.
    matches = re.findall("_OC_contentInfo = ([^;]*)", _html)
    if not matches:
        die("No chapters found in the google-page, maybe the markup has changed?")

    # Parse the JS literal with slimit and descend to the chapter array.
    parser = Parser(yacc_optimize=False, lex_optimize=False)
    tree = parser.parse(matches[0])
    chapters = tree.children()[0].children()[0].children()[0].children()[0].children()

    matches = re.findall(r'<title id="main-title">(.*) - Google Play<\/title>', _html)
    if matches:
        metadata["title"] = html.unescape(matches[0])

    tracks = []
    current_part = ""
    last_timestamp = 0
    last_name = ""

    for c in chapters:
        # Entries come in three shapes (by item count): 1 = chapter name
        # only, 2 = part name + timestamp, 3 = name + timestamp + extra.
        item_number = len(c.items)
        a = ""
        b = ""
        try:
            a = c.items[0].value.strip('"')
            b = c.items[1].value.strip('"')
        except IndexError:
            pass

        if item_number == 1:
            last_name = a
        elif item_number == 2:
            current_part = a
            # Only close the previous track when the timestamp advanced.
            if milli(c.items[1].value.strip('"')) != last_timestamp:
                tracks.append(
                    {
                        "name": last_name,
                        "start": last_timestamp,
                        "end": milli(b),
                    }
                )
                last_name = a
                last_timestamp = milli(b)
        elif item_number == 3:
            if milli(b) != last_timestamp:
                tracks.append(
                    {
                        "name": last_name,
                        "start": last_timestamp,
                        "end": milli(b),
                    }
                )
            # BUG FIX: honour --dont-prepend-partnames, which was documented
            # in the CLI help but never consulted anywhere in the code.
            if current_part and not args.dont_prepend_partnames:
                last_name = f"{current_part}: " + a
            else:
                last_name = a
            last_timestamp = milli(b)

    # The final chapter runs to the end of the file ("" means: no -to flag
    # will be passed to ffmpeg later on).
    tracks.append(
        {
            "name": last_name,
            "start": last_timestamp,
            "end": "",
        }
    )
    return tracks, metadata
			
		||||
 | 
			
		||||
def _sanitize(name: str) -> str:
    """Drop every character listed in --banned-characters from *name*."""
    return "".join(c for c in name if c not in args.banned_characters)


def _ensure_folder(folder: str):
    """Create *folder* if its parent exists; die() when it cannot exist."""
    if os.path.exists(folder):
        pass
    elif os.path.exists(os.path.dirname(folder)):
        os.makedirs(folder)
    else:
        die(f'OUTPUT_FOLDER: "{folder}" does not exist.')


def main():
    """
    Entry point: resolve the track list (from Google Books or a local
    metadata file), then export one audio file per track via ffmpeg.
    """
    tracks = []
    metadata = {}
    if not os.path.exists(args.audio_file):
        die("AUDIO_FILE: {} doesn't exist.".format(args.audio_file))

    if not args.output_folder:
        args.output_folder = os.getcwd()

    export_folder = os.path.abspath(args.output_folder)
    _ensure_folder(export_folder)

    # A bare id is turned into the corresponding "listen" link.
    if args.google_id:
        args.google_link = GOOGLE_LINK.format(id=args.google_id)

    if args.google_link:
        parsed = parse.urlparse(args.google_link)
        # Renamed from "id" to avoid shadowing the builtin of that name.
        book_id = parse.parse_qs(parsed.query).get("id")
        if not book_id:
            die(f"This does not look right: {args.google_link}")
        book_id = book_id[0]
        destination = f"{export_folder}{os.path.sep}{book_id}.txt"
        if os.path.exists(destination) and not args.no_resume:
            # Resume from a metadata file saved by a previous run.
            print(f"Using local metadata: {destination}")
            tracks, metadata = load_metadata_from_file(destination)
        else:
            tracks, metadata = get_tracks_from_google(args.google_link)
            if metadata.get("title"):
                # Export into a folder named after the (sanitized) title.
                export_folder = (
                    os.getcwd() + os.path.sep + _sanitize(metadata.get("title"))
                )
                destination = f"{export_folder}{os.path.sep}{book_id}.txt"
                _ensure_folder(export_folder)
        # Persist the metadata next to the exports so later runs can resume.
        save_metadata_to_file(
            tracks, destination, metadata=metadata, source=args.google_link
        )

    if args.metadata_file:
        tracks, metadata = load_metadata_from_file(args.metadata_file)

    # NOTE: tracks is always bound at this point, so the old
    # try/except NameError wrapper around this check was dead code.
    if not tracks:
        die(
            'No tracks found, check format: "Chapter name|(hh:mm:ss|mm:ss|s*)", the '
            "time represents the length of the chapter."
        )

    print(f"{len(tracks)} tracks to export to {export_folder}.")
    if args.export_only > 0:
        print(f"We are only exporting {args.export_only}.")
    exported = 0
    skipped = 0
    try:
        iterator = 1
        for track in tracks:
            # Zero-padded track number followed by the sanitized chapter name.
            chapter_name = (
                f"{str(iterator).zfill(len(str(len(tracks))))}. "
                + _sanitize(track["name"]).strip()
            )
            meta_title = _sanitize(track["name"]).strip()
            destination = f"{export_folder}{os.path.sep}{chapter_name}.{args.format}"
            if (not args.dont_skip_existing) and os.path.exists(destination):
                print(f'Skipped "{chapter_name}" ({iterator}/{len(tracks)})')
                skipped += 1
                iterator += 1
                continue
            command = (
                [
                    "ffmpeg",
                    "-y" if args.override_output else "-n",
                    "-ss",
                    str(track["start"]),
                    "-i",
                    args.audio_file,
                ]
                # Last track has end == "" and runs to the end of the input.
                + (["-to", str(track["end"]), "-copyts"] if track["end"] else [])
                # BUG FIX: honour --no-metadata, which was documented in the
                # CLI help but previously ignored (tags were always written).
                + (
                    []
                    if args.no_metadata
                    else [
                        "-metadata",
                        f"title={meta_title}",
                        "-metadata",
                        f"track={iterator}",
                    ]
                )
                + [f"{destination}"]
            )

            if args.debug:
                print(" ".join(command))
            # List form (shell=False) avoids any shell-quoting issues with
            # chapter names in the destination path.
            run_result = subprocess.run(
                command,
                capture_output=True,
            )
            if run_result.returncode != 0:
                print(f"command was: '{' '.join(command)}'")
                die(f"ffmpeg did not terminate normally: {str(run_result.stderr)}")

            print(f'Exported "{chapter_name}" ({iterator}/{len(tracks)})')
            exported += 1
            iterator += 1
            if args.export_only > 0 and args.export_only == exported:
                break

    except KeyboardInterrupt:
        print("\nKeyboardInterrupt, aborted processing.\n")
    print(f"Exported {exported} tracks to {export_folder}.")
    if skipped:
        print(f"Skipped {skipped} files that already existed.")
			
		||||
 | 
			
		||||
# Run the exporter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
 | 
			
		||||
		Reference in New Issue
	
	Block a user