Space Panda

musicrename.py

Source

#!/usr/bin/env python3
"""Rename audio files in bulk with a pattern according to their metadata
Simple example use: suppose you have some songs of Smetana's "Má Vlast" and the
files have the proper metadata (title, artist, tracknumber, etc.), but the
filenames are unreadable crap. You can run this script in the folder with the
files (-d will prevent any changes from being executed):
    musicrename.py -d *
    hokey.opus -> Bedřich_Smetana-Vyšehrad.opus
    foo.ogg -> Bedřich_Smetana-Vltava.ogg
    bar.mp3 -> Bedřich_Smetana-Šárka.mp3
Of course you know it's Smetana, but the track number is more interesting, so
you change the file pattern and, as an extra, remove the diacritics with -r:
    musicrename.py -r -p "{tracknumber} - {title}" *
    hokey.opus -> 1_-_Vysehrad.opus
    foo.ogg -> 2_-_Vltava.ogg
    bar.mp3 -> 3_-_Sarka.mp3
To also move the files into (new) folders, add the -m parameter:
    musicrename.py -r -p "{tracknumber} - {title}" -m "{artist}/{date}-{album}" *
    hokey.opus -> Bedrich_Smetana/1875-Ma_vlast/1_-_Vysehrad.opus
    foo.ogg -> Bedrich_Smetana/1875-Ma_vlast/2_-_Vltava.ogg
    bar.mp3 -> Bedrich_Smetana/1875-Ma_vlast/3_-_Sarka.mp3
The -m parameter accepts the same patterns as the -p parameter.
Check --help for more useful parameters.
On Patterns:
musicrename.py requires mutagen to read the metadata from audio files. That
also means that all tags from mutagen are available in the patterns:
- artist
- artistsort
- album
- albumsort
- title
- tracknumber
- date
If you use it to rename video files, these tags might be available:
- tvshow
- tvshowsort
- tvseason
- tvepisode
There are more, please consult https://mutagen.readthedocs.io/
"""
__version__ = '0.4.0'
import pathlib
import argparse
import sys
import os
import unicodedata
import string
import shutil
import mutagen
import mutagen.easymp4
# Transliteration tables, keyed by the name accepted by --transliterate.
# Every table is a list of (character, replacement) pairs that normalize()
# converts into a dict. A character must therefore be listed only once per
# table: with duplicates the pair listed last silently wins.
transliterations = {
    'ru': [('Б', 'B'), ('б', 'b'), ('В', 'V'), ('в', 'v'),
           ('Г', 'G'), ('г', 'g'), ('Д', 'D'), ('д', 'd'),
           ('Ж', 'Ž'), ('ж', 'ž'), ('З', 'Z'), ('з', 'z'),
           ('И', 'I'), ('и', 'i'), ('Й', 'J'), ('й', 'j'),
           ('К', 'K'), ('к', 'k'), ('Л', 'L'), ('л', 'l'),
           ('М', 'M'), ('м', 'm'), ('Н', 'N'), ('н', 'n'),
           ('П', 'P'), ('п', 'p'), ('Р', 'R'), ('р', 'r'),
           ('С', 'S'), ('с', 's'), ('Т', 'T'), ('т', 't'),
           ('У', 'U'), ('у', 'u'), ('Ф', 'F'), ('ф', 'f'),
           ('Х', 'X'), ('х', 'x'), ('Ц', 'C'), ('ц', 'c'),
           ('Ч', 'Č'), ('ч', 'č'), ('Ш', 'Š'), ('ш', 'š'),
           ('Щ', 'ŠČ'), ('щ', 'šč'), ('Ъ', '"'), ('ъ', '"'),
           ('Ы', 'Y'), ('ы', 'y'), ('Ь', "'"), ('ь', "'"),
           ('Э', 'È'), ('э', 'è'), ('Ю', 'JU'), ('ю', 'ju'),
           ('Я', 'JA'), ('я', 'ja'), ('Ѳ', 'F'), ('ѳ', 'f'),
           # Cyrillic letters that look like Latin ones. Р and С are not
           # repeated here: a second ('С', 'C') pair used to override the
           # correct ('С', 'S') mapping above.
           ('А', 'A'), ('а', 'a'), ('Е', 'E'), ('е', 'e'),
           ('Ё', 'Ë'), ('ё', 'ë'), ('О', 'O'), ('о', 'o'),
           ('І', 'I'), ('і', 'i'), ('Ѵ', 'Ẏ'), ('ѵ', 'ẏ'),
           ('Ѣ', 'Ě'), ('ѣ', 'ě')],
    'is': [('Þ', 'TH'), ('þ', 'th'), ('Æ', 'AE'), ('æ', 'ae'),
           ('Ð', 'D'), ('ð', 'd')],
    'en': [('Þ', 'TH'), ('þ', 'th'), ('Æ', 'AE'), ('æ', 'ae')],
}
# Characters that are always allowed to appear in a generated file name.
SAFE_CHARS = ''.join((string.ascii_letters, string.digits, '_-'))
# Characters with a fixed replacement; every kind of bracket is dropped.
filename_safety_table = dict.fromkeys('()[]{}', '')
def parse_args(argv=None):
    """Build the command line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with the attributes ``dummy``,
        ``pattern``, ``remove_diacritics``, ``lower_case``,
        ``transliterate``, ``allow_spaces``, ``move``, ``verbose`` and
        ``files``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dummy', '-d',
                        action="store_true",
                        default=False,
                        help="Don't actually apply changes, just show what would be saved")
    # The help used to point to a --pattern-help option that does not exist.
    parser.add_argument('--pattern', '-p',
                        type=str,
                        default='{artist}-{title}',
                        help='Pattern for the file name. Defaults to %(default)s.'
                             ' Every metadata tag of a file can be used as a'
                             ' {tag} placeholder.')
    parser.add_argument('--remove-diacritics', '-r',
                        action="store_true",
                        default=False,
                        help="Try your best to remove diacritics")
    parser.add_argument('--lower-case', '-l',
                        action="store_true",
                        default=False,
                        help="Lower case all letters")
    # An unknown table name is accepted on purpose: normalize() then simply
    # skips the transliteration step.
    parser.add_argument('--transliterate', '-t',
                        type=str,
                        default='en',
                        help="What transliteration table to use. "
                             "Options are: " + ', '.join(sorted(transliterations)) + ". "
                             "Defaults to '%(default)s'.")
    parser.add_argument('--allow-spaces', '-s',
                        action="store_true",
                        default=False,
                        help="Allow single ASCII spaces in filenames.")
    parser.add_argument('--move', '-m',
                        type=str,
                        default=None,
                        help="Pattern of the path you want to move the files into "
                             "instead of just renaming them. No file moving by default.")
    parser.add_argument('--verbose', '-v',
                        action="store_true",
                        default=False,
                        help="Also announce when renaming files.")
    parser.add_argument("files",
                        nargs="+",
                        help="Files to work with/on")
    return parser.parse_args(argv)
def normalize(args, text):
    """Normalize the given text using the configuration from args

    The steps are applied in this order: lower-casing, transliteration,
    removal of diacritics and finally the replacement of characters that are
    not wanted in file names.

    Args:
        args: The parsed command line options. The attributes ``lower_case``,
            ``transliterate`` and ``remove_diacritics`` are read.
        text: The text to normalize.

    Returns:
        The normalized text. It is never empty: if nothing is left of the
        text, ``'_'`` is returned.
    """
    if args.lower_case:
        text = text.lower()

    if args.transliterate in transliterations:
        table = dict(transliterations[args.transliterate])
        converted = []
        for letter in text:
            if letter in table:
                letter = table[letter]
            else:
                # Control characters (tab, newline, ...) have no unicode
                # name; without the default unicodedata.name() raises
                # ValueError for them.
                unicodename = unicodedata.name(letter, '')
                if (not unicodename.startswith('LATIN ')
                        and ' LETTER ' in unicodename):
                    print(f"Cannot transliterate {letter}. Wrong transliteration table?")
            converted.append(letter)
        text = ''.join(converted)

    if args.remove_diacritics:
        converted = []
        for letter in text:
            unicodename = unicodedata.name(letter, '')
            if unicodename.startswith('LATIN ') and ' WITH ' in unicodename:
                try:
                    letter = unicodedata.lookup(unicodename.split(' WITH ', 1)[0])
                except KeyError:
                    # There is no character with the undecorated name, so
                    # the letter is kept unchanged.
                    pass
            converted.append(letter)
        text = ''.join(converted)

    converted = []
    for letter in text:
        if letter in filename_safety_table:
            letter = filename_safety_table[letter]
        elif letter.isspace() and letter not in SAFE_CHARS:
            letter = '_'
        elif not letter.isprintable():
            letter = ''
        elif letter.isascii() and letter not in SAFE_CHARS:
            letter = '_'
        elif ('LETTER' not in unicodedata.name(letter, '')
                and letter not in SAFE_CHARS):
            letter = '_'
        converted.append(letter)
    converted = ''.join(converted)

    # remove duplicate underscores
    while '__' in converted:
        converted = converted.replace('__', '_')
    # get rid of leading and trailing underscores
    converted = converted.strip('_')
    # if everything was removed, pretend '_' is the converted text
    return converted or '_'
def main():
    """Rename (and optionally move) the files given on the command line.

    For every file the metadata is read with mutagen, the file name pattern
    (and, with --move, the folder pattern) is filled with the tags and the
    result is passed through normalize(). All new names are computed first;
    only afterwards the files are renamed or moved. Files that cannot be
    processed are reported on stderr and skipped.
    """
    global SAFE_CHARS
    args = parse_args()
    renames = []
    if args.allow_spaces:
        SAFE_CHARS += ' '

    # A root ("/" or a drive) written literally in the --move pattern must
    # survive the sanitizing of the path parts. normalize() would turn it
    # into "_" and thereby make an absolute target relative.
    move_anchor = ''
    if args.move is not None:
        move_anchor = pathlib.Path(args.move).anchor

    for filename in args.files:
        fullpath = pathlib.Path(filename).expanduser()
        if not fullpath.is_file():
            print(f"File not found (or not a file): {filename}", file=sys.stderr)
            continue
        meta = mutagen.File(str(fullpath), easy=True)
        if meta is None:
            print(f"No metadata found in {filename}", file=sys.stderr)
            continue

        # Only the first value of every tag is used. Tags without any value
        # are dropped so that they are reported as missing instead of
        # raising an IndexError.
        # NOTE(review): values are presumably str for most formats; str()
        # guards against tag value objects that are not - confirm.
        tags = {key: str(values[0]) for key, values in meta.items() if values}

        if 'tracknumber' in tags:
            nr_of_tracks = ''
            tracknumber = tags['tracknumber']
            if '/' in tracknumber:
                tracknumber, nr_of_tracks = tracknumber.split('/', 1)
            elif 'tracktotal' in tags:
                nr_of_tracks = tags['tracktotal']
            try:
                # zero-pad to the number of digits of the total track count
                tags['tracknumber'] = f'{int(tracknumber):0>{len(nr_of_tracks)}}'
            except ValueError:
                # Non-numeric track numbers (e.g. "A1") cannot be padded;
                # use them as they are.
                tags['tracknumber'] = tracknumber

        try:
            new_name = args.pattern.format(**tags)
        except KeyError as exc:
            print(f"Can not rename {filename}, missing metadata field {exc}", file=sys.stderr)
            continue
        new_name = normalize(args, new_name)

        target_dir = pathlib.Path()
        if args.move is not None:
            # a "/" inside a tag must not create another folder level
            sane_tags = {k: v.replace('/', '_') for k, v in tags.items()}
            try:
                move_to = args.move.format(**sane_tags)
            except KeyError as exc:
                # the file is still renamed, just not moved
                print(f"Can not move {filename}, missing metadata field {exc}", file=sys.stderr)
            else:
                parts = pathlib.Path(move_to).parts
                anchor = ''
                if move_anchor and parts[:1] == (move_anchor,):
                    anchor, parts = move_anchor, parts[1:]
                # sanitize the parts of the new folder
                target_dir = pathlib.Path(
                    anchor, *(normalize(args, part) for part in parts))

        new_path = fullpath.parent / target_dir / (new_name + fullpath.suffix)
        renames.append((fullpath, new_path))

    if args.dummy:
        print("Dummy mode! Not actually renaming anything", file=sys.stderr)

    for old_file, new_file in renames:
        if old_file == new_file:
            if args.dummy or args.verbose:
                print(f"{old_file} already has the correct filename, skipping", file=sys.stderr)
            continue

        action = "Moving" if args.move else "Renaming"
        if args.verbose or args.dummy:
            print(f"{action} {old_file} -> {new_file}")
        if args.dummy:
            continue

        if new_file.exists():
            # never overwrite an existing file
            print(f"Cannot rename {old_file} to {new_file}: {new_file.name} already exists!",
                  file=sys.stderr)
            continue

        if args.move:
            new_file.parent.mkdir(parents=True, exist_ok=True)
            shutil.move(str(old_file), str(new_file))
        else:
            os.rename(old_file, new_file)
# Register additional easy keys for MP4 files, so that they can be used in
# patterns: two text keys (tvshow, tvshowsort) and two integer keys
# (tvseason, tvepisode), each given as (register function, key, atom).
for _register, _key, _atom in (
        (mutagen.easymp4.EasyMP4Tags.RegisterTextKey, "tvshow", "tvsh"),
        (mutagen.easymp4.EasyMP4Tags.RegisterTextKey, "tvshowsort", "sosn"),
        (mutagen.easymp4.EasyMP4Tags.RegisterIntKey, "tvseason", "tvsn"),
        (mutagen.easymp4.EasyMP4Tags.RegisterIntKey, "tvepisode", "tves"),
):
    _register(_key, _atom)
# do not leave the loop variables behind in the module namespace
del _register, _key, _atom

# only run when executed as a script, not when imported
if __name__ == '__main__':
    main()