-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfilename_parser.py
89 lines (71 loc) · 3.39 KB
/
filename_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import re
from typing import Optional, Match
class FilenameParser:
    """Split a file name into ``label``, ``artist`` and ``title`` parts.

    The file name is tried against a fixed list of layouts in which the
    parts are separated by a hyphen and the words inside a part are joined
    by underscores or by whitespace. Every layout tolerates an optional
    leading number (``01_`` / ``01_-_`` / ``01 `` / ``01 - ``) and requires
    a ``.`` after the title.

    Attributes are filled in by the first layout that matches; parts that a
    layout does not capture keep their previous value (``''`` initially).
    """

    # label - artist - title, words joined by underscores.
    _THREE_PART_UNDERSCORES = re.compile(
        r'^(?:\d+_(?:-_)?)?(?P<label>[a-zA-Z0-9]{2,}(?:_+[a-zA-Z0-9()]+)*)_*-_*'
        r'(?P<artist>[a-zA-Z0-9.,&-]+(?:_+[a-zA-Z0-9().,&-]+)*)_*-_*'
        r'(?P<title>[a-zA-Z0-9.,&]+(?:_+[a-zA-Z0-9().,&!]+)*)\.')

    # Upper-case/digit label glued to the artist by a single underscore
    # (no hyphen between label and artist).
    _THREE_PART_UNDERSCORES_CAT_NO = re.compile(
        r'^(?:\d+_(?:-_)?)?(?P<label>[A-Z0-9]{2,})_'
        r'(?P<artist>[a-zA-Z0-9.,&-]+(?:_+[a-zA-Z0-9().,&-]+)*)_*-_*'
        r'(?P<title>[a-zA-Z0-9.,&]+(?:_+[a-zA-Z0-9().,&!]+)*)\.')

    # artist - title, words joined by underscores.
    _TWO_PART_UNDERSCORES = re.compile(
        r'^(?:\d+_(?:-_)?)?(?P<artist>[a-zA-Z0-9.,&-]+(?:_+[a-zA-Z0-9().,&-]+)*)_*-_*'
        r'(?P<title>[a-zA-Z0-9.,&]+(?:_+[a-zA-Z0-9().,&!]+)*)\.')

    # label - artist - title, words joined by whitespace.
    _THREE_PART_SPACES = re.compile(
        r'^(?:\d+\s(?:-\s)?)?(?P<label>[a-zA-Z0-9]{2,}(?:\s+[a-zA-Z0-9()]+)*)\s+-\s+'
        r'(?P<artist>[a-zA-Z0-9.,&-]+(?:\s+[a-zA-Z0-9().,&-]+)*)\s*-\s*'
        r'(?P<title>[a-zA-Z0-9.,&]+(?:\s+[a-zA-Z0-9().,&!]+)*)\.')

    # artist - title, words joined by whitespace.
    _TWO_PART_SPACES = re.compile(
        r'^(?:\d+\s(?:-\s)?)?(?P<artist>[a-zA-Z0-9.,&-]+(?:\s+[a-zA-Z0-9().,&-]+)*)\s*-\s*'
        r'(?P<title>[a-zA-Z0-9.,&]+(?:\s+[a-zA-Z0-9().,&!]+)*)\.')

    # A gap of two or more spaces stands in for an ampersand.
    _AMPERSAND_GAP = re.compile(r' {2,}')

    def __init__(self, infile: str):
        """Remember the file name to parse and start with empty parts."""
        self.infile = infile
        self.label = ''
        self.artist = ''
        self.title = ''

    def _capture(self, pattern: 're.Pattern[str]',
                 underscored: bool) -> Optional[Match[str]]:
        """Match ``pattern`` at the start of the file name and store its groups.

        Each named group of a successful match is normalised and assigned to
        the attribute of the same name. Nothing is modified on failure.

        :param pattern: compiled layout whose named groups are a subset of
            ``label``, ``artist`` and ``title``.
        :param underscored: whether words in the captured text are joined by
            underscores (and so must be converted to spaces first).
        :return: the match object, or ``None`` when the layout does not fit.
        """
        found = pattern.match(self.infile)
        if found:
            clean = self.normalise_underscore if underscored else self.normalise
            # Every named group is mandatory in all layouts, so no value is None.
            for field, raw in found.groupdict().items():
                setattr(self, field, clean(raw))
        return found

    def match_3part_underscores(self) -> Optional[Match[str]]:
        """Try ``label_-_artist_-_title.`` and store all three parts."""
        return self._capture(self._THREE_PART_UNDERSCORES, True)

    def match_3part_underscores_cat_no_variation(self) -> Optional[Match[str]]:
        """Try ``LABEL_artist_-_title.`` and store all three parts.

        The label here is a run of at least two upper-case letters/digits
        followed directly by one underscore.
        """
        return self._capture(self._THREE_PART_UNDERSCORES_CAT_NO, True)

    def match_2part_underscores(self) -> Optional[Match[str]]:
        """Try ``artist_-_title.`` and store artist and title."""
        return self._capture(self._TWO_PART_UNDERSCORES, True)

    def match_3part_spaces(self) -> Optional[Match[str]]:
        """Try ``label - artist - title.`` and store all three parts."""
        return self._capture(self._THREE_PART_SPACES, False)

    def match_2part_spaces(self) -> Optional[Match[str]]:
        """Try ``artist - title.`` and store artist and title."""
        return self._capture(self._TWO_PART_SPACES, False)

    def normalise(self, part: str) -> str:
        """Restore punctuation that the file name could not carry.

        * a run of two or more spaces becomes ``' & '``;
        * ``' w '`` becomes ``' w/'`` (the following space is dropped);
        * ``' t '`` becomes ``"'t "`` (e.g. ``Don t`` -> ``Don't``).

        A single space is an ordinary word separator and is left alone.

        :param part: captured text with words separated by spaces.
        :return: the cleaned-up text.
        """
        with_ampersands = self._AMPERSAND_GAP.sub(' & ', part)
        return with_ampersands.replace(' w ', ' w/').replace(' t ', "'t ")

    def normalise_underscore(self, part: str) -> str:
        """Turn underscores into spaces, then apply :meth:`normalise`."""
        return self.normalise(part.replace('_', ' '))

    def analyse(self) -> bool:
        """Parse the file name using the first layout that fits.

        Layouts are tried in a fixed order (underscore layouts before
        whitespace layouts, three-part before two-part); trying stops at the
        first success, which has already stored the captured parts.

        :return: ``True`` if some layout matched, ``False`` otherwise.
        """
        attempts = (
            self.match_3part_underscores,
            self.match_3part_underscores_cat_no_variation,
            self.match_2part_underscores,
            self.match_3part_spaces,
            self.match_2part_spaces,
        )
        # any() short-circuits, so later layouts never overwrite an earlier hit.
        return any(attempt() for attempt in attempts)