-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnfd-checker.py
190 lines (146 loc) · 6.61 KB
/
nfd-checker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# Relevant info about the file attributes can be found here:
# https://developers.google.com/drive/api/v2/reference/files#resource-representations
# The original PyDrive project is not maintained anymore.
# A maintenance fork is PyDrive2
# https://pypi.org/project/PyDrive2/
# TODO: during processing mark the processed files, and at the end of the run
# TODO: compile a list of the files which are not reachable from the drive root object
import warnings,logging,os,sys,resource,argparse,unicodedata
import oauth2client.client
from pydrive2.auth import GoogleAuth,RefreshError
from pydrive2.drive import GoogleDrive
# Module-level lookup tables, all of them keyed by the Drive item id.

# id -> GoogleDriveFile; every item received from Drive.
fileStash: dict = {}

# id -> list of the ids of the child nodes.
childList: dict = {}

# id -> bool; True if the name passes as NFC, False if not.
nfcStatus: dict = {}

# id -> full name with path, for the items found to be not NFC compliant.
# These are the same items for which nfcStatus holds False.
nonNfcItems: dict = {}
# Command line interface. The default of every option is given right where the
# option is defined; the parsed result is kept in the module-level `args`.
parser = argparse.ArgumentParser(
    description=(
        'Checks your Google Drive for NFD encoded filenames '
        'and converts them to NFC format'))
parser.add_argument(
    '--print_tree', action='store_true', default=False,
    help='print the Drive directory tree')
parser.add_argument(
    '--q_rootfiles', type=str,
    default="'root' in parents and trashed = false",
    help=(
        'The query string used to get the items to be worked on, default '
        'is the root of the drive (q = "%(default)s"). As a precaution, '
        'you should try it with a smaller scope, like '
        '"title contains \'---accent_test---\' and trashed = false"'))
# Dry run is the default; the conversion has to be requested explicitly.
parser.add_argument(
    '--no-dry_run', dest='dry_run', action='store_false', default=True,
    help='If set, only then will the NFD->NFC conversion take place')
parser.add_argument(
    '--report_memory', action='store_true', default=False,
    help='Enable reporting of used memory')
parser.add_argument(
    '--debug', action='store_true', default=False,
    help='Enable debugging')
args = parser.parse_args()
# TODO: instead of messing w/ the root logger, a named self logger should be used
# Only errors are shown by default; --debug lowers the root logger to DEBUG.
logging.basicConfig(level=logging.ERROR, format='%(asctime)s %(message)s')
if args.debug:
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    # Keep the discovery cache logger of the Google API client at ERROR, see
    # https://github.com/googleapis/google-api-python-client/issues/299
    discovery_cache_logger = logging.getLogger('googleapiclient.discovery_cache')
    discovery_cache_logger.setLevel(logging.ERROR)
logging.debug('Command line arguments: {}'.format(args))
def report_memory(desc):
    """Log, and optionally print, the peak memory use of this process.

    The figure is ``ru_maxrss`` as returned by ``resource.getrusage`` for the
    current process, converted to megabytes. The message is always logged at
    INFO level and is additionally printed when ``args.report_memory`` is set.

    Args:
        desc: Short text appended to the message to tell the call sites apart.
    """
    # ru_maxrss is reported in bytes on macOS but in kilobytes on Linux, so
    # the divisor that turns it into megabytes depends on the platform.
    units_per_mb = 1000000 if sys.platform == 'darwin' else 1000
    peak_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    log_msg = 'Using {} Mb - {}'.format(peak_rss / units_per_mb, desc)
    logging.info(log_msg)
    if args.report_memory:
        print('\n{}\n'.format(log_msg))
#'mimeType': 'application/vnd.google-apps.folder'
def is_gfolder(gdfv2):
    """Tell whether a Drive item is a folder.

    Args:
        gdfv2: Mapping holding the metadata of the item; it is only read.

    Returns:
        True when the item has a 'mimeType' entry equal to the Google Drive
        folder MIME type, False otherwise (also when the entry is missing).
    """
    if 'mimeType' not in gdfv2:
        return False
    return gdfv2['mimeType'] == 'application/vnd.google-apps.folder'
def print_gfile(ident_str, gdfv2):
    """Print one line of the tree listing for a Drive item.

    A folder is shown with a '<DIR> ' prefix and the number of its children,
    or 'N/A' when the folder has no entry in childList. An item whose name is
    not NFC (per nfcStatus) is flagged with a leading '***'.

    Args:
        ident_str: Indentation string printed in front of the title.
        gdfv2: Metadata of the item; 'id' and 'title' are read, and the id has
            to be present in nfcStatus already.
    """
    item_id = gdfv2['id']
    if is_gfolder(gdfv2):
        if item_id in childList:
            no_of_children = len(childList[item_id])
        else:
            no_of_children = 'N/A'
        prefix = '<DIR> '
        suffix = ' [{} items]'.format(no_of_children)
    else:
        prefix = suffix = ''

    # Non-NFC names stand out by the marker in front of the indentation.
    marker = '***'
    if nfcStatus[item_id]:
        marker = ''

    label = prefix + gdfv2['title'] + suffix
    print('{}{}{:<45}'.format(marker, ident_str, label))
def proc_item(level, ppath, gdfv2):
    """Check the name of one Drive item and recurse into its children.

    Whether the title is NFC normalized is recorded in nfcStatus; a title that
    is not is also stored in nonNfcItems together with its full path. When
    args.print_tree is set the item is printed, indented according to its
    depth. A folder with an entry in childList is then walked recursively.

    Args:
        level: Depth of the item, 0 for the items the walk starts with.
        ppath: Path of the parent; the title is appended to it directly.
        gdfv2: Metadata of the item; 'id' and 'title' are read.
    """
    item_id = gdfv2['id']
    title = gdfv2['title']

    is_nfc = unicodedata.is_normalized('NFC', title)
    nfcStatus[item_id] = is_nfc
    if not is_nfc:
        nonNfcItems[item_id] = ppath + title

    # Four spaces per level, followed by a single separating space.
    ident_str = '{1:>{0}} '.format(4*level, '')
    if args.print_tree:
        print_gfile(ident_str, gdfv2)

    if not is_gfolder(gdfv2) or item_id not in childList:
        return
    child_path = ppath + title + "/"
    for child_id in childList[item_id]:
        proc_item(level + 1, child_path, fileStash[child_id])
report_memory('startup')

# Authorize against Google Drive: stored credentials are re-used when they can
# be loaded, otherwise the user is walked through the authorization by hand.
gauth = GoogleAuth()
# If the credentials file is not found, loading throws a warning; that case is
# handled right below, so the warning is silenced.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    gauth.LoadCredentials()
gauth.GetFlow()
if gauth.credentials is None:
    logging.info('Credentials file not found, '
                 'requesting authorization interactively')
    print('\nCredential file not found. Please paste the following line to your '
          'browser, authorize the app and paste back the received code, '
          'then press enter.\n')
    print(gauth.GetAuthUrl())
    # A pasted code easily carries stray whitespace around it, which is not
    # part of the code and can make the authorization fail.
    code = input().strip()
    logging.info('Credentials received')
    gauth.Auth(code)
    # Store the credentials so that the next run does not have to ask again.
    gauth.SaveCredentials()
drive = GoogleDrive(gauth)
report_memory('auth finished')
# Request the two listings the run works on: every non-trashed item of the
# drive, then the items selected by --q_rootfiles to start the walk with.
# PyDrive uses V2 of the Google Drive API (2021.04.28)
try:
    print("Requesting list of all files and folders in this drive ... ", end='', flush=True)
    all_items_query = {'q': "trashed = false"}
    allFileList = drive.ListFile(all_items_query).GetList()
    print("done, {0} items".format(len(allFileList)), flush=True)
    report_memory('loaded all files')

    print("Requesting elements to start with... ", end='', flush=True)
    start_items_query = {'q': args.q_rootfiles}
    rootFileList = drive.ListFile(start_items_query).GetList()
    print("done, {0} items\n".format(len(rootFileList)), flush=True)
    report_memory('loaded starting file set')
except RefreshError:
    # The stored credentials cannot be used; remove them and give up.
    logging.error("Failed to re-use the access token, probably it has expired or invalid")
    credentials_file = "credentials.json"
    if os.path.exists(credentials_file):
        logging.error("Deleting the credentials file (it's unusable)")
        os.remove(credentials_file)
    sys.exit(1)
# Index every received item by its id and record it under each of its parents.
for item in allFileList:
    item_id = item['id']
    fileStash[item_id] = item
    if 'parents' not in item:
        continue
    for parent in item['parents']:
        childList.setdefault(parent['id'], []).append(item_id)
report_memory('file stash and child list created')

# The items stay referenced by fileStash; only the flat list itself is dropped.
del allFileList
report_memory('deleted original file list')
# Walk the tree below every starting item, checking the names on the way.
for root_item in rootFileList:
    # The starting set comes from its own, user-supplied query, so it can hold
    # items (e.g. trashed ones) which the "trashed = false" listing behind
    # fileStash never returned. Register them, otherwise the lookup in the
    # loop below fails with KeyError for such an item.
    fileStash.setdefault(root_item['id'], root_item)
    proc_item(0, '/', root_item)
report_memory('process of items complete\n')

print('{} item with non-NFC name\n'.format(len(nonNfcItems)))
# Report every offending item and, unless this is a dry run, rename it to the
# NFC form of its title.
for item_id, path in nonNfcItems.items():
    gf = fileStash[item_id]
    dir_str = ' <DIR>' if is_gfolder(gf) else ''
    print('{}{} ... '.format(path, dir_str), end='', flush=True)
    if args.dry_run:
        print('NOT CHANGED (dry run)')
    else:
        gf['title'] = unicodedata.normalize('NFC', gf['title'])
        # Upload() sends the changed title back to Drive.
        gf.Upload()
        print('FIXED')