-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathxsfer_gd_to_tera.py
261 lines (219 loc) · 9.54 KB
/
xsfer_gd_to_tera.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# -*- coding: utf-8 -*-
"""Xsfer gd to tera.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/118htDK6HaGQC6fMl3XR8JAP9CdtMPuM3
<h3>Execute every node in sequence</h3>
*recommended <br/>
or you will miss some important edits
"""
# required modules installation
!pip install internetarchive
# downloading anytype of file from server
import urllib.request
# following variable keep target file location with file extension
# It is google photos export download link
tar_loc="10.zip"
urllib.request.urlretrieve('https://00f74ba44b06ad2b25331f2222d0a6d55fd7beefcb-apidata.googleusercontent.com/download/storage/v1/b/dataliberation/o/20210531T101632Z%2F7404861098411033338%2F121bdc54-40a4-427c-b8ca-882a6e8b3d56%2F1%2F38af1e43-9dbf-42c4-9bf2-2f4f7e48e461?jk=AFshE3WvL6UYYr--0UO14GbtXUT2XMMJi0oQHYI7UPZ6JTrIjCpqaS6pG7cDfgB4gmIcIv0E4H70nveONj85dX-JAYVlOLvchlHjELfzrDfb6-ilfm3dt7v9QWJs0N0Wj32h7JFRGccO3ff1-F3OtB2UYk5TKG8DK5S3NEfTSShFa-cYdC8AhE7tn8M1w8nouvBh6-UtxFgSYV919G-23DxC9BheLWhxeg_xT6wkEWXqmZu7GWWr39kl5SCAqhz4ONhjEkhRISH7KlomtJhIF52Us3P8smV1SzlQLsap5BIcZftwvbV4Y3mW3MUSRZ_Nk39Y42iO2J3-3oxzwejCW6oop7LKjIKjg-5wuygPw6butE8uoMC09wntfGEtqh_4ZyVtqMN1bKQgvYv7e0b9GPP5ZowWY12V3jFyJ9Zit3ZWqcGQZS_e7b42vsCb62Le1MAYbTUFje45VenOOilw9IjE6gZWdOtK9RAQ9CNCZaMPDHpVK7C7M9RgxuOCNqoAbinTZ5g4XHU6mufLnwMKu092HrfhalrlQMEylRv99wV50J-6p6e_MgSWJ7S0TAL9miFw_sgymXmD9Ry84zPBixIjz3w3eN79NQtmzyJbsV6kR2HwgEYonQAoaUfaLGggKi3CehOCl5VaTFCdeXboXxTweKBzq1hwMUVz6Ss2UKSl42vItFSh15uWvZ5pEocXqRwIs-_f5o8lm_7n7uXlyRFdDY1gS6ISDkKWD2vsftdTM94cLL5MAhhdF68Mn6pQ_kDNYiBUF2hohqib1sFij_4ULikWngT6ctZzQ5SSY7EnZJiWlAzeYZeJa0mbyCa1gsbOUX60K8OjIxjJpJP8u1NLeX9ALYj84kiCQljxTBaPNhKt28_0oE-5PcfHEP5JJzizS1SE3Sjh8131Wm6gg2MJSsSwvNOZDQ0awJN9XmeFaD2Iz0xA5aMzNarrbFCT0UzagTIL6nDDBE9StDAuD1rXAgVhXt6IpbvktnIBzQfFCX5YPCWqd6x-JaXbWR4IVnn55z9O_WarZwavAeAgvMVznje98SeWIj4FkvUrrz3Z7yfZR_dLF-HCz2dz1ORoFTWa3TxDbEJXT48WkguvwqU&isca=1',tar_loc)
# Create the working directory used by the later steps and switch into it.
# `os` is imported here because no earlier line of the script imports it.
import os

try:
    os.mkdir("/content/KOTL/")
except FileExistsError:
    # Re-running this step must not abort just because the folder is there.
    print("folder already exists")
# moving to the work folder so the unzip step runs inside it
work_fol = "/content/KOTL"
os.chdir(work_fol)
if os.getcwd() == work_fol:
    print(f'directory changed to {work_fol}')
# copying zip file to work folder
# file_path="/content/drive/MyDrive/gp_310521/10.zip"
# target_loc="/content/KOTL/"
# z="!cp "+file_path+" "+target_loc
# os.popen(z)
# if the os module doesn't work, or when copying a file of unknown size, use a unix command instead
# !cp "/content/drive/MyDrive/gp_310521/10.zip" "/content/KOTL/"
# or this for progress of copying file
!rsync -ah --progress "/content/drive/MyDrive/gp_310521/10.zip" "/content/KOTL/"
#1st parameter is source and 2nd is destination
# Print the size of one file, expressed in the largest binary unit
# (factor 1024) that the byte count exceeds.
import os

# NOTE(review): nothing in this script creates python.zip; the archive that is
# written later is gpcontent.zip - confirm the intended path.
file_loc = "/content/KOTL/zipping/python.zip"
st = os.stat(file_loc).st_size

size = "bytes"
gb = 1  # divisor matching the selected unit (name kept from the original)
for unit in ("kilobytes", "megabytes", "gigabytes", "terabytes"):
    # Stop as soon as the value no longer exceeds 1024 of the current unit.
    if st <= gb * 1024:
        break
    size = unit
    gb = 1024 * gb

# A format spec replaces the old string slicing, whose `find('.') != 0`
# guard was always true and left the else branch unreachable.
print(f'File Size in {size} is {st / gb:.2f}')
# Delete a previously extracted Takeout tree (the folder and everything in it).
import shutil

stale_tree = '/content/KOTL/Takeout'
try:
    shutil.rmtree(stale_tree)
except FileNotFoundError as missing:
    # Nothing to delete: show the error text and carry on.
    print(missing)
# Unzip the archive with the built-in ZipFile class and time the whole step.
# printdir() lists every member; the original note says the listing is cut
# off at 5000 lines in the notebook output.
import time
from zipfile import ZipFile

begin = time.time()
zip_loc = "10.zip"  # relative path: resolved against the current working directory
# Bound to `archive` rather than `zip` so the builtin zip() stays usable
# for every statement that runs after this one.
with ZipFile(zip_loc, 'r') as archive:
    print("available files in zip :\n")
    archive.printdir()
    print('\nExtracting all the files now...')
    # No target given, so members are extracted into the working directory.
    archive.extractall()
    print('Unzipping Done!')
end = time.time()
print(f"\nTotal runtime of this code block is {end - begin}")
# Prepare /content/KOTL/out and make it the working directory.
# The parent folder /content/KOTL must already exist.
import os

try:
    os.mkdir("/content/KOTL/out")
except FileExistsError:
    print("folder already exists")

# switch into the folder and confirm the change took effect
work_fol = "/content/KOTL/out"
os.chdir(work_fol)
if work_fol == os.getcwd():
    print(f'directory changed to {work_fol}')
#taking out all .jpg .bmp .tif .tiff .jpeg .gif .png .eps images from this folder and all sub-folders
import glob
import os
i=0
#this function will remove <space>,),(,` from folder or file name
def changer(x):
    """Rename a file or folder so its path has no space, '(', ')' or '`'.

    Args:
        x: Path of an existing file or folder.

    Returns:
        The sanitized path. It equals ``x`` when the path contained none of
        the four characters; in that case nothing is renamed or printed.

    Raises:
        OSError: If ``os.rename`` fails (for example ``x`` does not exist).
    """
    # One translate() pass drops all four characters wherever they occur,
    # including index 0, which the former ``find(...) > 0`` checks skipped.
    cleaned = x.translate({ord(ch): None for ch in ' ()`'})
    if cleaned == x:
        return x
    print('changing name to ', cleaned)
    # NOTE: on POSIX, os.rename silently replaces an existing file at `cleaned`.
    os.rename(x, cleaned)
    return cleaned
def xsfer(cpf, cpt):
    """Copy every ``*.mp4`` file directly inside ``cpf`` into folder ``cpt``.

    A file whose path contains a space, '(', ')' or '`' is first renamed
    through ``changer``. The module-level counter ``i`` grows by one for each
    file that was copied successfully.

    Args:
        cpf: Source folder; only its direct children are matched.
        cpt: Destination folder; created when it does not exist yet.
    """
    import shutil  # local import keeps this block self-contained

    global i
    # Image patterns the original also listed but never used:
    # *.bmp *.tif *.tiff *.jpg *.jpeg *.gif *.png *.eps
    types1 = ['*.mp4']
    images = []
    for pattern in types1:
        # escape() keeps '[' or '*' in the folder name from acting as wildcards
        images.extend(glob.glob(os.path.join(glob.escape(cpf), pattern)))
    if not images:
        return
    # Without an existing folder every copy would land on one file named cpt.
    os.makedirs(cpt, exist_ok=True)
    for x in images:
        # `in` instead of find(): find() returns -1 (truthy) when absent,
        # which made the old check fire for every single file.
        if any(ch in x for ch in ' ()`'):
            x = changer(x)
        try:
            # A direct copy replaces the unquoted `cp` shell string that was
            # started with os.popen and never waited for.
            shutil.copy(x, cpt)
        except OSError as err:
            print('could not copy ', x, ': ', err)
            continue
        i = i + 1
def caller(cpf, cpt):
    """Walk ``cpf`` recursively and pass folders holding ``*.mp4`` files to ``xsfer``.

    A folder whose path contains a space, '(', ')' or '`' is renamed through
    ``changer`` before it is entered. As in the original, the process working
    directory is changed to each folder that is visited.

    Args:
        cpf: Folder to scan.
        cpt: Destination folder forwarded unchanged to ``xsfer``.
    """
    # `in` instead of find(): find() returns -1 (truthy) when absent,
    # which made the old check fire for every folder.
    if any(ch in cpf for ch in ' ()`'):
        cpf = changer(cpf)
    print("changing to "+cpf+" .....")
    os.chdir(cpf)
    # these are the types of file we want to extract
    # (image patterns the original listed but never used:
    #  *.bmp *.tif *.tiff *.jpg *.jpeg *.gif *.png *.eps)
    types1 = ['*.mp4']
    here = glob.escape(cpf)  # keep '[' or '*' in the path from acting as wildcards
    if any(glob.glob(os.path.join(here, pattern)) for pattern in types1):
        xsfer(cpf, cpt)
    # A real directory test replaces "name has no dot", which skipped dotted
    # folders and tried to chdir into extensionless files.
    subfolders = [
        name for name in os.listdir(cpf)
        if os.path.isdir(os.path.join(cpf, name))
    ]
    print('available folders in ', subfolders)
    for dir_loc in subfolders:
        caller(cpf+"/"+dir_loc, cpt)
# Start the recursive copy from the extracted Takeout tree into out2,
# then print the value of the module-level counter i.
takeout_root = '/content/KOTL/Takeout'
copy_target = '/content/KOTL/out2'
caller(takeout_root, copy_target)
print("total number of images transfered ", i)
# Report the disk usage of a folder (via `du -sh`) and how many regular
# files sit directly inside it.
import os
import subprocess

# NOTE(review): the copy step above targets '/content/KOTL/out2' while this
# inspects 'out1' - confirm which folder is meant.
path = '/content/KOTL/out1'
# First whitespace-separated field of the du output is the size.
size = subprocess.check_output(['du', '-sh', path]).split()[0].decode('utf-8')
print("Directory size: " + size)
# numbers of files available in this particular folder.
# isfile() needs the full path: a bare name is resolved against the current
# working directory, not against `path`.
file_count = sum(
    1 for name in os.listdir(path) if os.path.isfile(os.path.join(path, name))
)
print("number of files available :", file_count)
# Prepare the folder that will hold the zip archive and make it the
# working directory.
import os

try:
    os.mkdir("/content/KOTL/zipping")
except FileExistsError:
    print("folder already exists")

# switch into the folder and confirm the change took effect
work_fol = "/content/KOTL/zipping"
os.chdir(work_fol)
if work_fol == os.getcwd():
    print(f'directory changed to {work_fol}')
#now compressing files available in '/content/KOTL/out2' or sub-folders to into gpcontent.zip
import os
import zipfile
def zipdir(path, ziph):
    """Add every file found under ``path`` to an open zip archive.

    Args:
        path: Folder to walk recursively.
        ziph: Writable zipfile handle that receives the files.

    Archive names are taken relative to the parent of ``path``, so each
    entry starts with the folder's own name.
    """
    archive_base = os.path.join(path, '..')
    for folder, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            full_path = os.path.join(folder, filename)
            ziph.write(full_path, os.path.relpath(full_path, archive_base))
# Build gpcontent.zip from everything under /content/KOTL/out2.
# The `with` block closes the archive even when zipdir raises part-way
# through; the explicit close() call was skipped in that case.
with zipfile.ZipFile('/content/KOTL/zipping/gpcontent.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipdir('/content/KOTL/out2', zipf)
# uploading to the Internet Archive (archive.org) using the ia module
#configuring user details for login into account
!ia configure
# or
# from internetarchive import configure
# configure('[email protected]', 'password')
# Show the content of the ia configuration file (presumably the one written
# by `ia configure` - confirm the location on your runtime).
# WARNING: this prints the stored credentials into the output.
import os

config_path = '/root/.config/ia.ini'
# A context manager closes the file again, and the data is bound to
# `config_bytes` instead of rebinding the builtin name `str`.
with open(config_path, 'rb') as config_file:
    config_bytes = config_file.read()
print(config_bytes.decode())
# Build an internetarchive session from an explicit pair of S3-style keys.
from internetarchive import get_session

# ------ replace these placeholders with the keys shown in the output above,
# ------ or with the ones listed at https://archive.org/account/s3.php after login
access = 'xxxxxxxxxxxxxxxx'
secret = 'xxxxxxxxxxxxxxxx'
# ----------------------------
c = dict(s3=dict(access=access, secret=secret))
session = get_session(config=c)
print(session)
# Fetch the archive.org item that will receive the upload, describe the
# files with a metadata dict, and upload them.
from internetarchive import get_item
# The identifier names the target item; per the original note it has to be
# initialised before uploading.
# NOTE(review): the `session` created earlier is not passed to get_item here,
# so presumably the default configuration is used - confirm.
cool_podcast = get_item('my_007') # identifier of the target item
print(cool_podcast.metadata)
# NOTE(review): the copy and zip steps use '/content/KOTL/out2', while this
# uploads '/content/KOTL/out' - confirm which folder is meant.
upload_data = ['/content/KOTL/out'] # a list of files or folders
# when a folder is listed, every file available in that folder is expected to be uploaded
# metadata of the files we want to upload
# NOTE(review): title and description look like leftover sample text (the
# title contains an unmatched double quote) - confirm before uploading.
md = {'title': 'Intellectual Property, and Other Legal Concerns" by Me (2016) - my_007',
'mediatype': 'data',
'collection': 'opensource_media',
'date': '2021-06-01',
'description': '<div><i>Pokémon GO</i> was an immediate sensation when Niantic released it in 2016, and it continues to be one of the highest-grossing apps on mobile devices. While the hype was still high, Tiffany C. Li wrote about potential legal rankles Niantic might face on the road to becoming a Poké Fan Master.<br /></div><div><br /></div><div><a href="https://osf.io/preprints/lawarxiv/gexpm/" rel="nofollow">The Paper.</a></div><div><br /></div><div>Mike Overby (<a href="https://twitter.com/lethargilistic" rel="nofollow">@lethargilistic</a>) reads <em>Amicus Lectio</em> (<a href="https://twitter.com/amicuslectio" rel="nofollow">@AmicusLectio</a></div>).',
'subject': ['law', 'pokemon', 'pokemon go', 'amicus lectio',
'privacy', 'trespass', 'augmented reality', 'copyright',
'trademark', 'intellectual property'],
'creator': 'HCS All Home',
'language': 'English',
'licenseurl': 'http://creativecommons.org/publicdomain/zero/1.0/'}
# The result of upload() is not captured here.
cool_podcast.upload(upload_data, metadata=md, verbose=True)
# per the original note, the call reports a status code such as 200 when everything is ok
# Print the title of every item returned by the identifier search.
from internetarchive import search_items

# NOTE(review): the query asks for 'hcs_007' while the upload step targets
# the item 'my_007' - confirm which identifier is meant.
matching_items = search_items('identifier:hcs_007').iter_as_items()
for item in matching_items:
    print(item.metadata['title'])