-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreginsert.py
38 lines (34 loc) · 1.06 KB
/
reginsert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import csv
# Weight every binary trope indicator in master_test.csv by that trope's
# regression coefficient and write the result to master_test_plus.csv.

# Index of the first trope column in the master file. Columns before it are
# copied through unchanged; columns from it onward are treated as tropes.
FIRST_TROPE_COL = 19

# Python 2 needs the 'U' flag for universal-newline handling, whereas
# Python 3 text mode does that by default and rejects 'U' from 3.11 onward.
READ_MODE = 'rU' if str is bytes else 'r'

# Import regression coefficients, keyed by trope id. The `with` block closes
# the file even if a row fails to parse.
reg = {}
with open('tvt_timeseries_all.csv', READ_MODE) as infile:
    for row in csv.DictReader(infile, delimiter=','):
        reg[str(row.get('trope_id'))] = float(row.get('reg_coef'))

# Import base training or test data (with binary 1/0 trope variables)
with open('master_test.csv', READ_MODE) as infile:
    reader = csv.DictReader(infile, delimiter=',')
    h = reader.fieldnames        # Full header
    t = h[FIRST_TROPE_COL:]      # Only the trope columns
    nt = h[:FIRST_TROPE_COL]     # All other columns that won't be changed

    # Output to file; the same header is reused so column order is preserved.
    with open('master_test_plus.csv', 'w') as outfile:
        writer = csv.DictWriter(outfile, delimiter=',', fieldnames=h)
        writer.writeheader()

        # rnum only drives the progress message printed for each row.
        for rnum, row in enumerate(reader, 1):
            newrow = {}
            # Copy unaltered columns
            for col in nt:
                newrow[col] = row.get(col)
            # Multiply corresponding binary variable by trope's regression
            # coefficient. The trope column name must match a trope_id from
            # the coefficient file, otherwise this raises KeyError.
            for trope in t:
                newrow[trope] = float(row.get(trope)) * reg[trope]
            writer.writerow(newrow)
            # Single-argument form prints identically on Python 2 and 3.
            print('Row %s Done' % rnum)