#!/usr/bin/env python3
# This file will be used for scraping the data from the 1587 spec
# into some nice structures to be used for message inspection
# TODO: Maybe make these tickets/issues (but let's see where we are
#       once public)
# 1) Fix up weird sequences from the format sections.
#    The spec here is making me angry due to inconsistencies: some places just
#    repeat the letter to show repetition, some use an ellipsis, in other
#    instances the '*' character is used as a delimiter and shown as such in
#    the "Sequence" field, and in other spots there is a letter in the
#    sequence field.
# 2) Multi-line summaries seem to be handled, but need to work with the PIDs
#    that get filled in from other appendices
# 3) Datatypes that span multiple lines (ex: PID 500) need to be handled
# 4) Need to handle multi-line keys for the pid_fields dict
# 5) Handle bytes/bits definitions using the sequence.
#    There is a lot of variation among the format definitions (for instance
#    PID 206), which makes it very difficult to find a general solution for
#    using the parsed content
# 6) Take out locations where I used hard-coded line numbers due to time
# 7) PID 79 is missing; may also want to check all PIDs and see what others
#    are missing
#
######################################################################
import re,os,pickle,itertools,json
import pprint as pp, logging as lg
import configparser
import platformdirs
orig_work_dir = os.getcwd()
script_dir = os.path.dirname(__file__)
os.chdir(script_dir)
config = configparser.ConfigParser()
config.read("config.cfg")
# This file should be the output of: pdftotext -layout J1587_201301.pdf <filename>
filepath_layout_1587 = config.get("Filepaths", "1587_filepath", raw=False)
# This file should be the output of: pdftotext -layout J1708_201609.pdf <filename>
filepath_layout_1708 = config.get("Filepaths", "1708_filepath", raw=False)
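
# For reference, config.cfg is expected to provide the two keys read above.
# A minimal sketch (paths are illustrative only):
#   [Filepaths]
#   1587_filepath = /path/to/J1587_layout.txt
#   1708_filepath = /path/to/J1708_layout.txt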
# Lines with layout
with open(filepath_layout_1587, "r", encoding='utf-8') as fhl:
  linesl = fhl.readlines()
# 1708 lines with layout
with open(filepath_layout_1708, "r", encoding='utf-8') as fh1708l:
  lines1708 = fh1708l.readlines()
os.chdir(orig_work_dir)

def get_mids(linesl):
  """ Return dictionary of mid:meaning """
  foundstartmids = False
  mids = {}
  for line in linesl:
    # Before table
    if not foundstartmids and "MID #" in line:
      foundstartmids = True
    # MID table found
    elif foundstartmids:
      if re.match(' *[0-9]+', line.strip()):
        s = re.split(' {3,}', line)
        # We only grab the Basic Heavy Duty Column;
        # otherwise s[3] for Mass Transit Specific, s[4] for Marine Specific
        mids[s[1]] = s[2]
    # After table
    if "NOTE: Designers" in line:
      break
  return mids

def get_pids(linesl):
  """
  Requires layout lines.
  Returns dictionary of pid:meaning
  """
  pids = {}
  foundstartpids = False
  for line in linesl:
    # After table
    if "These PIDs are superseded by PIDs 194, 195, and 196." in line:
      break
    # Before table
    if not foundstartpids and " TABLE 2 - PARAMETER IDENTIFICATION" in line:
      foundstartpids = True
    elif foundstartpids:
      if re.match(r' +[0-9]+[^\.]', line):
        garbage, pid, meaning = re.split(" {9,}", line)
        pids[pid.strip()] = meaning.strip()
  return pids

def get_fmis(linesl):
  """ Requires layout lines. Return dictionary of FMIs """
  foundstartfmis = False
  fmis = {}
  for line in linesl[1247:1265]:
    if "(that is" in line: continue
    fmi, meaning = re.split(" {5}", line.strip())
    fmis[fmi] = meaning
  if len(fmis) != 16:
    return -1
  return fmis

def get_diagnostic_sids():
  return {str(150+x): "System Diagnostic Code #"+str(x) for x in range(1,6)}

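# Illustration for get_diagnostic_sids(): the comprehension above yields
#   {"151": "System Diagnostic Code #1", ..., "155": "System Diagnostic Code #5"}
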
def get_common_sids(linesl):
  """ Use the formatted lines. Return dictionary of common_sid:meaning """
  foundstartcsids = False
  common_sids = {}
  for line in linesl:
    if foundstartcsids and "Engine SIDs" in line:
      break
    if not foundstartcsids and " Common SIDs" in line:
      foundstartcsids = True
    elif foundstartcsids:
      if re.match(r' +[0-9]+[^\.]', line):
        garbage, sid, meaning = re.split(" {9,}", line)
        common_sids[sid.strip()] = meaning.strip()
  return common_sids

def get_xdevice_sids(linesl):
  """ Get the sids that are the same across devices """
  d = get_diagnostic_sids()
  d.update(get_common_sids(linesl))
  return d

def get_sids_for_mids(linesl):
  """ Get the sids that are related to specific mids as a dictionary """
  sets = {}
  regex = r".* SIDs *.*\( *MIDs? *="
  for line in linesl:
    if re.match(regex, line):
      group = line.split("(")[0].strip()
      sids = re.findall("[0-9]+", line)
      sets[group] = [sids, {}]
  for group in sets.keys():
    if "Engine" in group:
      sid_dict = parse_sids_for_mid_group(group, "511", linesl)
    else:
      sid_dict = parse_sids_for_mid_group(group, "150", linesl)
    sets[group][1] = extend_dict(clean_ids(sid_dict))
  return sets

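# Illustration for get_sids_for_mids(): the returned shape is roughly
# (heading text and meanings depend on the spec; shown here hypothetically)
#   {"Engine SIDs": [["128", "175", ...], {"1": "<meaning>", "2": "<meaning>", ...}]}
# i.e. each group heading maps to [MIDs found on the heading line, SID:meaning dict].
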
def parse_sids_for_mid_group(group, groupenddemarc, linesl):
  found = False
  dic = {}
  for line in linesl:
    if not found and group in line:
      found = True
    elif found:
      if re.match(r' +[0-9]+[^\.]', line):
        try:
          sid, meaning = re.split(" {6,}", line.strip())
          dic[sid] = meaning
        except ValueError:
          print("Error parsing: %s" % line)
          dbg(re.split(" {6,}", line.strip()))
      # last line we need
      if groupenddemarc in line:
        break
  return dic

def get_mids_from_1708(lines1708):
  mids = {}
  for line in lines1708[541:568]:
    s = line.strip()
    if re.match("[0-9]{1,3}", s):
      try:
        mid, meaning = re.split(" {6,}", s)
      except ValueError:
        mid = "125"
        meaning = meaning125
    else:
      m = re.split(r"\(", line.strip())
      if len(m) > 1:
        meaning125 = m[1] + " J2497"
    mids[mid] = meaning
    if "88" in mid:
      mids[mid] = mids[mid] + ". Suggested for dynamic allocation in J2497."
    if "87" in mid:
      mids[mid] = mids[mid] + ". Signals ABS system is actively controlling ABS event."
    if "10" in mid and "110" not in mid:
      mids[mid] = mids[mid] + ". Trailer ABS indicator ON/OFF."
  return mids

def combine_mid_ranges(linesl, lines1708):
  d = get_mids_from_1708(lines1708)
  d = extend_dict(d)
  c = get_mids(linesl)
  c = extend_dict(c)
  # Be sure we are doing this in the right order
  c.update(d)
  return c

def clean_sids_for_mids(sids_for_mids):
  n = {}
  for k, d in sids_for_mids.items():
    n[k] = clean_ids(d)
  return n

def clean_ids(ids):
  n = {}
  for k, v in ids.items():
    if "(" in k: n[k[:k.find(" (")]] = v
    else: n[k] = v
  return n

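# Illustration for clean_ids() (key/value text hypothetical):
#   clean_ids({"12 (dual)": "Foo", "13": "Bar"}) == {"12": "Foo", "13": "Bar"}
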
def range_from_hyphenated(key, meaning):
  splitchar = ""
  r = {}
  if "-" in key:
    splitchar = "-"
  elif b"\xe2\x80\x94".decode('utf-8') in key:    # em dash
    splitchar = b"\xe2\x80\x94".decode('utf-8')
  elif b"\xe2\x80\x93".decode('utf-8') in key:    # en dash
    splitchar = b"\xe2\x80\x93".decode('utf-8')
  if splitchar:
    rng = key.split(splitchar)
    r = {str(n): meaning for n in range(int(rng[0]), int(rng[1])+1)}
  else:
    r[key] = meaning
  return r

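# Illustration for range_from_hyphenated() (meaning text hypothetical):
#   range_from_hyphenated("1-3", "Reserved") == {"1": "Reserved", "2": "Reserved", "3": "Reserved"}
#   range_from_hyphenated("42", "Reserved")  == {"42": "Reserved"}
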
def extend_dict(d):
  # Take in dictionary of [msp]id:meanings and return expanded version
  r = {}
  for k, v in d.items():
    res = range_from_hyphenated(k, v)
    r.update(res)
  return r

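# Illustration for extend_dict() (meaning text hypothetical):
#   extend_dict({"1-2": "A", "7": "B"}) == {"1": "A", "2": "A", "7": "B"}
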
def get_next(it):
  """ Ignore the pdf junk that delineates pages, and blank lines """
  line = next(it)
  while True:
    # \x0c is the form feed pdftotext emits at page breaks
    if b"\x0c".decode('utf-8') in line.strip(): line = next(it)
    elif "Downloaded from SAE International by" in line.strip(): line = next(it)
    elif " "*34+"J1587" in line.strip(): line = next(it)
    elif "_"*5 in line.strip(): line = next(it)
    elif not line.strip(): line = next(it)
    else: break
  return line

def combine_custom_database(filepath, doc):
  """
  Add or overwrite structs from custom database into our
  main object.
  """
  try:
    fd = open(filepath, "rb")
  # Made the decision to keep processing with original struct
  # but print error
  except IOError as e:
    lg.error("IOError: %s" % e)
    return doc
  override_dict = json.load(fd)
  fd.close()
  # doc.update(override_dict) is clobbering for some reason
  # so do it more manually for now
  for k, v in override_dict.items():
    doc[k].update(v)
  return doc

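# Illustration for combine_custom_database(): a minimal override file sketch
# (key/value text hypothetical). Each top-level key must already exist in doc
# ("mids", "pids", ...), since the loop above does doc[k].update(v):
#   {"pids": {"84": "Road Speed (custom note)"},
#    "mids": {"142": "Custom body controller"}}
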
############################################################
#                          UTILS                           #
#  These are functions we may want to call from elsewhere  #
############################################################
def dbg(s):
  pp.pprint(s)

def get_sid_mids():
  """ Return list of mids that have sid associations.
  Handy for quick check to see if next byte is an sid or pid,
  if I have interpreted the spec correctly.
  """
  l = [128, 175, 183, 184, 185,
       186, 130, 176, 223, 136,
       137, 138, 139, 246, 247,
       140, 234, 142, 187, 188,
       143, 146, 200, 150, 151,
       157, 162, 191, 163, 166,
       167, 168, 169, 186, 178,
       190, 217, 218, 219, 222,
       232, 254, 248, 253, 177]
  return l

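# Illustration for get_sid_mids(): a quick membership check while walking a
# message (variable name hypothetical); per the docstring this is only a hint.
#   if int(mid) in get_sid_mids(): ...  # following ID byte is likely an SID
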
def get_bytecount_from_pid(pid):
  """
  The pid is the calculated value, which could be composed of several
  bytes when using page extensions.
  Return the number of bytes the pid utilizes. 3 for n. -1 for unknown
  """
  bytes1 = [(0,127),(256,383),(512,639),(768,895)]
  bytes2 = [(128,191),(384,447),(640,703),(896,959)]
  bytesn = [(192,253),(448,509),(704,765),(960,1021)]
  # 254 is proprietary data. I will put it under "variable" len, as it is not
  # addressed in the spec, but we had it in our captures
  bytesn.append((254,254))
  for rng in bytes1:
    if pid in range(rng[0], rng[1]+1): return 1
  for rng in bytes2:
    if pid in range(rng[0], rng[1]+1): return 2
  for rng in bytesn:
    if pid in range(rng[0], rng[1]+1): return 3
  return -1

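# Illustration for get_bytecount_from_pid(), per the ranges above:
#   get_bytecount_from_pid(84)  -> 1   (single data byte)
#   get_bytecount_from_pid(168) -> 2   (two data bytes)
#   get_bytecount_from_pid(194) -> 3   (variable-length "n" range)
#   get_bytecount_from_pid(255) -> -1  (not covered by the ranges above)
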
def get_document_object(customdb="", nocache=False):
  global linesl, lines1708
  cache_dir = platformdirs.user_cache_dir('pretty_j1587', 'ais')
  pickled_cache_file = os.path.join(cache_dir, "J1587_1708_2497_doc_obj")
  doc = {}
  # Use a cache in cache dir
  if not os.path.exists(pickled_cache_file) or nocache:
    doc["mids"] = combine_mid_ranges(linesl, lines1708)
    doc["fmis"] = get_fmis(linesl)
    doc["pids"] = extend_dict(clean_ids(get_pids(linesl)))
    doc["xdev_sids"] = extend_dict(get_xdevice_sids(linesl))
    doc["sids_for_mids"] = get_sids_for_mids(linesl)
    doc["pid_fields"] = get_pid_fields(linesl)
    # Write the file
    os.makedirs(cache_dir, exist_ok=True)
    with open(pickled_cache_file, "wb") as fh:
      pickle.dump(doc, fh, pickle.HIGHEST_PROTOCOL)
  else:
    # Load the cached file
    # Reboot or cache file deletion will require
    # reparsing the spec documents
    with open(pickled_cache_file, "rb") as fh:
      doc = pickle.load(fh)
  if customdb: doc = combine_custom_database(customdb, doc)
  return doc

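# Illustration for get_document_object(): typical lookups on the returned dict
# (IDs shown are arbitrary; values are whatever text was parsed from the spec).
#   doc = get_document_object()
#   doc["pids"].get("84")    # meaning text for PID 84, if present
#   doc["mids"].get("128")   # meaning text for MID 128, if present
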
def get_pid_fields(linesl):
  """ Parse the data from the appendix to be used for better PID detail
  This one needs a bit more work.
  Appendix F and sequence work
  """
  pid_fields = {}
  line_iter = iter(linesl)
  # Need to come up with a good way to get this
  summary = ""
  for line in line_iter:
    try:
      if re.match(r'^A\.[0-9]+', line):
        line = ""
        while not re.match(r'^A\.[0-9]+', line.strip()):
          cont = True
          line = get_next(line_iter)
          if "Parameter Data Length:" in line:
            cont = False
            pdl = line.split(":")[1].strip()
          elif "Data Type:" in line:
            cont = False
            dt = line.split(":")[1].strip()
          elif "Resolution:" in line:
            cont = False
            res = line.split(":")[1].strip()
          elif "Maximum Range:" in line:
            cont = False
            mr = line.split(":")[1].strip()
          elif "Transmission Update Period:" in line:
            cont = False
            tup = line.split(":")[1].strip()
          elif "Message Priority:" in line:
            cont = False
            mp = line.split(":")[1].strip()
          elif "Format:" in line:
            cont = False
            get_next(line_iter)         # PID Data, which we discard
            line = get_next(line_iter)  # [0-9]{1,3} [a-z]+
            line_l = [x for x in line.strip().split(" ") if x]
            pid = line_l[0]
            seq = "".join(line_l[1:])
            # These are probably representative of optional params;
            # the issue is representing continuing sequences, which
            # seems inconsistent
            #if '[' in seq: print(pid,seq)
            # Handle bytes/bits here
            bytedef = {}
            line = get_next(line_iter)
            while not re.match(r"^[A-F]\.[0-9]+", line.strip()):
              #while line.strip():
              # Found one case where the "\xe2\x80\x94" character was set as "-";
              # in case this comes up somewhere else, I'll just do generic code
              if re.match(" *[a-z]-", line):
                line = line.replace("-", b"\xe2\x80\x94".decode('utf-8'))
              if b"\xe2\x80\x94 ".decode('utf-8') in line.strip():
                a, b = line.strip().split(b"\xe2\x80\x94 ".decode('utf-8'))
                # Sometimes we get "a a", for 2 byte pids where both bytes
                # represent the same value, which would be handled by sequence
                a = a.strip()[0]
                b = b.strip()
                bytedef[a] = b
              line = get_next(line_iter)
            # END inner WHILE
            # Put together dict for this pid
            pid_fields[pid] = {}
            pid_fields[pid]["ParameterDataLength"] = pdl
            pid_fields[pid]["DataType"] = dt
            pid_fields[pid]["Resolution"] = res
            pid_fields[pid]["MaximumRange"] = mr
            pid_fields[pid]["TransmissionUpdatePeriod"] = tup
            pid_fields[pid]["MessagePriority"] = mp
            # \xe2\x80\xa6 is the horizontal ellipsis character
            pid_fields[pid]["Sequence"] = seq.replace(b"\xe2\x80\xa6".decode('utf-8'), "...")
            #if pid == "254": # I need to treat this one as variable length
            #  pid_fields[pid]["Sequence"] += "..."
            pid_fields[pid]["ByteDef"] = bytedef  # This should be nested dict
            if summary:
              pid_fields[pid]["Summary"] = summary
              summary = ""
          # ENDIF Format
          else:
            ls = line.strip()
            # Need to handle multi-lines better here
            if re.match("[a-zA-Z]+ ", ls):
              if not cont:
                summary = ls
                cont = True
              else:
                summary += ls
          # Reset summary if we are on a line in the appendix
          # denoting a new PID
          if "A." in line:
            summary = ""
            line = ""
        # END WHILE
        # PS, figuring this out was a pain, and it looks like
        # I no longer need it anyway
        #line_iter = itertools.chain([line],line_iter)
    except StopIteration:
      return pid_fields

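# Illustration for get_pid_fields(): one entry ends up shaped roughly like this
# (all field values hypothetical; "Summary" only appears when one was captured):
#   pid_fields["84"] == {"ParameterDataLength": "1", "DataType": "...",
#                        "Resolution": "...", "MaximumRange": "...",
#                        "TransmissionUpdatePeriod": "...", "MessagePriority": "...",
#                        "Sequence": "a", "ByteDef": {"a": "..."}, "Summary": "..."}
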
if __name__ == "__main__":
  ## THIS SECTION IS ONLY FOR TESTING ##
  def check2(seq):
    """ Used here just for my own sanity checking """
    seq = seq.strip(".")
    old = ""
    status = False
    count = 0
    for x in seq:
      if x == old:
        count += 1
      elif x != old:
        count = 1
      if count > 2: return False
      elif count == 2: status = True
      old = x
    return status

  handled_special = [194, 254]
  handled_special.extend([196, 198, 199, 237, 233, 240, 498, 506, 212, 210, 211, 226])
  #doc = get_document_object("samplejson.def")
  doc = get_document_object()
  # Things to test within doc structure
  #['xdev_sids', 'fmis', 'mids', 'sids_for_mids', 'pid_fields', 'pids']
  pid_fields = doc["pid_fields"]
  # Print which multi-byte PID sequences still need addressing
  for x in pid_fields:
    bseq = pid_fields[x]["Sequence"]
    bseq = bseq.replace("(", "").replace(")", "")
    if not re.match("^[a-z]+$", bseq):
      if int(x) in handled_special: continue
      elif re.match("[a-z]+[a-z]1[a-z]2[a-z]3[a-z]4", bseq):
        continue
      elif "NodataassociatedwithPID" in bseq:
        continue
      elif re.match("([a-z],)+[a-z]/[a-z],", bseq):
        continue
      # Grabs alternates
      # Seems to be too far reaching
      # nababab...
      elif re.match(".*(?P<n>([a-z])[^\1])(?P=n)", bseq) and "..." in bseq:
        continue
      # nabccdd...
      elif re.match(".*((?P<n>[a-z])(?P=n){1})+?", bseq) and check2(bseq) and "..." in bseq:
        continue
      # Print the special cases to be handled for multi-byte PIDs
      print(x, pid_fields[x]["Sequence"])
  # Interesting ones to look at
  #dbg(pid_fields["204"])
  #dbg(pid_fields["219"])
  #dbg(pid_fields["226"])
  #dbg(pid_fields["192"])
  #dbg(pid_fields["500"])
  #dbg(pid_fields["450"])
  #dbg(pid_fields["223"])
  dbg(doc["sids_for_mids"])