Adjust JobID-related column names

NordicHPC · Aug 9, 2024 · cb9f4e8 · cb9f4e8
1 parent dc74c12
commit cb9f4e8
Show file tree

Hide file tree

Showing 4 changed files with 42 additions and 35 deletions.
diff --git a/README.rst b/README.rst
@@ -243,6 +243,12 @@ accounting database:
 Changelog
 ---------
 
+Next
+
+* JobID columns adjusted: ``JobID`` is the raw value that Slurm gives,
+  the ``*only`` columns are integer IDs without any trailing ``_``/``.`` suffixes,
+  and ``JobIDRawonly`` is ``JobIDRaw`` without any trailing suffixes.
+
 0.9.1
 
 * Slurm >= 20.11 deprecates the ``AllocGRES`` and ``ReqGRES`` columns

diff --git a/slurm2sql.py b/slurm2sql.py
@@ -387,39 +387,40 @@ def calc(row):
 
 
 # Job ID related stuff
-jobidplain_re = re.compile(r'[0-9]+')
+jobidonly_re = re.compile(r'[0-9]+')
 jobidnostep_re = re.compile(r'[0-9]+(_[0-9]+)?')
-class slurmJobIDplain(linefunc):
+class slurmJobIDslurm(linefunc):
+    """The JobID field as slurm gives it, including _ and ."""
+    type = 'text'
+    @staticmethod
+    def calc(row):
+        if 'JobID' not in row: return
+        return row['JobID']
+
+class slurmJobIDonly(linefunc):
     """The JobID without any . or _.   This is the same for all array tasks/het offsets"""
     type = 'int'
     @staticmethod
     def calc(row):
         if 'JobID' not in row: return
-        return int(jobidplain_re.match(row['JobID']).group(0))
+        return int(jobidonly_re.match(row['JobID']).group(0))
 
 class slurmJobIDnostep(linefunc):
     """The JobID without any `.` suffixes.   This is the same for all het offsets"""
-    type = 'str'
+    type = 'text'
     @staticmethod
     def calc(row):
         if 'JobID' not in row: return
         return jobidnostep_re.match(row['JobID']).group(0)
 
-class slurmJobIDrawplain(linefunc):
-    """The (raw) JobID without any . or _.  This is different for every job."""
+class slurmJobIDrawonly(linefunc):
+    """The (raw) JobID without any . or _.  This is different for every job in an array."""
     type = 'int'
     @staticmethod
     def calc(row):
         if 'JobIDRaw' not in row: return
-        return int(jobidplain_re.match(row['JobIDRaw']).group(0))
+        return int(jobidonly_re.match(row['JobIDRaw']).group(0))
 
-class slurmJobIDRawnostep(linefunc):
-    """Same as jobIDrawplain.  Purpose should be sorted out or should be removed someday"""
-    type = 'int'
-    @staticmethod
-    def calc(row):
-        if 'JobIDRaw' not in row: return
-        return int(jobidplain_re.match(row['JobIDRaw']).group(0))
 
 arraytaskid_re = re.compile(r'_([0-9]+)')
 class slurmArrayTaskID(linefunc):
@@ -522,15 +523,16 @@ def calc(row):
     #   - JobID.JobStep
     #   - ArrayJobID_ArrayTaskID.JobStep
     # And the below is consistent with this.
-    'JobID': slurmJobIDrawplain,        # Integer JobID (for arrays JobIDRaw),
-                                        # without array/step suffixes.
+    '_JobID': slurmJobIDslurm,          # JobID directly as Slurm presents it
+                                        # (with '_' and '.')
     '_JobIDnostep': slurmJobIDnostep,   # Integer JobID without '.' suffixes
-    '_ArrayJobID': slurmJobIDplain,     # Same job id for all jobs in an array.
-                                        # If not array, same as JobID
-    '_ArrayTaskID': slurmArrayTaskID,   # Part between '_' and '.'
+    '_JobIDonly': slurmJobIDonly,       # Integer JobID without '_' or '.' suffixes
     '_JobStep': slurmJobStep,           # Part after '.'
-    '_JobIDSlurm': slurmJobIDslurm,     # JobID directly as Slurm presents it
-                                        # (with '_' and '.')
+    '_ArrayTaskID': slurmArrayTaskID,   # Part between '_' and '.'
+    '_JobIDRawonly': slurmJobIDrawonly,
+                                        # For array jobs, a unique ID for each array task;
+                                        # otherwise the same as JobID.
+
     #'JobIDRawSlurm': str,              #
     'JobName': nullstr,                 # Free-form text name of the job
     'User': nullstr,                    # Username

diff --git a/test.py b/test.py
@@ -81,7 +81,7 @@ def csvdata(data):
     return reader
 
 def fetch(db, jobid, field, table='slurm'):
-    selector = 'JobIDSlurm'
+    selector = 'JobID'
     if table == 'eff':
         selector = 'JobID'
     r = db.execute(f"SELECT {field} FROM {table} WHERE {selector}=?", (jobid,))
@@ -140,7 +140,7 @@ def test_time(db, data1):
     r = db.execute("SELECT Time FROM slurm WHERE JobID=43977780;").fetchone()[0]
     assert r >= time.time() - 5
     # Job step: Submit defined, Start='Unknown', End='Unknown' --> Time should equal Submit
-    r = db.execute("SELECT Time FROM slurm WHERE JobIDSlurm='43977780.batch';").fetchone()[0]
+    r = db.execute("SELECT Time FROM slurm WHERE JobID='43977780.batch';").fetchone()[0]
     assert r == unixtime('2019-08-01T00:35:27')
 
 def test_queuetime(db, data1):
@@ -296,7 +296,7 @@ def test_slurm2011_gres(db, data2):
 # JobIDs
 #
 jobid_test_data = [
-    # raw text        JobIDplain    ArrayTaskID     JobStep         jobIDslurm
+    # raw text         JobIDonly   ArrayTaskID      JobStep              JobID
     ['7099567_5035',     7099567,         5035,        None,     '7099567_5035',  ],
     ['7102250',          7102250,         None,        None,          '7102250',  ],
     ['1000.2',              1000,         None,         '2',           '1000.2',  ],
@@ -309,21 +309,20 @@ def test_slurm2011_gres(db, data2):
 #    [, , , , ]
     ]
 jobidraw_test_data = [
-    # raw text        jobIDrawplain      jobIDrawnostep
-    ['7099567',             7099567,           7099567,   ],
-    ['7102250.1',           7102250,           7102250,   ],
+    # raw text         jobIDrawonly
+    ['7099567',             7099567, ],
+    ['7102250.1',           7102250, ],
     ]
-@pytest.mark.parametrize("text, jobidplain, arraytaskid, jobstep, jobidslurm", jobid_test_data)
-def test_jobids(text, jobidplain, arraytaskid, jobstep, jobidslurm):
-    assert slurm2sql.slurmJobIDplain.calc({'JobID': text}) == jobidplain
+@pytest.mark.parametrize("text, jobidonly, arraytaskid, jobstep, jobidslurm", jobid_test_data)
+def test_jobids(text, jobidonly, arraytaskid, jobstep, jobidslurm):
+    assert slurm2sql.slurmJobIDonly.calc({'JobID': text}) == jobidonly
     assert slurm2sql.slurmArrayTaskID.calc({'JobID': text}) == arraytaskid
     assert slurm2sql.slurmJobStep.calc({'JobID': text}) == jobstep
     assert slurm2sql.slurmJobIDslurm.calc({'JobID': text}) == jobidslurm
 
-@pytest.mark.parametrize("text, jobidrawplain, jobidrawnostep", jobidraw_test_data)
-def test_jobidraws(text, jobidrawplain, jobidrawnostep):
-    assert slurm2sql.slurmJobIDrawplain.calc({'JobIDRaw': text}) == jobidrawplain
-    assert slurm2sql.slurmJobIDRawnostep.calc({'JobIDRaw': text}) == jobidrawnostep
+@pytest.mark.parametrize("text, jobidrawonly", jobidraw_test_data)
+def test_jobidraws(text, jobidrawonly):
+    assert slurm2sql.slurmJobIDrawonly.calc({'JobIDRaw': text}) == jobidrawonly
 
 
 

diff --git a/tests/test-data3.csv b/tests/test-data3.csv
@@ -1,2 +1,2 @@
-JobIDRaw,JobName,ReqTRES,Start
+JobID,JobName,ReqTRES,Start
 1,job1,cpu=1,1970-01-01T03:00:00