-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmain.py
348 lines (284 loc) · 19.8 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
#!/usr/bin/env python3
import argparse
import pathlib
import logzero
from logzero import logger
from filelock import FileLock
# Module-level state needed to release GPUs when the script exits.
# Both names are written by _train_or_solve() (via ``global``) and read in the
# ``finally`` clause of the ``__main__`` section at the bottom of this file.
# cuda_devices: ids of the CUDA devices used for the current run.
cuda_devices = []
# got_devices_from_folder: True only when the ids above were claimed through
# _obtain_cuda_devices() and therefore have to be handed back on exit.
got_devices_from_folder = False
def _set_loglevel(loglevel):
    """Apply the requested verbosity to logzero.

    Recognised values are "DEBUG", "INFO", "WARNING" and "ERROR". Any other
    value leaves the current level untouched and only logs a warning.
    """
    recognised = ("DEBUG", "INFO", "WARNING", "ERROR")
    if loglevel not in recognised:
        logger.warning(f"Unknown loglevel {loglevel}, ignoring.")
        return

    # Each recognised name maps to the logzero constant of the same name.
    logzero.loglevel(getattr(logzero, loglevel))
def _obtain_cuda_devices(number_of_devices, path: pathlib.Path):
    """Claim ``number_of_devices`` GPUs from the GPU management folder.

    A free GPU is represented by a ``<id>.gpu`` marker file below ``path``.
    Claiming a GPU removes its marker file; the file lock on ``path/.lock``
    serialises the claim against other processes using the same folder.

    The claim is all-or-nothing: the number of free GPUs and the ids encoded
    in the file names are checked before any marker file is removed, so a
    failed request leaves the folder exactly as it was.

    Args:
        number_of_devices: How many GPUs to claim. Values <= 0 claim nothing.
        path: Folder holding the ``.lock`` file and the ``*.gpu`` markers.

    Returns:
        The integer ids (marker file stems) of the claimed GPUs.

    Raises:
        ValueError: If ``path/.lock`` does not exist, or if a marker file
            name is not an integer id.
        Exception: If fewer than ``number_of_devices`` GPUs are free.
    """
    lock_file: pathlib.Path = path / ".lock"
    if not lock_file.exists():
        raise ValueError(f"Invalid GPU directory, lock_file {lock_file} does not exist. Exiting")

    # Nothing to claim, so there is no need to take the lock.
    if number_of_devices <= 0:
        return []

    with FileLock(lock_file):
        # Sorted so the choice of GPUs is deterministic for a given folder state.
        available_gpus = sorted(path.rglob("*.gpu"))
        if len(available_gpus) < number_of_devices:
            raise Exception("No GPU available! This should never happen.")

        selected_gpus = available_gpus[:number_of_devices]
        # Parse every id before deleting anything: a malformed file name must
        # not leave already-removed markers behind.
        gpus = [int(gpu_file.stem) for gpu_file in selected_gpus]
        for gpu_file in selected_gpus:
            gpu_file.unlink()

    return gpus
def _release_cuda_devices(cuda_devices, path: pathlib.Path):
    """Hand GPUs back by (re)creating their marker files.

    For every id in ``cuda_devices`` the file ``<id>.gpu`` is created directly
    inside ``path``; an already existing marker is left in place.
    """
    marker_names = (f"{device!s}.gpu" for device in cuda_devices)
    for marker_name in marker_names:
        path.joinpath(marker_name).touch(exist_ok=True)
# Command line options that are plain on/off switches. When set they are
# forwarded to the solver as the string "yes"; all other options are forwarded
# with the value given on the command line.
_SWITCH_OPTIONS = frozenset({
    "weighted",
    "self_loops",
    "reduction",
    "local_search",
    "queue_pruning",
    "noise_as_prob_maps",
    "weighted_queue_pop",
    "quadratic",
    "write_mps",
})


def _create_solver(solver_name):
    """Instantiate the solver registered under ``solver_name``.

    Each solver module is imported only when it is actually requested, so the
    other backends are never loaded.

    Raises:
        ValueError: If ``solver_name`` does not name a known solver.
    """
    if solver_name == "dgl-treesearch":
        from solvers.dgl_tree import DGLTreesearch
        return DGLTreesearch()
    if solver_name == "intel-treesearch":
        from solvers.intel_treesearch import IntelTreesearch
        return IntelTreesearch()
    if solver_name == "gurobi":
        from solvers.gurobi import Gurobi
        return Gurobi()
    if solver_name == "kamis":
        from solvers.kamis import KaMIS
        return KaMIS()
    if solver_name == "lwd":
        from solvers.lwd import LearningWhatToDefer
        return LearningWhatToDefer()
    raise ValueError(f"Solver {solver_name} unknown or not implemented yet.")


def _select_cuda_devices(args):
    """Pick the CUDA devices for this run and record them in the module globals.

    An explicit ``--cuda_devices`` list wins over ``--num_cuda_devices``. If
    only ``--num_cuda_devices`` is given, the devices are claimed from
    ``--cuda_device_folder`` and ``got_devices_from_folder`` is set so they
    are released again on exit. With neither option CUDA is disabled.

    Returns:
        The list of selected device ids (empty when CUDA is disabled).
    """
    global cuda_devices
    global got_devices_from_folder

    if args.cuda_devices:
        cuda_devices = args.cuda_devices
        if args.num_cuda_devices > 0:
            logger.warning(f"Both --cuda_devices and --num_cuda_devices were supplied. Falling back to --cuda_devices = {cuda_devices}!")
    elif args.num_cuda_devices > 0:
        cuda_devices = _obtain_cuda_devices(args.num_cuda_devices, args.cuda_device_folder)
        # Set right after the claim so an interrupt still triggers the release.
        got_devices_from_folder = True
        logger.info(f"Obtained cuda_devices={cuda_devices} from {args.cuda_device_folder}.")
    else:
        cuda_devices = []
        logger.info("No cuda devices supplied, disabling CUDA.")
    return cuda_devices


def _forward_options(args, parameters, option_names):
    """Copy the options the user actually supplied from ``args`` into ``parameters``.

    Switches (see ``_SWITCH_OPTIONS``) are stored as "yes" when enabled.
    Valued options are copied whenever they are not ``None``, so an explicit
    ``0`` / ``0.0`` is forwarded instead of being mistaken for "not given".
    """
    for name in option_names:
        value = getattr(args, name)
        if name in _SWITCH_OPTIONS:
            if value:
                parameters[name] = "yes"
        elif value is not None:
            parameters[name] = value


def _train_or_solve(args):
    """Run the ``train`` or ``solve`` operation with the solver chosen in ``args``.

    Creates the solver, optionally loads pre-trained weights, selects the CUDA
    devices, assembles the parameter dictionary from the command line options
    and calls ``solver.solve`` / ``solver.train`` with the input folder, the
    output folder and that dictionary.

    Raises:
        ValueError: If ``args.solver`` is not a known solver.
    """
    solver = _create_solver(args.solver)

    if args.pretrained_weights:
        solver.load_weights(args.pretrained_weights)

    if args.operation == "solve":
        devices = _select_cuda_devices(args)
        parameters = {
            "num_threads": args.num_threads,
            "cuda_devices": devices,
            "time_limit": args.time_limit,
            "loglevel": args.loglevel,
        }
        _forward_options(args, parameters, (
            "weighted",
            "self_loops",
            "reduction",
            "local_search",
            "queue_pruning",
            "max_prob_maps",
            "model_prob_maps",
            "noise_as_prob_maps",
            "weighted_queue_pop",
            "maximum_iterations_per_episode",
            "max_nodes",
            "quadratic",
            "write_mps",
            "prm_file",
        ))
        solver.solve(args.input_folder, args.output_folder, parameters)
    elif args.operation == "train":
        devices = _select_cuda_devices(args)
        parameters = {
            "cuda_devices": devices,
            "loglevel": args.loglevel,
        }
        _forward_options(args, parameters, (
            "weighted",
            "self_loops",
            "model_prob_maps",
            "epochs",
            "lr",
            # LwD
            "maximum_iterations_per_episode",
            "num_unrolling_iterations",
            "num_environments_per_batch",
            "gradient_step_batch_size",
            "gradient_steps_per_update",
            "diversity_reward_coefficient",
            "max_entropy_coefficient",
            "num_updates",
            "training_graph_idx",
            "max_nodes",
        ))
        solver.train(args.input_folder, args.output_folder, parameters)
    else:
        logger.error(f"Unknown operation: {args.operation}")

    logger.info("Operation done, exiting.")
def _data_generation(args):
    """Generate input graphs according to ``args.type`` and write them out.

    Supported types are "sat", "random" and "realworld"; each one builds the
    matching generator (imported on demand) and then calls its ``generate``
    method with the ``gen_labels`` and ``weighted`` options.

    Raises:
        ValueError: For an unsupported data type or random graph model.
    """
    data_type = args.type

    if data_type == "sat":
        from data_generation.sat import SATGraphDataGenerator

        gen = SATGraphDataGenerator(args.input_folder, args.output_folder)
    elif data_type == "random":
        from data_generation.random_graph import (
            RandomGraphGenerator,
            ErdosRenyi,
            BarabasiAlbert,
            HolmeKim,
            WattsStrogatz,
            HyperbolicRandomGraph,
        )

        # Factories are lazy so only the options of the chosen model are read.
        model_factories = {
            "er": lambda: ErdosRenyi(args.min_n, args.max_n, args.er_p),
            "ba": lambda: BarabasiAlbert(args.min_n, args.max_n, args.ba_m),
            "hk": lambda: HolmeKim(args.min_n, args.max_n, args.hk_m, args.hk_p),
            "ws": lambda: WattsStrogatz(args.min_n, args.max_n, args.ws_k, args.ws_p),
            "hrg": lambda: HyperbolicRandomGraph(args.min_n, args.max_n, args.hrg_alpha, args.hrg_t, args.hrg_degree, args.hrg_threads),
        }
        make_model = model_factories.get(args.model)
        if make_model is None:
            raise ValueError(f"Unknown random graph model {args.model}")

        gen = RandomGraphGenerator(args.output_folder, make_model(), num_graphs=args.num_graphs)
    elif data_type == "realworld":
        from data_generation.realworld import RealWorldGraphGenerator

        # A missing or zero limit is passed on as None.
        gen = RealWorldGraphGenerator(args.output_folder, limit=args.limit_rw_graphs or None)
    else:
        raise ValueError(f"Unsupported data type: {data_type}")

    gen.generate(gen_labels=args.gen_labels, weighted=args.weighted)
def main(args):
    """Configure logging and dispatch to the handler for ``args.operation``.

    "train" and "solve" go to _train_or_solve(), "gendata" goes to
    _data_generation(); any other operation is only reported as an error.
    """
    # Logging is configured first so the handlers already log at the right level.
    _set_loglevel(args.loglevel)

    operation = args.operation

    # The handlers import their heavy dependencies themselves (slow DGL import).
    if operation == "gendata":
        _data_generation(args)
        return
    if operation in ("train", "solve"):
        _train_or_solve(args)
        return

    logger.error(f"Unknown operation: {operation}")
def _build_parser():
    """Build the command line parser with the train / solve / gendata sub-commands.

    A sub-command is mandatory: every sub-command defines ``output_folder``,
    which the script creates right after parsing, so an invocation without a
    sub-command is rejected by argparse with a usage error.

    Returns:
        The fully configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="Testbench for MIS solvers.")
    subparsers = parser.add_subparsers(help='sub-command help', dest="operation")
    # Set as an attribute (instead of the ``required=`` keyword) so this also
    # works on Python versions older than 3.7.
    subparsers.required = True

    # Global flags
    parser.add_argument("--loglevel", type=str, action="store", default="DEBUG", choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="Verbosity of logging (DEBUG/INFO/WARNING/ERROR)")
    parser.add_argument("--self_loops", action="store_true", default=False, help="Enable self loops addition (in input data) for GCN-based model.")

    train_parser = subparsers.add_parser("train", help="Model training")
    train_parser.add_argument("--weighted", action="store_true", default=False, help="If enabled, solve the weighted MIS problem instead of MIS.")
    train_parser.add_argument("--cuda_devices", type=int, nargs="*", action="store", default=[], help="Which cuda devices should be used (distributed around the threads in round-robin fashion). If not given and --num_cuda_devices is not used, CUDA is disabled.")
    train_parser.add_argument("--num_cuda_devices", type=int, action="store", default=0, help="Alternative to --cuda_devices. Uses a folder to manage available GPUs.")
    train_parser.add_argument("--cuda_device_folder", type=pathlib.Path, action="store", default="/tmp/gpus", help="Folder containing a lockfile for the GPU management. ")
    train_parser.add_argument("--pretrained_weights", type=pathlib.Path, nargs="?", action="store", help="Pre-trained weights to be used for solving/continuing training.")
    # Optionals without default (defaults are set in the model)
    train_parser.add_argument("--lr", type=float, action="store", help="Learning rate (for training)")
    train_parser.add_argument("--epochs", type=int, action="store", help="Number of epochs to train for")
    train_parser.add_argument("--model_prob_maps", type=int, action="store", help="Treesearch (Intel/DGL) specific: Number of probability maps the model was/should be trained for.")

    # Lwd Arguments
    train_parser.add_argument("--maximum_iterations_per_episode", type=int, action="store", help="LwD specific: Maximum iterations before the MDP timeouts.")
    train_parser.add_argument("--num_unrolling_iterations", type=int, action="store", help="LwD specific: Maximum number of unrolling iterations (how many stages we have per graph during training).")
    train_parser.add_argument("--num_environments_per_batch", type=int, action="store", help="LwD specific: Graph batch size during training.")
    train_parser.add_argument("--gradient_step_batch_size", type=int, action="store", help="LwD specific: Batch size for gradient step.")
    train_parser.add_argument("--gradient_steps_per_update", type=int, action="store", help="LwD specific: Number of gradient steps per update.")
    train_parser.add_argument("--diversity_reward_coefficient", type=float, action="store", help="LwD specific: Diversity reward coefficient.")
    train_parser.add_argument("--max_entropy_coefficient", type=float, action="store", help="LwD specific: Entropy coefficient.")
    train_parser.add_argument("--num_updates", type=int, action="store", help="LwD specific: How many PPO updates to do.")
    train_parser.add_argument("--training_graph_idx", type=int, action="store", help="LwD specific: On which graph index to continue training.")
    train_parser.add_argument("--max_nodes", type=int, action="store", help="LwD specific: If you have lots of graphs, the determination of maximum number of nodes takes some time. If this value is given, you can force-overwrite it to save time.")

    train_parser.add_argument("solver", type=str, help="Solver to use.", choices=["dgl-treesearch", "intel-treesearch", "lwd"])
    train_parser.add_argument("input_folder", type=pathlib.Path, action="store", help="Directory containing input")
    train_parser.add_argument("output_folder", type=pathlib.Path, action="store", help="Folder in which the output should be stored (e.g. json containing statistics and solution will be stored, or trained weights)")

    solve_parser = subparsers.add_parser("solve", help="Call a solver")
    solve_parser.add_argument("--time_limit", type=int, nargs="?", action="store", default=600, help="Time limit in seconds")
    solve_parser.add_argument("--num_threads", type=int, nargs="?", action="store", default=8, help="Maximum number of threads to use.")
    solve_parser.add_argument("--weighted", action="store_true", default=False, help="If enabled, solve the weighted MIS problem instead of MIS.")
    solve_parser.add_argument("--cuda_devices", type=int, nargs="*", action="store", default=[], help="Which cuda devices should be used (distributed around the threads in round-robin fashion). If not given, CUDA is disabled.")
    solve_parser.add_argument("--num_cuda_devices", type=int, action="store", default=0, help="Alternative to --cuda_devices. Uses a folder to manage available GPUs.")
    solve_parser.add_argument("--cuda_device_folder", type=pathlib.Path, action="store", default="/tmp/gpus", help="Folder containing a lockfile for the GPU management. ")
    solve_parser.add_argument("--pretrained_weights", type=pathlib.Path, nargs="?", action="store", help="Pre-trained weights to be used for solving/continuing training.")
    solve_parser.add_argument("--reduction", action="store_true", default=False, help="If enabled, reduce graph during tree search.")
    solve_parser.add_argument("--local_search", action="store_true", default=False, help="If enabled, use local_search if time left.")
    solve_parser.add_argument("--queue_pruning", action="store_true", default=False, help="(DGL-Treesearch only) If enabled, prune search queue.")
    solve_parser.add_argument("--noise_as_prob_maps", action="store_true", default=False, help="(DGL-Treesearch and LwD only) If enabled, use uniform noise instead of GNN output.")
    solve_parser.add_argument("--weighted_queue_pop", action="store_true", default=False, help="(DGL-Treesearch only) If enabled, choose element from queue with probability inverse proportional to # of unlabelled vertices in it.")
    solve_parser.add_argument("--max_prob_maps", type=int, action="store", help="DGL-TS specific: number of probability maps to use.")
    solve_parser.add_argument("--model_prob_maps", type=int, action="store", help="Treesearch (Intel/DGL) specific: Number of probability maps the model was/should be trained for.")
    solve_parser.add_argument("--maximum_iterations_per_episode", type=int, action="store", help="LwD specific: Maximum iterations before the MDP timeouts.")
    solve_parser.add_argument("--max_nodes", type=int, action="store", help="LwD specific: If you have lots of graphs, the determination of maximum number of nodes takes some time. If this value is given, you can force-overwrite it to save time.")
    solve_parser.add_argument("--quadratic", action="store_true", default=False, help="Gurobi specific: Whether a quadratic program should be used instead of a linear program to solve the MIS problem (cannot be used together with weighted)")
    solve_parser.add_argument("--write_mps", action="store_true", default=False, help="Gurobi specific: Instead of solving, write mps output (e.g., for tuning)")
    solve_parser.add_argument("--prm_file", type=pathlib.Path, nargs="?", action="store", help="Gurobi specific: Gurobi parameter file (e.g. by grbtune).")

    solve_parser.add_argument("solver", type=str, help="Solver to use.", choices=["dgl-treesearch", "intel-treesearch", "gurobi", "kamis", "lwd"])
    solve_parser.add_argument("input_folder", type=pathlib.Path, action="store", help="Directory containing input")
    solve_parser.add_argument("output_folder", type=pathlib.Path, action="store", help="Folder in which the output should be stored (e.g. json containing statistics and solution will be stored, or trained weights)")

    data_gen_parser = subparsers.add_parser("gendata", help="Generate input data")
    data_gen_parser.add_argument("type", type=str, help="Which data should be generated", choices=["sat", "random", "realworld"])
    data_gen_parser.add_argument("input_folder", type=pathlib.Path, action="store", help="Directory containing input (ignored if type in ['random', 'realworld'])")
    data_gen_parser.add_argument("output_folder", type=pathlib.Path, action="store", help="Folder in which the output should be stored (e.g. json containing statistics and solution will be stored, or trained weights)")
    data_gen_parser.add_argument("--limit_rw_graphs", type=int, help="[For type = realworld] How many graphs to sample from downloaded realworld datasets")
    data_gen_parser.add_argument("--gen_labels", action="store_true", default=False, help="If enabled, generate labels for graphs using Gurobi.")
    data_gen_parser.add_argument("--weighted", action="store_true", default=False, help="If enabled, generate random vertex weights.")
    data_gen_parser.add_argument("--model", type=str, help="[For type = random] Which random graph model should be used", choices=["er", "ba", "hk", "ws", "hrg"], default="er")
    data_gen_parser.add_argument("--min_n", type=int, help="[For type = random] Minimum number of nodes in the random graph", default=100)
    data_gen_parser.add_argument("--max_n", type=int, help="[For type = random] Maximum number of nodes in the random graph", default=100)
    data_gen_parser.add_argument("--num_graphs", type=int, help="[For type = random] How many graphs to generate", default=10)
    data_gen_parser.add_argument("--er_p", type=float, help="[For type = random and model = er] Parameter p", default=0.5)
    data_gen_parser.add_argument("--ba_m", type=int, help="[For type = random and model = ba] Parameter m", default=10)
    data_gen_parser.add_argument("--hk_m", type=int, help="[For type = random and model = hk] Parameter m", default=10)
    data_gen_parser.add_argument("--hk_p", type=float, help="[For type = random and model = hk] Parameter p", default=0.5)
    data_gen_parser.add_argument("--ws_k", type=int, help="[For type = random and model = ws] Parameter k", default=2)
    data_gen_parser.add_argument("--ws_p", type=float, help="[For type = random and model = ws] Parameter p", default=0.5)
    data_gen_parser.add_argument("--hrg_alpha", type=float, help="[For type = random and model = hrg] Parameter Alpha", default=0.75)
    data_gen_parser.add_argument("--hrg_t", type=float, help="[For type = random and model = hrg] Parameter t", default=0)
    data_gen_parser.add_argument("--hrg_degree", type=float, help="[For type = random and model = hrg] Parameter degree", default=2)
    # NOTE(review): a thread count parsed as float looks unintended; confirm
    # against HyperbolicRandomGraph before switching this to int.
    data_gen_parser.add_argument("--hrg_threads", type=float, help="[For type = random and model = hrg] How many threads to use for hyperbolic algorithm", default=8)

    return parser


if __name__ == "__main__":
    args = _build_parser().parse_args()

    args.output_folder.mkdir(parents=True, exist_ok=True)

    try:
        main(args)
    except KeyboardInterrupt:
        # Ctrl-C is an expected way to stop a run; fall through to the cleanup.
        logger.info("Interrupted by user, shutting down.")
    finally:
        # Hand back GPUs that were claimed from the device folder, even after
        # an error or an interrupt.
        if got_devices_from_folder:
            logger.info("Releasing cuda devices.")
            _release_cuda_devices(cuda_devices, args.cuda_device_folder)