Browse Source

Move checkpoint dir to settings, rename main_bak to main_with_collect

master
nitowa 1 year ago
parent
commit
023d7c8b99
4 changed files with 3 additions and 2 deletions
  1. 1
    0
      settings.json
  2. 1
    1
      src/spark/main.py
  3. 1
    1
      src/spark/main_graphs.py
  4. 0
    0
      src/spark/main_with_collect.py

+ 1
- 0
settings.json View File

@@ -16,6 +16,7 @@
16 16
     "spark_master": "spark://osboxes:7077",
17 17
     "spark_worker_memory": "1g",
18 18
     "spark_event_logging": "true",
19
+    "spark_checkpoint_dir": "./checkpoints",
19 20
 
20 21
     "debug": false
21 22
 }

+ 1
- 1
src/spark/main.py View File

@@ -56,7 +56,7 @@ class Master:
56 56
 
57 57
 master = Master(config)
58 58
 master.spark.catalog.clearCache()
59
-master.spark.sparkContext.setCheckpointDir('./checkpoints')
59
+master.spark.sparkContext.setCheckpointDir(config['spark_checkpoint_dir'])
60 60
 tx_df = master.get_tx_dataframe()
61 61
 
62 62
 #Turn transactions into a list of ('id', [addr, addr, ...])

+ 1
- 1
src/spark/main_graphs.py View File

@@ -45,7 +45,7 @@ class Master:
45 45
 # end class Master
46 46
 
47 47
 master = Master(config)
48
-master.spark.sparkContext.setCheckpointDir('./checkpoints')  # spark is really adamant it needs this even if the algorithm is set to the non-checkpointed version
48
+master.spark.sparkContext.setCheckpointDir(config['spark_checkpoint_dir'])
49 49
 
50 50
 tx_df = master.get_tx_dataframe()
51 51
 

src/spark/main_bak.py → src/spark/main_with_collect.py View File


Loading…
Cancel
Save