Config Module
Configuration module of the cSpark test benchmark
- config.BENCHMARK_BENCH = ['PageRank']: List of Spark-bench benchmarks to execute
- config.BENCHMARK_PERF = []: List of Spark-perf benchmarks to execute
- config.BENCH_CONF = {'DecisionTree': {'NUM_OF_PARTITIONS': (4, 1200), 'NUM_OF_FEATURES': (3, 6), 'MAX_ITERATION': (21, 1), 'NUM_OF_EXAMPLES': (2, 50000000), 'NUM_OF_CLASS_C': (7, 10)}, 'scala-sort-by-key': {'ScaleFactor': 100}, 'scala-count': {'ScaleFactor': 10}, 'scala-sort-by-key-int': {'ScaleFactor': 50}, 'scala-agg-by-key-int': {'ScaleFactor': 5}, 'scala-agg-by-key-naive': {'ScaleFactor': 10}, 'KMeans': {'NUM_OF_CLUSTERS': (3, 10), 'MAX_ITERATION': (8, 1), 'NUM_OF_POINTS': (2, 100000000), 'NUM_OF_PARTITIONS': (6, 1000), 'DIMENSIONS': (4, 20), 'SCALING': (5, 0.6)}, 'scala-agg-by-key': {'ScaleFactor': 10}, 'PageRank': {'NUM_OF_PARTITIONS': (3, 1000), 'NumTrials': 1, 'numV': (2, 7000000), 'MAX_ITERATION': (8, 1)}, 'scala-count-w-fltr': {'ScaleFactor': 10}, 'SVM': {'NUM_OF_PARTITIONS': (4, 1000), 'NUM_OF_FEATURES': (3, 10), 'NUM_OF_EXAMPLES': (2, 200000000), 'MAX_ITERATION': (7, 1)}}: Settings of the supported benchmarks
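For illustration, a minimal sketch of how the parameters of the selected benchmarks could be resolved from these two settings. The helper name get_bench_params is hypothetical, and it assumes that in each (a, b) tuple the second element is the actual parameter value (the meaning of the first element is not documented here):

BENCHMARK_BENCH = ['PageRank']
BENCH_CONF = {
    'PageRank': {'NUM_OF_PARTITIONS': (3, 1000), 'NumTrials': 1,
                 'numV': (2, 7000000), 'MAX_ITERATION': (8, 1)},
}

def get_bench_params(name):
    """Return {parameter: value}, unwrapping any (index, value) tuples."""
    params = {}
    for key, entry in BENCH_CONF[name].items():
        params[key] = entry[1] if isinstance(entry, tuple) else entry
    return params

for bench in BENCHMARK_BENCH:
    print(bench, get_bench_params(bench))
# PageRank {'NUM_OF_PARTITIONS': 1000, 'NumTrials': 1, 'numV': 7000000, 'MAX_ITERATION': 1}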
- config.CLUSTER_ID = '1': ID of the cluster with the launched instances
- config.CREDENTIAL_PROFILE = 'cspark': Name of the AWS credential profile
- config.DATA_AMI = {'eu-west-1': {'az': 'eu-west-1c', 'price': '0.3', 'ami': 'ami-d3225da0', 'keypair': 'gazzettaEU'}, 'us-west-2': {'az': 'us-west-2c', 'price': '0.25', 'keypair': 'gazzetta', 'ami': 'ami-7f5ff81f', 'snapid': 'snap-4f38bf1c'}}: AMI ID, availability zone, spot price, and key pair for each region
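As a sketch, the launch settings for the configured region can be obtained with a plain dictionary lookup on config.REGION; the values printed below simply restate the us-west-2 entry above:

REGION = 'us-west-2'
DATA_AMI = {
    'eu-west-1': {'az': 'eu-west-1c', 'price': '0.3',
                  'ami': 'ami-d3225da0', 'keypair': 'gazzettaEU'},
    'us-west-2': {'az': 'us-west-2c', 'price': '0.25', 'keypair': 'gazzetta',
                  'ami': 'ami-7f5ff81f', 'snapid': 'snap-4f38bf1c'},
}

region_conf = DATA_AMI[REGION]
print(region_conf['ami'], region_conf['az'], region_conf['price'], region_conf['keypair'])
# ami-7f5ff81f us-west-2c 0.25 gazzetta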
- config.HDFS_MASTER = '': URL of the HDFS NameNode; if not set, the created cluster is also the HDFS cluster
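A short sketch of how this flag could drive the setup; the branching shown is an assumption for illustration, not the tool's actual code:

HDFS_MASTER = ''  # empty string: no external NameNode is used

if HDFS_MASTER == '':
    print('No external NameNode configured: the created cluster is also the HDFS cluster')
else:
    print('Using the existing HDFS NameNode at ' + HDFS_MASTER)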
- config.INSTANCE_TYPE = 'r3.4xlarge': Instance type
- config.KEY_PAIR_PATH = '/home/meteos/gazzetta.pem': Path of the key pair for the instances
- config.KILL_JAVA = 1: Kill every Java application on the cluster
- config.NUM_INSTANCE = 0: Number of instances to use
- config.NUM_RUN = 1: Number of times to repeat the benchmark
- config.PREV_SCALE_FACTOR = 1000: Important setting; if it equals SCALE_FACTOR, there is no need to generate new data on HDFS
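For example, the data-generation step can be skipped when the two scale factors match. SCALE_FACTOR is assumed to be defined elsewhere in the configuration; the check below is only a sketch of that comparison:

SCALE_FACTOR = 1000       # assumed to be set elsewhere in the configuration
PREV_SCALE_FACTOR = 1000  # scale factor of the data already on HDFS

if SCALE_FACTOR == PREV_SCALE_FACTOR:
    print('Data with this scale factor is already on HDFS: skipping generation')
else:
    print('Generating new benchmark data on HDFS')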
- config.REBOOT = 0: Reboot the instances of the cluster
- config.REGION = 'us-west-2': AWS region to use
- config.SECURITY_GROUP = 'spark-cluster': Security group of the instances
- config.SPARK_HOME = '/usr/local/spark/': Location of Spark in the AMI
- config.UPDATE_SPARK = 0: Git pull and build Spark on all nodes of the cluster
- config.UPDATE_SPARK_DOCKER = 0: Pull the Docker image on each node of the cluster
- config.UPDATE_SPARK_MASTER = 0: Git pull and build Spark only on the master node
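The integer flags above (KILL_JAVA, REBOOT, UPDATE_*) behave as booleans. A hedged sketch of how the three update flags could gate the cluster setup steps; the helper names are hypothetical and only stand in for the real provisioning actions:

UPDATE_SPARK = 0          # git pull and build Spark on every node
UPDATE_SPARK_MASTER = 0   # git pull and build Spark on the master only
UPDATE_SPARK_DOCKER = 0   # pull the Docker image on every node

def update_all_nodes():
    print('git pull + build Spark on all cluster nodes')

def update_master_only():
    print('git pull + build Spark on the master node')

def pull_docker_image():
    print('docker pull on every node')

if UPDATE_SPARK:
    update_all_nodes()
elif UPDATE_SPARK_MASTER:
    update_master_only()
if UPDATE_SPARK_DOCKER:
    pull_docker_image()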