Config Module

Configuration module of the cSpark test benchmark.
- config.BENCHMARK_BENCH = ['PageRank']
  Spark-bench benchmark to execute
- config.BENCHMARK_PERF = []
  Spark-perf benchmark to execute
- config.BENCH_CONF = {'DecisionTree': {'NUM_OF_PARTITIONS': (4, 1200), 'NUM_OF_FEATURES': (3, 6), 'MAX_ITERATION': (21, 1), 'NUM_OF_EXAMPLES': (2, 50000000), 'NUM_OF_CLASS_C': (7, 10)}, 'scala-sort-by-key': {'ScaleFactor': 100}, 'scala-count': {'ScaleFactor': 10}, 'scala-sort-by-key-int': {'ScaleFactor': 50}, 'scala-agg-by-key-int': {'ScaleFactor': 5}, 'scala-agg-by-key-naive': {'ScaleFactor': 10}, 'KMeans': {'NUM_OF_CLUSTERS': (3, 10), 'MAX_ITERATION': (8, 1), 'NUM_OF_POINTS': (2, 100000000), 'NUM_OF_PARTITIONS': (6, 1000), 'DIMENSIONS': (4, 20), 'SCALING': (5, 0.6)}, 'scala-agg-by-key': {'ScaleFactor': 10}, 'PageRank': {'NUM_OF_PARTITIONS': (3, 1000), 'NumTrials': 1, 'numV': (2, 7000000), 'MAX_ITERATION': (8, 1)}, 'scala-count-w-fltr': {'ScaleFactor': 10}, 'SVM': {'NUM_OF_PARTITIONS': (4, 1000), 'NUM_OF_FEATURES': (3, 10), 'NUM_OF_EXAMPLES': (2, 200000000), 'MAX_ITERATION': (7, 1)}}
  Settings for the supported benchmarks
- config.CLUSTER_ID = '1'
  ID of the cluster with the launched instances
- config.CREDENTIAL_PROFILE = 'cspark'
  AWS credential profile name
- config.DATA_AMI = {'eu-west-1': {'az': 'eu-west-1c', 'price': '0.3', 'ami': 'ami-d3225da0', 'keypair': 'gazzettaEU'}, 'us-west-2': {'az': 'us-west-2c', 'price': '0.25', 'keypair': 'gazzetta', 'ami': 'ami-7f5ff81f', 'snapid': 'snap-4f38bf1c'}}
  AMI ID per region and availability zone
- config.HDFS_MASTER = ''
  URL of the HDFS NameNode; if not set, the created cluster is an HDFS cluster
- config.INSTANCE_TYPE = 'r3.4xlarge'
  Instance type
- config.KEY_PAIR_PATH = '/home/meteos/gazzetta.pem'
  Path of the key pair for the instances
- config.KILL_JAVA = 1
  Kill every Java application on the cluster
- config.NUM_INSTANCE = 0
  Number of instances to use
- config.NUM_RUN = 1
  Number of times to repeat the benchmark
- config.PREV_SCALE_FACTOR = 1000
  Important setting: if it equals SCALE_FACTOR, there is no need to generate new data on HDFS
- config.REBOOT = 0
  Reboot the instances of the cluster
- config.REGION = 'us-west-2'
  AWS region to use
- config.SECURITY_GROUP = 'spark-cluster'
  Security group of the instances
- config.SPARK_HOME = '/usr/local/spark/'
  Location of Spark in the AMI
- config.UPDATE_SPARK = 0
  Git pull and build Spark on every node of the cluster
- config.UPDATE_SPARK_DOCKER = 0
  Pull the Docker image on each node of the cluster
- config.UPDATE_SPARK_MASTER = 0
  Git pull and build Spark on the master node only