#!/usr/bin/env python3

import os
from datetime import datetime, timedelta
from airflow import DAG
from custom_operators.qarnot_operators import *


# Root against which every relative path below is joined.
# NOTE(review): '.' is resolved against the working directory of whichever
# process loads this file -- confirm that is the intended location.
path_airflow = '.'

# Task-level settings handed to the DAG through ``default_args``.
default_args = {
    'owner': 'Airflow',
    'depends_on_past': False,
    'start_date': datetime(2000, 1, 1),
    'retries': 0,
    'retry_delay': timedelta(seconds=5),
}

# Qarnot-specific entries, appended after the generic ones so the resulting
# key order is unchanged. Presumably read by the Qarnot operators -- verify
# against custom_operators.qarnot_operators.
default_args.update({
    # NOTE(review): this is the string 'True', not the boolean True.
    'automatically_trust_ssh_host': 'True',
    'path_outputs': os.path.join(path_airflow, 'spark-outputs'),
    'path_config_qarnot': os.path.join(path_airflow, 'config/qarnot.conf'),
    'path_config_cluster': os.path.join(
        path_airflow, 'config/tmp_config/cluster.conf'
    ),
    'path_local_logging_config': os.path.join(
        path_airflow, 'config/python_local_logging.conf'
    ),
})

# The DAG object all tasks in this file are attached to.
# No schedule is given here, so Airflow's default schedule applies. Combined
# with the start_date of 2000-01-01 in default_args, the scheduler would
# otherwise create a backfill run for every missed interval since then (each
# one starting a cluster). catchup=False limits scheduling to the most recent
# interval; manually triggered runs are unaffected.
dag = DAG('my_first_dag', default_args=default_args, catchup=False)

# First task of the pipeline: a QarnotStartCluster operator configured with
# two workers. The ssh_key value is a placeholder that has to be replaced
# with a real public key before the DAG is used.
t_start_cluster = QarnotStartCluster(
    dag=dag,
    task_id='start_spark_cluster',
    ssh_key='<<<MY PUBLIC SSH KEY>>>',
    nb_workers=2,
    job_timeout="01:00:00",  # presumably HH:MM:SS -- confirm with the operator
    path_resources=os.path.join(path_airflow, 'spark-resources'),
)

# Two QarnotSubmitApp tasks that differ only in task id and application path.
# They are instantiated in the listed order and bound to t_run_app_1 and
# t_run_app_2 respectively.
t_run_app_1, t_run_app_2 = (
    QarnotSubmitApp(task_id=app_task_id, path_app=app_path, dag=dag)
    for app_task_id, app_path in (
        ('launch_spark_app_1', '/job/apps/word_count.py'),
        ('launch_spark_app_2', '/job/apps/word_count100.py'),
    )
)

# QarnotFetchOutput task, placed after the application tasks in the chain
# at the bottom of this file.
t_fetch_output = QarnotFetchOutput(
    dag=dag,
    task_id='fetch_output',
    download_locally=True,
    snapshot_wait_sec=160,  # presumably a wait in seconds -- confirm with the operator
)

# Last task of the pipeline: a QarnotStopCluster operator.
# NOTE(review): no trigger rule is set, so Airflow's default presumably
# applies and this task would not run when an upstream task fails -- confirm
# whether the cluster should be stopped in that case as well.
t_stop_cluster = QarnotStopCluster(dag=dag, task_id='stop_spark_cluster')

# Linear pipeline, one dependency edge per statement:
# start cluster -> app 1 -> app 2 -> fetch output -> stop cluster.
t_start_cluster >> t_run_app_1
t_run_app_1 >> t_run_app_2
t_run_app_2 >> t_fetch_output
t_fetch_output >> t_stop_cluster