#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: jcgomez
"""
import pandas as pd
import os
import glob
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from random import randint

def gen_rand_od():
    """Draw one random observed-damage record.

    Returns a list of four integers, each drawn with ``randint(0, 9)``
    (AeDES scale, 0 to 9 inclusive), one per column (vs, fl, roof, ip).
    """
    sample = []
    for _ in range(4):
        sample.append(randint(0, 9))
    return sample

def compute_damage_conversion(infile):
    """Read a damage table from CSV and fit a logistic-regression classifier.

    The first CSV column is used as the class label (damage state) and all
    remaining columns as predictors. Missing values are replaced by 0 before
    fitting.

    Parameters
    ----------
    infile : path of the CSV file to read.

    Returns
    -------
    tuple
        ``(table, classifier)``: the NaN-filled DataFrame and the fitted
        ``LogisticRegression`` model.

    Notes
    -----
    To use Gaussian Naive Bayes instead, build the classifier with
    ``GaussianNB()`` (already imported by this file) and fit it on the same
    features/labels.
    NOTE(review): the ``multi_class`` argument appears to be deprecated in
    recent scikit-learn releases -- confirm against the installed version.
    """
    table = pd.read_csv(infile).fillna(0)
    labels = table.iloc[:, 0]      # damage state (training target)
    features = table.iloc[:, 1:]   # all remaining attributes (training inputs)
    classifier = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial')
    classifier.fit(features, labels)
    return (table, classifier)

def get_damage_conv(ds,target='B'):
    """Convert one damage state of the source scheme into the target scheme.

    For every damage state ``t`` of the target scheme the returned value is::

        sum_od( P(t | od) * P(ds | od) * p_od ) / mean_od( P(ds | od) )

    The function reads these module-level globals, which the calling script
    must have assigned beforehand: ``A_given_od`` and ``B_given_od``
    (DataFrames of class probabilities, one row per observed-damage
    combination), ``ds_A`` and ``ds_B`` (damage states of each scheme) and
    ``p_od`` (probability assigned to each observed-damage combination).

    Parameters
    ----------
    ds : damage state of the source scheme. It is also used as a column label
        of the source probability table, so it must match one of its columns.
    target : ``'A'`` converts from scheme B to scheme A; any other value
        (default ``'B'``) converts from scheme A to scheme B.

    Returns
    -------
    pandas.Series
        One value per column of the target probability table.

    Raises
    ------
    ValueError
        If ``ds`` is not one of the source scheme's damage states.
    """
    if target == 'A':
        tab_target, tab_source, valid_states = A_given_od, B_given_od, ds_B
    else:
        tab_target, tab_source, valid_states = B_given_od, A_given_od, ds_A
    if ds not in set(valid_states):
        # A real exception class is required; raising a plain string is a TypeError.
        raise ValueError('Error, damage state not compatible')
    # Weight each observed-damage row of the target table by P(ds | od) * p_od.
    weights = tab_source[ds] * p_od
    weighted = tab_target.mul(weights, axis=0)
    # Explicit axis=0 (sum over observations) keeps the per-column Series result
    # regardless of how the installed pandas treats axis=None.
    return (weighted / tab_source[ds].mean()).sum(axis=0)

import json
def get_model_dict(sourceschema,sourcetaxonomy,targetschema,targettaxonomy,source_ds,target_ds,conv_matrix):
    """Bundle a conversion matrix and its metadata into a plain dictionary.

    ``source_ds`` and ``target_ds`` must provide a ``tolist()`` method (e.g.
    NumPy arrays); they are stored as Python lists. All other arguments are
    stored unchanged under their respective keys.
    """
    return {
        'source_schema': sourceschema,
        'source_taxonomy': sourcetaxonomy,
        'target_schema': targetschema,
        'target_taxonomy': targettaxonomy,
        'source_damage_states': source_ds.tolist(),
        'target_damage_states': target_ds.tolist(),
        'conv_matrix': conv_matrix,
    }

def save_modeldict(model,outfile):
    """Write a model dictionary to ``outfile`` as JSON.

    Parameters
    ----------
    model : dict whose ``'conv_matrix'`` entry is a pandas DataFrame; the other
        entries must already be JSON-serializable.
    outfile : destination path. An empty string disables saving.

    The DataFrame is stored as a nested JSON object (the default
    ``DataFrame.to_json()`` layout). ``model`` itself is not modified.
    """
    if outfile == "":
        return
    # Shallow copy so the caller's dict keeps its DataFrame.
    serializable = dict(model)
    # to_json() yields JSON text: parse it with json.loads rather than eval(),
    # which fails on JSON literals such as null/true/false (e.g. NaN cells)
    # and would execute arbitrary expressions.
    serializable['conv_matrix'] = json.loads(model['conv_matrix'].to_json())
    with open(outfile, 'w') as fp:
        json.dump(serializable, fp)

def load_model_dict(infile):
    """Load a model dictionary from a JSON file.

    The ``'conv_matrix'`` entry is converted to a pandas DataFrame. It may be
    stored either as a nested JSON object (the form ``save_modeldict`` writes)
    or as a string containing JSON text.

    Parameters
    ----------
    infile : path of the JSON file to read.

    Returns
    -------
    dict
        The decoded model with ``'conv_matrix'`` as a DataFrame.
    """
    from io import StringIO  # local import keeps the file's import block untouched

    with open(infile) as f:
        model = json.load(f)
    raw_matrix = model['conv_matrix']
    if not isinstance(raw_matrix, str):
        # Already-decoded JSON object: re-encode so read_json can parse it
        # (read_json does not accept a dict).
        raw_matrix = json.dumps(raw_matrix)
    # Wrap in StringIO: read_json expects a path or file-like object.
    model['conv_matrix'] = pd.read_json(StringIO(raw_matrix))
    return model

# Input folders: every *.csv file found in each of them is processed below.
# These are absolute paths on the author's machine; adapt them before running.
path_Medina_2019 = '/home/jcgomez/data/Nextcloud/Data_management/Data_Publication_Multi-risk_paper/Probabilistic_inter-scheme_compatibility_matrices_for_multi-hazard_exposure_modeling/Inter-scheme_damage_conversion_matrices_from_AeDEs_heuristics/Generate_DS_OD_comb_heuristics/Medina_2019/'
path_sara = '/home/jcgomez/data/Nextcloud/Data_management/Data_Publication_Multi-risk_paper/Probabilistic_inter-scheme_compatibility_matrices_for_multi-hazard_exposure_modeling/Inter-scheme_damage_conversion_matrices_from_AeDEs_heuristics/Generate_DS_OD_comb_heuristics/SARA/'

# Output folders -- please change these to your preferred locations.
# path_output: JSON conversion matrices (only used by the commented-out save_modeldict calls).
# path_csv: conversion matrices written as CSV.
# path_prob: per-observation probability tables (only used by a commented-out to_csv call).
# NOTE(review): path_prob repeats the 'Inter-scheme_damage_conversion_matrices_from_AeDEs_heuristics'
# directory twice in a row -- possibly a copy/paste slip; confirm against the real folder layout.
path_output = '/home/jcgomez/data/Nextcloud/Data_management/Data_Publication_Multi-risk_paper/Probabilistic_inter-scheme_compatibility_matrices_for_multi-hazard_exposure_modeling/Inter-scheme_damage_conversion_matrices_from_AeDEs_heuristics/Conversion_matrices/Conv_matrix_LogisticRegr/json/'
path_csv = '/home/jcgomez/data/Nextcloud/Data_management/Data_Publication_Multi-risk_paper/Probabilistic_inter-scheme_compatibility_matrices_for_multi-hazard_exposure_modeling/Inter-scheme_damage_conversion_matrices_from_AeDEs_heuristics/Conversion_matrices/Conv_matrix_LogisticRegr/csv/'
path_prob = '/home/jcgomez/data/Nextcloud/Data_management/Data_Publication_Multi-risk_paper/Probabilistic_inter-scheme_compatibility_matrices_for_multi-hazard_exposure_modeling/Inter-scheme_damage_conversion_matrices_from_AeDEs_heuristics/Inter-scheme_damage_conversion_matrices_from_AeDEs_heuristics/Generate_DS_OD_comb_heuristics/Probabilities_OD_SARA-Medina2019_Logistic-Regression/'

# Collect the input tables; glob returns an empty list when a folder is missing
# or contains no CSV files.
files_Medina_2019 = glob.glob(os.path.join(path_Medina_2019,'*.csv'))
files_sara = glob.glob(os.path.join(path_sara,'*.csv'))

# Build the inter-scheme damage conversion matrices for every pair of input
# tables: "A" is a Medina_2019 table, "B" is a SARA table.
# NOTE: get_damage_conv() reads ds_A, ds_B, A_given_od, B_given_od and p_od as
# module-level globals, so they must be (re)assigned here before each call.
for infile_A in files_Medina_2019:
    # Read the damage table of scheme A and fit its classifier.
    dc_A, lrmodel_A = compute_damage_conversion(infile_A)
    ds_A = dc_A.DS.unique()
    # Random sample of observed-damage combinations with duplicates removed;
    # every retained combination gets the same probability p_od.
    set_od = pd.DataFrame([gen_rand_od() for _ in range(100000)]).drop_duplicates()  # TESTING!! (not the test)
    set_od.columns = dc_A.columns[1:]
    p_od = 1/len(set_od)
    # P(damage state of A | observed damage): depends only on A and set_od,
    # so it is computed once per A table rather than once per (A, B) pair.
    A_given_od = pd.DataFrame(lrmodel_A.predict_proba(set_od))
    # Schema name = parent folder name, taxonomy name = file name up to the first dot.
    a = os.path.basename(os.path.dirname(infile_A))
    b = os.path.basename(infile_A).split('.')[0]

    for infile_B in files_sara:
        # Read the damage table of scheme B and fit its classifier.
        dc_B, lrmodel_B = compute_damage_conversion(infile_B)
        # Both tables must use the same columns in the same order. Comparing
        # plain lists also copes with a differing number of columns.
        if list(dc_A.columns) != list(dc_B.columns):
            raise Exception('Error, Damage observation do not match')
        # Damage states of scheme B.
        ds_B = dc_B.DS.unique()
        # P(damage state of B | observed damage).
        B_given_od = pd.DataFrame(lrmodel_B.predict_proba(set_od))

        # One row per source damage state, in the order of ds_A / ds_B.
        AtoB_conv_matrix = pd.DataFrame([get_damage_conv(ds, 'B') for ds in ds_A])
        BtoA_conv_matrix = pd.DataFrame([get_damage_conv(ds, 'A') for ds in ds_B])

        c = os.path.basename(os.path.dirname(infile_B))
        d = os.path.basename(infile_B).split('.')[0]

        # To export the probabilistic description of the observed damages in
        # CSV format, uncomment the following lines.
        #B_given_od.join(set_od.reset_index()).to_csv(os.path.join(path_prob,'NB_'+c+'_'+d+'_to_'+a+'_'+b+'.csv'))
        #A_given_od.join(set_od.reset_index()).to_csv(a+'_'+b+'_to_'+c+'_'+d+'.csv')

        # We care about BtoA because it is SARA-Medina
        model_dict_BtoA = get_model_dict(c,d,a,b,ds_B,ds_A,BtoA_conv_matrix)
        # However, we can also obtain AtoB (Medina-SARA)
        model_dict_AtoB = get_model_dict(a,b,c,d,ds_A,ds_B,AtoB_conv_matrix)

        # Write the compatibility matrices in CSV format (comment these two
        # lines out to disable the CSV export).
        model_dict_BtoA['conv_matrix'].to_csv(os.path.join(path_csv,'CM_'+c+'_'+d+'_to_'+a+'_'+b+'.csv'))
        model_dict_AtoB['conv_matrix'].to_csv(os.path.join(path_csv,'CM_'+a+'_'+b+'_to_'+c+'_'+d+'.csv'))

        # Save SARA-Medina conversion matrices in JSON format:
        #save_modeldict(model_dict_BtoA,os.path.join(path_output,c+'_'+d+'_to_'+a+'_'+b+'.json'))
        # Save Medina-SARA conversion matrices in JSON format:
        #save_modeldict(model_dict_AtoB, os.path.join(path_output,a +'_'+b+'_to_'+c+'_'+d+'.json'))
