import os.path
from sklearn.ensemble import *
from sklearn.linear_model import *
from sklearn import svm
import numpy as np
import subprocess as sp
import argparse
import pandas as pd
from sklearn.externals import joblib

def build_model(str_model, file_model, file_train, ratio):
    """Train the estimator described by ``str_model`` and save it on disk.

    The function removes a stale regular file at ``file_model``, refuses to
    overwrite anything else already present at that path, runs
    ``src/generate_file.rb`` to produce ``<file_model>.train_x`` and
    ``<file_model>.train_y``, builds the estimator by executing
    ``str_model``, fits it, and dumps it to ``<file_model>/model``.

    Args:
        str_model: Python source that must bind the estimator to the name
            ``model``, e.g.
            ``"model = RandomForestClassifier(n_estimators=500)"``.
        file_model: Path used both as the prefix of the generated train
            files and as the directory that will hold the dumped model.
        file_train: Training data path, forwarded to the Ruby script.
        ratio: Forwarded to the Ruby script as ``str(ratio)``.

    Returns:
        None. Returns early (after printing ``already exist``) when
        ``file_model`` already exists and is not a regular file.

    Raises:
        subprocess.CalledProcessError: If the Ruby generator exits with a
            non-zero status.
        ValueError: If ``str_model`` does not define a name ``model``.
    """
    if os.path.isfile(file_model):
        os.remove(file_model)
    if os.path.exists(file_model):
        # Whatever is left at this path (normally a previously created model
        # directory) is not overwritten.
        print('already exist')
        return

    # Down sample neg data, generate train files.
    # A failure is deliberately allowed to propagate: continuing would either
    # crash below on missing files or silently train on stale ones.
    sp.check_call(['ruby', 'src/generate_file.rb', '-train',
                   file_model, file_train, str(ratio)])

    train_x = pd.read_csv(file_model + '.train_x')
    train_y = pd.read_csv(file_model + '.train_y', header=None)

    # SECURITY: str_model is executed as arbitrary Python code; only pass
    # trusted input here.
    # An explicit namespace is used so the result can be read back reliably
    # (a bare exec inside a function cannot create locals on Python 3). It
    # exposes the module globals (the sklearn classes) plus this function's
    # locals, which is what the original bare exec could see.
    scope = dict(globals(), **locals())
    exec(str_model, scope)
    if 'model' not in scope:
        raise ValueError(
            "model string must assign the estimator to the name 'model'")
    model = scope['model']

    # The label file has no header, so the labels are in column 0.
    model.fit(train_x, train_y[0])

    os.mkdir(file_model)
    file_obj = file_model + '/model'
    joblib.dump(model, file_obj, compress=0)

def run_model(file_model, file_data):
    """Load the model stored under ``file_model`` and print its predictions.

    Args:
        file_model: Directory containing the dumped estimator as ``model``.
        file_data: CSV file to score. Columns from position 4 onwards are
            passed to the estimator; the first four columns are skipped.

    Prints one prediction per data row, in row order.
    """
    file_obj = file_model + '/model'
    model = joblib.load(file_obj)
    data = pd.read_csv(file_data)

    # Nothing to score; also avoids handing an empty array to predict().
    if len(data) == 0:
        return

    # Positional selection replaces the deprecated ``.ix`` indexer, and a
    # single 2-D predict call replaces one 1-D call per row.
    features = data.iloc[:, 4:]
    for p in model.predict(features):
        print(p)

# Command-line entry point.
#   --mode train     : build and save a model
#   --mode run       : load a saved model and print predictions
#   any other / none : build the model, then run it
parser = argparse.ArgumentParser()
parser.add_argument('--mode',
                    action='store',
                    help='create model')
parser.add_argument('--model',
                    action='store',
                    help='string to create model')
parser.add_argument('--file_model',
                    action='store',
                    help='model file')
parser.add_argument('--ratio',
                    action='store',
                    help='ratio for splitting data')
parser.add_argument('--file_train',
                    action='store',
                    help='train file')
parser.add_argument('--file_data',
                    action='store',
                    help='data file')
args = parser.parse_args()

if args.mode == 'train':
    build_model(args.model, args.file_model, args.file_train, args.ratio)
elif args.mode == 'run':
    run_model(args.file_model, args.file_data)
else:
    # build_model takes the model string as its first argument; omitting it
    # here made this branch fail with a TypeError.
    build_model(args.model, args.file_model, args.file_train, args.ratio)
    run_model(args.file_model, args.file_data)
