# -*- coding: utf-8 -*-
"""
This code snippet loads the data, trains a k-NN classifier using the
features "danceability", "key", "loudness", "instrumentalness" and "liveness",
and makes a prediction for the test data. The obtained predictions can be
copy-pasted from the Python terminal and uploaded to the leaderboard.

@author: Andreas
"""
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd #for reading csv files

# Read the labelled training songs and the unlabelled songs to classify.
training = pd.read_csv('training_data.csv', sep=',')
test = pd.read_csv('songs_to_classify.csv', sep=',')

# Columns used as model inputs; the 'label' column is the prediction target.
features = [
    'danceability',
    'key',
    'loudness',
    'instrumentalness',
    'liveness',
]
X_train = training[features].values
y_train = training['label'].values
X_test = test[features].values

# Normalize data: divide every feature by its largest absolute value in the
# training set, so the training features end up in [-1, 1]. The test set is
# scaled with the *training* statistics so both live in the same feature
# space (test values may therefore fall slightly outside [-1, 1]).
# Can also be done using sklearn methods such as MinMaxScaler()
feature_scale = np.max(np.abs(X_train), axis=0)
# A feature that is zero in every training row would otherwise cause a
# division by zero and fill the column with NaN, which k-NN cannot handle.
# Dividing by 1 instead leaves such a column untouched.
feature_scale = np.where(feature_scale == 0, 1.0, feature_scale)
X_trainn = X_train / feature_scale
X_testn = X_test / feature_scale


# note: all inputs/features are treated as quantitative/numeric.
# Some of the features are perhaps more sensible to treat as
# qualitative/categorical. For that, sklearn preprocessing methods
# such as OneHotEncoder() can be used.

# define the k-NN model. To set n_neighbors in a systematic way, use cross validation!
knnmodel = KNeighborsClassifier(n_neighbors=5)

# fit the classifier on the normalized training features and their labels
knnmodel.fit(X=X_trainn, y=y_train)

# predict a label for every test song and lay the result out as a single
# row of integers, i.e. an array of shape (1, number_of_test_songs)
predictions = knnmodel.predict(X=X_testn).astype(int).reshape(1, -1)

# NumPy abbreviates arrays with more than 1000 elements using "...", which
# would make the printed predictions incomplete. Raise the threshold when
# needed so every prediction is printed; output for smaller arrays is
# unchanged.
with np.printoptions(threshold=max(predictions.size, 1000)):
    print(predictions)