import tensorflow as tf
import numpy as np
import pandas as pd
import json
def dataPreProcess(df_data_in):
    """Build the numeric feature frame for one raw passenger DataFrame.

    Every feature is derived from ``df_data_in`` itself (never from a
    module-level frame), so the result has one row per input row and shares
    the input's index.

    Args:
        df_data_in: DataFrame containing the columns ``Age``, ``Fare``,
            ``Embarked``, ``Sex``, ``Pclass``, ``SibSp`` and ``Parch``.

    Returns:
        A new DataFrame with, in order: ``Age`` and ``Fare`` (missing values
        replaced by that column's mean within ``df_data_in``), one
        ``Embarked_<value>`` indicator column per distinct ``Embarked``
        value, one ``Sex_<value>`` indicator column per distinct ``Sex``
        value, and ``Pclass``, ``SibSp``, ``Parch`` copied unchanged.

    Raises:
        KeyError: If one of the required columns is missing.

    Note:
        The indicator columns depend on the values present in
        ``df_data_in``; two inputs with different category sets produce
        different column sets.
    """
    age = df_data_in['Age']
    fare = df_data_in['Fare']
    features = pd.DataFrame({
        'Age': age.fillna(age.mean()),
        'Fare': fare.fillna(fare.mean()),
    })

    # One-hot encode from the *input* frame. dtype=float keeps the whole
    # feature frame numeric instead of mixing bool/uint8 with float columns.
    embarked_onehot = pd.get_dummies(
        df_data_in['Embarked'], prefix='Embarked', dtype=float
    )
    sex_onehot = pd.get_dummies(df_data_in['Sex'], prefix='Sex', dtype=float)

    selected_cols = ['Pclass', 'SibSp', 'Parch']
    # pd.concat builds a new frame, so the caller's data is not modified.
    return pd.concat(
        [features, embarked_onehot, sex_onehot, df_data_in[selected_cols]],
        axis=1,
    )
# Locations of the raw training and test CSV files.
TRAIN_DATA_PATH = "D:/train.csv"
TEST_DATA_PATH = "D:/test.csv"

# Read both raw tables (train first, then test).
df_train_raw, df_test_raw = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_DATA_PATH, TEST_DATA_PATH)
)

# Turn each raw table into its model-ready feature frame; the training
# features are printed for a quick visual check.
dftrain_process = dataPreProcess(df_train_raw)
print(dftrain_process)
dftest_process = dataPreProcess(df_test_raw)
# Short aliases for the Keras sub-modules used below.
models = tf.keras.models
layers = tf.keras.layers

# Size the input layer from the prepared training frame rather than a
# hard-coded 10, so the network always matches the number of feature columns.
n_features = dftrain_process.shape[1]

# Two hidden ReLU layers of 15 units and a single sigmoid output unit.
model = models.Sequential([
    layers.Dense(15, activation="relu", input_shape=(n_features,)),
    layers.Dense(15, activation="relu"),
    layers.Dense(1, activation='sigmoid'),
])
model.summary()

# NOTE(review): mean squared error is kept as in the original; for a sigmoid
# output trained on 0/1 labels, 'binary_crossentropy' is the usual choice --
# confirm whether MSE is intentional.
model.compile(
    loss='mean_squared_error',
    optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
    metrics=['AUC'],
)

# Training labels come from the raw training table; the last 20% of the
# training rows are held out for validation by validation_split.
dftrain_Serviced = df_train_raw['Survived']
history = model.fit(
    x=dftrain_process,
    y=dftrain_Serviced,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
)
# Model outputs for the processed test features, thresholded at 0.5 into
# hard 0/1 labels.
predict_result = model.predict(dftest_process)
predict_result = np.where(predict_result >= 0.5, 1, 0)

# Assemble the submission table: flattened labels plus the passenger ids.
pd_result = pd.DataFrame()
pd_result['Survived'] = predict_result.reshape(-1)
# 'Int32' is pandas' nullable integer dtype, so a missing id would not force
# the column to float.
pd_result['PassengerId'] = df_test_raw['PassengerId'].astype('Int32')
# Keep exactly one row per test passenger; the count is taken from the test
# table instead of a hard-coded 418.
pd_result = pd_result[0:len(df_test_raw)]
pd_result.to_csv("predict_result.csv", index=False)

# Fraction of predictions equal to the reference labels. Rows are compared
# by position, so both tables are assumed to be in the same passenger order.
pd_groundtrue = pd.read_csv('D:/gender_submission.csv')
print(np.mean(np.equal(pd_result['Survived'].to_numpy(), pd_groundtrue['Survived'].to_numpy())))
# Persist the trained model to 'titanic_model.h5'.
model.save('titanic_model.h5')

# Write the recorded training history as JSON. The context manager closes
# (and flushes) the file even if serialization raises, instead of leaving
# an open handle behind.
with open('history.json', 'w') as history_file:
    json.dump(history.history, history_file)