import pandas as pd |
def run(
    train_df: pd.DataFrame,
    trust_df: pd.DataFrame,
    userinfo_df: pd.DataFrame,
    test_df: pd.DataFrame,
) -> pd.DataFrame:
    """Entry point called by the competition system: train, then predict.

    Per the template's contract, all three training frames and the test
    frame must be passed in this order, even when some of them are unused.
    The model is trained inside this function (it does not need to be
    saved) and the predictions for ``test_df`` are returned directly.

    This baseline does no training: it predicts ``1`` for every test row.

    Args:
        train_df: Training data (unused by this baseline).
        trust_df: Trust data (unused by this baseline).
        userinfo_df: User information (unused by this baseline).
        test_df: Rows to predict. The contract requires the output to
            contain ``Item`` and ``User`` columns, which are carried over
            from this frame.

    Returns:
        A deep copy of ``test_df`` with an added ``Prediction`` column whose
        values are 0/1 (always 1 here). ``test_df`` itself is not modified.
    """
    print("log here ....")

    # Work on a deep copy so the caller's frame is left untouched.
    out_test_df = test_df.copy(deep=True)

    # Broadcast the constant with a scalar assignment instead of a row-wise
    # apply: no per-row Python call, and no reliance on how apply() treats
    # an empty frame.
    out_test_df["Prediction"] = 1
    return out_test_df
if __name__ == '__main__':
    # Local harness for offline parameter tuning only. It is unrelated to
    # the online competition system, which calls run() directly as its
    # entry point.

    # Reference code for loading the data sets from files.
    train_file = 'train.csv'
    trust_file = 'trust.csv'
    userinfo_file = 'user_info.csv'
    test_file = 'test.csv'
    predict_file = 'results_tmp.csv'

    train_df = pd.read_csv(train_file, engine='python', encoding='utf-8')
    trust_df = pd.read_csv(trust_file, engine='python', encoding='utf-8')
    userinfo_df = pd.read_csv(userinfo_file, engine='python', encoding='utf-8')
    test_df = pd.read_csv(test_file, engine='python', encoding='utf-8')

    # Invoke the entry point and write the predictions without the index.
    result_df = run(train_df, trust_df, userinfo_df, test_df)
    result_df.to_csv(predict_file, index=False)