Continuing the regression problem.
Let’s take the house-price regression task as an example.
https://www.kaggle.com/c/house-prices-advanced-regression-techniques/overview
Linear
# Ordinary least-squares baseline for the house-price competition.
# Fix: the `normalize` argument was deprecated in scikit-learn 1.0 and
# removed in 1.2, so passing it crashes on current versions. The original
# passed normalize=False, which was the default, so dropping it is
# behavior-preserving.
from sklearn.linear_model import LinearRegression

linear = LinearRegression(fit_intercept=True, copy_X=True)
linear.fit(train_X, train_Y)  # train_Y appears to be log1p(SalePrice) — TODO confirm upstream
linear_pred = linear.predict(test_X)

# expm1 inverts the log1p transform so the submission is in dollars.
sub = pd.DataFrame({'Id': ids, 'SalePrice': np.expm1(linear_pred)})
sub.to_csv('house_linear.csv', index=False)
Result:
Lasso
# Lasso (L1-regularized) linear regression baseline.
# Fixes: removed the unused `datasets` import, and renamed the prediction
# variable `linear_pred` -> `lasso_pred` for consistency with the other
# model scripts (rf_pred, gdbt_pred).
from sklearn import linear_model

lasso = linear_model.Lasso(alpha=1.0)  # alpha=1.0 is the library default
lasso.fit(train_X, train_Y)  # train_Y appears to be log1p(SalePrice) — TODO confirm upstream
lasso_pred = lasso.predict(test_X)

# expm1 inverts the log1p transform so the submission is in dollars.
sub = pd.DataFrame({'Id': ids, 'SalePrice': np.expm1(lasso_pred)})
sub.to_csv('house_lasso.csv', index=False)
Result:
Ridge
# Ridge (L2-regularized) linear regression baseline.
# Fixes: removed the unused `datasets` import, and renamed the prediction
# variable `linear_pred` -> `ridge_pred` for consistency with the other
# model scripts (rf_pred, gdbt_pred).
from sklearn import linear_model

ridge = linear_model.Ridge(alpha=1.0)  # alpha=1.0 is the library default
ridge.fit(train_X, train_Y)  # train_Y appears to be log1p(SalePrice) — TODO confirm upstream
ridge_pred = ridge.predict(test_X)

# expm1 inverts the log1p transform so the submission is in dollars.
sub = pd.DataFrame({'Id': ids, 'SalePrice': np.expm1(ridge_pred)})
sub.to_csv('house_ridge.csv', index=False)
Result:
Random forest
# Random-forest baseline with hand-tuned hyper-parameters.
from sklearn.ensemble import RandomForestRegressor

# Hyper-parameters gathered in one place so they are easy to scan/tweak.
rf_params = dict(
    n_estimators=300,
    min_samples_split=9,
    min_samples_leaf=10,
    max_features='sqrt',
    max_depth=8,
    bootstrap=False,
)
rf = RandomForestRegressor(**rf_params)
rf.fit(train_X, train_Y)
rf_pred = rf.predict(test_X)

# expm1 inverts the log1p transform applied to the target before writing
# the Kaggle submission file.
sub = pd.DataFrame({'Id': ids, 'SalePrice': np.expm1(rf_pred)})
sub.to_csv('house_rf.csv', index=False)
Result:
Gradient boost
# Gradient-boosting baseline with hand-tuned hyper-parameters.
from sklearn.ensemble import GradientBoostingRegressor

# Hyper-parameters gathered in one place so they are easy to scan/tweak.
gdbt_params = dict(
    tol=0.1,
    subsample=0.37,
    n_estimators=200,
    max_features=20,
    max_depth=6,
    learning_rate=0.03,
)
gdbt = GradientBoostingRegressor(**gdbt_params)
gdbt.fit(train_X, train_Y)
gdbt_pred = gdbt.predict(test_X)

# expm1 inverts the log1p transform applied to the target before writing
# the Kaggle submission file.
sub = pd.DataFrame({'Id': ids, 'SalePrice': np.expm1(gdbt_pred)})
sub.to_csv('house_gdbt.csv', index=False)
Result: