Model selection: Regression

Continuing with the regression problem.

Let’s take house price regression as an example.

https://www.kaggle.com/c/house-prices-advanced-regression-techniques/overview

Linear

from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline.
# `normalize` is intentionally not passed: the argument was deprecated in
# scikit-learn 1.0 and removed in 1.2, where supplying it raises TypeError.
# The original value was False (no normalisation), so the model is unchanged.
linear = LinearRegression(fit_intercept=True, copy_X=True)
linear.fit(train_X, train_Y)
linear_pred = linear.predict(test_X)

# Predictions go through np.expm1 before being written out
# (presumably the target was log1p-transformed upstream -- confirm).
sub = pd.DataFrame({'Id': ids, 'SalePrice': np.expm1(linear_pred)})
sub.to_csv('house_linear.csv', index=False)

Result:
h_linear

Lasso

from sklearn import datasets, linear_model
# L1-regularised linear regression; fit() returns the estimator itself,
# so construction and training are chained.
lasso = linear_model.Lasso(alpha=1.0).fit(train_X, train_Y)
linear_pred = lasso.predict(test_X)

# Predictions go through np.expm1 before being written out
# (presumably the target was log1p-transformed upstream -- confirm).
sub = pd.DataFrame(
    {
        'Id': ids,
        'SalePrice': np.expm1(linear_pred),
    }
)
sub.to_csv('house_lasso.csv', index=False)

Result:
h_lasso

Ridge

from sklearn import datasets, linear_model
# L2-regularised linear regression; fit() returns the estimator itself,
# so construction and training are chained.
ridge = linear_model.Ridge(alpha=1.0).fit(train_X, train_Y)
linear_pred = ridge.predict(test_X)

# Predictions go through np.expm1 before being written out
# (presumably the target was log1p-transformed upstream -- confirm).
sub = pd.DataFrame(
    {
        'Id': ids,
        'SalePrice': np.expm1(linear_pred),
    }
)
sub.to_csv('house_ridge.csv', index=False)

Result:
h_ridge

Random forest

from sklearn.ensemble import  RandomForestRegressor

# Random forest regressor.
# random_state is fixed so that repeated runs produce the same submission:
# with max_features='sqrt' each split draws a random feature subset, so an
# unseeded forest gives different predictions on every run.
# NOTE(review): with min_samples_leaf=10 a node needs at least 20 samples to
# be split, so min_samples_split=9 looks non-binding -- confirm it is intended.
rf = RandomForestRegressor(n_estimators=300, min_samples_split=9, min_samples_leaf=10,
                           max_features='sqrt', max_depth=8, bootstrap=False,
                           random_state=42)
rf.fit(train_X, train_Y)
rf_pred = rf.predict(test_X)

# Predictions go through np.expm1 before being written out
# (presumably the target was log1p-transformed upstream -- confirm).
sub = pd.DataFrame({'Id': ids, 'SalePrice': np.expm1(rf_pred)})
sub.to_csv('house_rf.csv', index=False)

Result:
h_rf

Gradient boost

from sklearn.ensemble import GradientBoostingRegressor
# Gradient-boosted trees.
# random_state is fixed so that repeated runs produce the same submission:
# subsample=0.37 and max_features=20 both introduce random sampling, so an
# unseeded model gives different predictions on every run.
# NOTE(review): per the scikit-learn docs `tol` is only consulted when
# n_iter_no_change is set; it is left at its default here, so tol=0.1 appears
# to have no effect -- confirm whether early stopping was intended.
gdbt = GradientBoostingRegressor(tol=0.1, subsample=0.37, n_estimators=200, max_features=20,
                                 max_depth=6, learning_rate=0.03, random_state=42)
gdbt.fit(train_X, train_Y)
gdbt_pred = gdbt.predict(test_X)

# Predictions go through np.expm1 before being written out
# (presumably the target was log1p-transformed upstream -- confirm).
sub = pd.DataFrame({'Id': ids, 'SalePrice': np.expm1(gdbt_pred)})
sub.to_csv('house_gdbt.csv', index=False)

Result:
h_gdbt

發表留言