In [1]: import numpy as np
In [2]: import statsmodels.api as sm
Create some data
In [3]: nsample = 50
In [4]: sig = 0.25
In [5]: x1 = np.linspace(0, 20, nsample)
In [6]: X = np.c_[x1, np.sin(x1), (x1 - 5)**2, np.ones(nsample)]
In [7]: beta = [0.5, 0.5, -0.02, 5.]
In [8]: y_true = np.dot(X, beta)
In [9]: y = y_true + sig * np.random.normal(size=nsample)
Set up and estimate the model
In [10]: olsmod = sm.OLS(y, X)
In [11]: olsres = olsmod.fit()
In [12]: print olsres.params
[ 0.5102 0.4748 -0.0212 4.9435]
In [13]: print olsres.bse
[ 0.0129 0.0507 0.0011 0.0837]
In-sample prediction
In [14]: ypred = olsres.predict(X)  # evaluate the fitted model at the same design matrix X used for estimation
Create a new sample of explanatory variables Xnew, predict out of sample, and plot
In [15]: x1n = np.linspace(20.5, 25, 10)
In [16]: Xnew = np.c_[x1n, np.sin(x1n), (x1n - 5)**2, np.ones(10)]
In [17]: ynewpred = olsres.predict(Xnew) # predict out of sample
In [18]: print ypred
[ 4.413 4.8928 5.3345 5.7123 6.0097 6.2222 6.358 6.437
6.487 6.5396 6.6246 6.7656 6.9756 7.2552 7.5923 7.9644
8.3419 8.6933 8.99 9.2115 9.3483 9.4037 9.3932 9.3421
9.281 9.2412 9.2491 9.3223 9.4663 9.6742 9.9269 10.1972
10.4537 10.6661 10.8102 10.8718 10.8491 10.7532 10.6058 10.436
10.2755 10.153 10.0903 10.0977 10.1733 10.3024 10.4605 10.6174
10.7417 10.8061]
In [19]: import matplotlib.pyplot as plt
In [20]: plt.figure();
In [21]: plt.plot(x1, y, 'o', x1, y_true, 'b-');
In [22]: plt.plot(np.hstack((x1, x1n)), np.hstack((ypred, ynewpred)), 'r');
In [23]: plt.title('OLS prediction, blue: true and data, fitted/predicted values:red');