In [1]: import statsmodels.api as sm
In [2]: star98 = sm.datasets.star98.load_pandas().data
In [3]: formula = ('SUCCESS ~ LOWINC + PERASIAN + PERBLACK + PERHISP + PCTCHRT '
...:            '+ PCTYRRND + PERMINTE*AVYRSEXP*AVSALK + PERSPENK*PTRATIO*PCTAF')
...:
In [4]: # Take an explicit copy of the column subset: the following cells edit
...: # ``dta`` in place (pop / del / column assignment), which pandas may
...: # otherwise flag with a SettingWithCopyWarning.
...: dta = star98[['NABOVE', 'NBELOW', 'LOWINC', 'PERASIAN', 'PERBLACK', 'PERHISP',
...:               'PCTCHRT', 'PCTYRRND', 'PERMINTE', 'AVYRSEXP', 'AVSALK',
...:               'PERSPENK', 'PTRATIO', 'PCTAF']].copy()
...:
In [5]: # Response: NABOVE / (NABOVE + NBELOW). ``pop`` returns the NBELOW
...: # column and removes it from ``dta`` in the same step.
...: endog = dta['NABOVE'] / (dta['NABOVE'] + dta.pop('NBELOW'))
...:
In [6]: del dta['NABOVE']
In [7]: dta['SUCCESS'] = endog
In [8]: # Note: ``mod`` is bound to the value returned by ``.fit()``.
...: mod = sm.GLM.from_formula(formula=formula, data=dta,
...:                           family=sm.families.Binomial()).fit()
...:
Next, try passing a formula that calls arbitrary user-defined code: define a function, then reference it by name inside the formula string.
In [9]: def double_it(x):
...:     """Return ``2 * x``; referenced by name inside the next model formula."""
...:     return 2 * x
...:
In [10]: # Same right-hand side as the first formula, except that LOWINC is
....: # wrapped in a call to the user-defined ``double_it``.
....: formula = (
....:     'SUCCESS ~ double_it(LOWINC) + PERASIAN + PERBLACK + PERHISP + PCTCHRT '
....:     '+ PCTYRRND + PERMINTE*AVYRSEXP*AVSALK'
....:     '+ PERSPENK*PTRATIO*PCTAF'
....: )
....:
In [11]: # ``mod2`` is bound to the value returned by ``.fit()``.
....: mod2 = sm.GLM.from_formula(
....:     data=dta,
....:     formula=formula,
....:     family=sm.families.Binomial(),
....: ).fit()
....: