Features

%load_ext autoreload
%autoreload 2

import torch
import numpy as np

Read the example df

`read_football_csv`[source]

read_football_csv(path)

df = read_football_csv(Path('../data/football_data_uk/raw/germany/D1_1415.csv'))
df.head()

	Div	Date	HomeTeam	AwayTeam	FTHG	FTAG	FTR	HTHG	HTAG	HTR	...	BbAv<2.5	BbAH	BbAHh	BbMxAHH	BbAvAHH	BbMxAHA	BbAvAHA	PSCH	PSCD	PSCA
0	D1	2014-08-22	Bayern Munich	Wolfsburg	2	1	H	1	0	H	...	3.00	22	-2.00	2.35	2.24	1.73	1.68	1.29	6.67	10.58
1	D1	2014-08-23	Dortmund	Leverkusen	0	2	A	0	1	A	...	2.39	26	-1.00	2.08	2.02	1.87	1.84	1.75	4.18	4.77
2	D1	2014-08-23	Ein Frankfurt	Freiburg	1	0	H	1	0	H	...	2.01	22	-0.50	2.05	2.01	1.90	1.86	2.01	3.74	3.92
3	D1	2014-08-23	FC Koln	Hamburg	0	0	D	0	0	D	...	2.00	21	0.00	1.50	1.47	2.92	2.70	2.06	3.62	3.86
4	D1	2014-08-23	Hannover	Schalke 04	2	1	H	0	0	D	...	2.15	22	0.25	1.89	1.84	2.08	2.04	3.10	3.60	2.37

5 rows × 67 columns

Extract teams

`extract_teams`[source]

extract_teams(df, home_team='HomeTeam', away_team='AwayTeam')

teams = extract_teams(df)
teams

['Bayern Munich',
 'Dortmund',
 'Ein Frankfurt',
 'FC Koln',
 'Hannover',
 'Hertha',
 'Hoffenheim',
 'Paderborn',
 "M'gladbach",
 'Augsburg',
 'Hamburg',
 'Leverkusen',
 'Schalke 04',
 'Stuttgart',
 'Werder Bremen',
 'Wolfsburg',
 'Freiburg',
 'Mainz']

Game day

There should be n_teams/2 games per gameday.

`add_gamedays`[source]

add_gamedays(df, home_team='HomeTeam', away_team='AwayTeam')

df = add_gamedays(df)

Points

3 points for the winning team
0 points for the losing team
1 point for each team when they draw

Extract all playing teams (sanity check)
current_points: start with 0 points for each team
home/away_points: empty list
Go through the matches in order of date (=time of kickoff)
- note current_points for each team
- update current points according to match outcome
add columns for home/away points

Points accumulation

`add_points`[source]

add_points(df, home_team='HomeTeam', away_team='AwayTeam', date='Date')

df = add_points(df)
df.tail()

	Div	Date	HomeTeam	AwayTeam	FTHG	FTAG	FTR	HTHG	HTAG	HTR	...	BbMxAHH	BbAvAHH	BbMxAHA	BbAvAHA	PSCH	PSCD	PSCA	gameday	home_points	away_points
301	D1	2015-05-23	Hamburg	Schalke 04	2	0	H	0	0	D	...	2.11	2.04	1.85	1.80	2.09	3.89	3.54	33	32	48
303	D1	2015-05-23	Hoffenheim	Hertha	2	1	H	1	0	H	...	2.00	1.95	1.98	1.90	2.14	3.57	3.68	33	41	35
302	D1	2015-05-23	Hannover	Freiburg	2	1	H	1	0	H	...	1.97	1.92	1.98	1.92	2.02	3.54	4.11	33	34	34
304	D1	2015-05-23	M'gladbach	Augsburg	1	3	A	1	0	H	...	1.94	1.89	2.02	1.96	1.47	5.00	7.05	33	66	46
305	D1	2015-05-23	Paderborn	Stuttgart	1	2	A	1	1	D	...	2.09	2.02	1.88	1.83	3.56	4.13	2.01	33	31	33

5 rows × 70 columns

Positions

`add_positions`[source]

add_positions(df)

df = add_positions(df)
df.head()

	Div	Date	HomeTeam	AwayTeam	FTHG	FTAG	FTR	HTHG	HTAG	HTR	...	BbMxAHA	BbAvAHA	PSCH	PSCD	PSCA	home_position	away_position
0	D1	2014-08-22	Bayern Munich	Wolfsburg	2	1	H	1	0	H	...	1.73	1.68	1.29	6.67	10.58	1	1
1	D1	2014-08-23	Dortmund	Leverkusen	0	2	A	0	1	A	...	1.87	1.84	1.75	4.18	4.77	1	1
2	D1	2014-08-23	Ein Frankfurt	Freiburg	1	0	H	1	0	H	...	1.90	1.86	2.01	3.74	3.92	1	1
3	D1	2014-08-23	FC Koln	Hamburg	0	0	D	0	0	D	...	2.92	2.70	2.06	3.62	3.86	1	1
4	D1	2014-08-23	Hannover	Schalke 04	2	1	H	0	0	D	...	2.08	2.04	3.10	3.60	2.37	1	1

5 rows × 72 columns

Simple diffs

`add_simple_diffs`[source]

add_simple_diffs(df)

df = add_simple_diffs(df)
df.tail()

	Div	Date	HomeTeam	AwayTeam	FTHG	FTAG	FTR	HTHG	HTAG	HTR	...	PSCH	PSCD	PSCA	gameday	home_points	away_points	home_position	away_position	points_diff	position_diff
301	D1	2015-05-23	Hamburg	Schalke 04	2	0	H	0	0	D	...	2.09	3.89	3.54	33	32	48	14	5	-16	9
303	D1	2015-05-23	Hoffenheim	Hertha	2	1	H	1	0	H	...	2.14	3.57	3.68	33	41	35	8	11	6	-3
302	D1	2015-05-23	Hannover	Freiburg	2	1	H	1	0	H	...	2.02	3.54	4.11	33	34	34	12	12	0	0
304	D1	2015-05-23	M'gladbach	Augsburg	1	3	A	1	0	H	...	1.47	5.00	7.05	33	66	46	3	6	20	-3
305	D1	2015-05-23	Paderborn	Stuttgart	1	2	A	1	1	D	...	3.56	4.13	2.01	33	31	33	15	13	-2	2

5 rows × 74 columns

Result

df.head()

	Div	Date	HomeTeam	AwayTeam	FTHG	FTAG	FTR	HTHG	HTAG	HTR	...	PSCH	PSCD	PSCA	home_position	away_position
0	D1	2014-08-22	Bayern Munich	Wolfsburg	2	1	H	1	0	H	...	1.29	6.67	10.58	1	1
1	D1	2014-08-23	Dortmund	Leverkusen	0	2	A	0	1	A	...	1.75	4.18	4.77	1	1
2	D1	2014-08-23	Ein Frankfurt	Freiburg	1	0	H	1	0	H	...	2.01	3.74	3.92	1	1
3	D1	2014-08-23	FC Koln	Hamburg	0	0	D	0	0	D	...	2.06	3.62	3.86	1	1
4	D1	2014-08-23	Hannover	Schalke 04	2	1	H	0	0	D	...	3.10	3.60	2.37	1	1

5 rows × 74 columns

`results_from_goals`[source]

results_from_goals(df, home_col, away_col)

Compares goals in home_col to goals in away_col and creates [new_col] with results encoded as: -1 -> home win, 0 -> draw, 1 -> away win.

result_df = pd.DataFrame(columns=['home', 'away'])
result_df['home'] = [1,2,3,0]
result_df['away'] = [2,1,3,0]


result_df['result'] = results_from_goals(result_df, 'home', 'away')
result_df.head()

	home	away	result
0	1	2	1
1	2	1	-1
2	3	3	0
3	0	0	0

assert (result_df.result.values == [1,-1,0,0]).all()

Profit odds

odds_df = result_df.copy()
odds_df[['odds_home', 'odds_draw', 'odds_away']] = df.loc[:3, ['B365H', 'B365D', 'B365A']]

odds_df.head()

	home	away	result	odds_home	odds_draw	odds_away
0	1	2	1	1.25	6.00	10.0
1	2	1	-1	1.57	4.33	5.0
2	3	3	0	2.05	3.40	3.6
3	0	0	0	2.00	3.50	3.6

`create_profit_df`[source]

create_profit_df(df, odds_home, odds_draw, odds_away, home_profit='y_home', draw_profit='y_draw', away_profit='y_away', df_result_col='result')

`add_profit_cols`[source]

add_profit_cols(df, odds_home, odds_draw, odds_away, home_profit='y_home', draw_profit='y_draw', away_profit='y_away', df_result_col='result')

create_profit_df(odds_df, 'odds_home', 'odds_draw', 'odds_away')

	y_home	y_draw	y_away
0	-1.00	-1.0	9.0
1	0.57	-1.0	-1.0
2	-1.00	2.4	-1.0
3	-1.00	2.5	-1.0

add_profit_cols(odds_df, 'odds_home', 'odds_draw', 'odds_away')

	home	away	result	odds_home	odds_draw	odds_away	y_home	y_draw	y_away
0	1	2	1	1.25	6.00	10.0	-1.00	-1.0	9.0
1	2	1	-1	1.57	4.33	5.0	0.57	-1.0	-1.0
2	3	3	0	2.05	3.40	3.6	-1.00	2.4	-1.0
3	0	0	0	2.00	3.50	3.6	-1.00	2.5	-1.0

Normalizer

df = pd.DataFrame([[1,2,3], [4,5,6]], columns=['a', 'b', 'c'])
df

	a	b	c
0	1	2	3
1	4	5	6

`normalize_by_args`[source]

normalize_by_args(x, mean, std)

`normalize_col`[source]

normalize_col(col)

`class` `ColumnNormalizer`[source]

ColumnNormalizer(columns, names=None)

normalize_by_args(df.a, 5, 2.)

0   -2.0
1   -0.5
Name: a, dtype: float64

normalize_col(df.a)

0   -0.707107
1    0.707107
Name: a, dtype: float64

ColumnNormalizer(df.loc[:,['a', 'b']]), ColumnNormalizer.from_df(df, ['a','b'])

(Mean: 3.0 | Std: 1.5811388300841898 | Names: unknown,
 Mean: 3.0 | Std: 1.5811388300841898 | Names: ['a', 'b'])

norm = ColumnNormalizer.from_df(df, ['a','b'])

df.loc[:, norm.names] = norm(df.loc[:, norm.names])
df

	a	b	c
0	-1.264911	-0.632456	3
1	0.632456	1.264911	6

assert df.loc[:, ['a', 'b']].values.mean()==0., 'ColumnNormalizer should produce 0 mean.'
assert df.loc[:, ['a', 'b']].values.std()==1., 'ColumnNormalizer should produce 1. std.'

Profit loss

Computes the outcome of a betting allocation. The loss is the negated profit, so minimizing the loss maximizes the profit, which is the goal we are aiming for.

`odds_loss`[source]

odds_loss(actual, target)

Compute the mean negative profit

`odds_profit`[source]

odds_profit(actual, target)

Compute the total profit

odds_df = pd.DataFrame([[3.2, 3.25, 2.29], [1.91, 3.3, 3.93]], columns=['home', 'draw', 'away'])
odds_df.head()

	home	draw	away
0	3.20	3.25	2.29
1	1.91	3.30	3.93

allocations = torch.tensor([[.5, .2, .3], [.8, .05, .15]])

odds_loss(allocations, odds_df.values), odds_profit(allocations, odds_df.values)

(tensor(-2.8457, dtype=torch.float64), tensor(5.6913, dtype=torch.float64))

Read the example df

read_football_csv[source]

Extract teams

extract_teams[source]

Game day

add_gamedays[source]

Points

Points accumulation

add_points[source]

Positions

add_positions[source]

Simple diffs

add_simple_diffs[source]

Result

results_from_goals[source]

Profit odds

create_profit_df[source]

add_profit_cols[source]