Choosing a Fantasy Football Team

co-starring: IPython Notebook, matplotlib, and lp_solve

Bill Mill


billmill.org
github.com/llimllib
@llimllib

This talk is available at: billmill.org/fantasypl

You have a limited budget

And each player costs a certain amount

That sounds like an optimization problem...

I bet I could solve that with code

In []:
import requests, cPickle, shutil, time

all = {}
errorout = open("errors.log", "w")

for i in range(600):
    playerurl = "http://fantasy.premierleague.com/web/api/elements/%s/"
    r = requests.get(playerurl % i)

    # skip non-existent players
    if r.status_code != 200: continue

    all[i] = r.json()

cPickle.dump(all, outfile)
In [63]:
website("http://ipython.org/notebook.html")
Out[63]:
In [5]:
import cPickle
players = cPickle.load(open("players.data.pickle"))
players[1]
Out[5]:
{u'added': u'2013-07-15T13:21:12 UTC+0000',
 u'code': 37096,
 u'current_fixture': u'Crystal Palace (H)',
 u'element_type_id': 1,
 u'event_cost': 42,
 u'event_explain': [[u'Minutes played', 0, 0]],
 u'event_points': 0,
 u'event_total': 0,
 u'first_name': u'Lukasz',
 u'fixture_history': {u'all': [[u'17 Aug 15:00',
    1,
    u'AVL(H) 1-3',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    45,
    0],
   [u'24 Aug 12:45',
    2,
    u'FUL(A) 3-1',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -1322,
    45,
    0],
   [u'01 Sep 16:00',
    3,
    u'TOT(H) 1-0',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -1350,
    45,
    0],
   [u'14 Sep 15:00',
    4,
    u'SUN(A) 3-1',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -1259,
    44,
    0],
   [u'22 Sep 13:30',
    5,
    u'STK(H) 3-1',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -1091,
    44,
    0],
   [u'28 Sep 17:30',
    6,
    u'SWA(A) 2-1',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -852,
    44,
    0],
   [u'06 Oct 16:00',
    7,
    u'WBA(A) 1-1',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -712,
    44,
    0],
   [u'19 Oct 15:00',
    8,
    u'NOR(H) 4-1',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -388,
    44,
    0],
   [u'26 Oct 12:45',
    9,
    u'CRY(A) 2-0',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -343,
    43,
    0],
   [u'02 Nov 17:30',
    10,
    u'LIV(H) 2-0',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -218,
    43,
    0],
   [u'10 Nov 16:10',
    11,
    u'MUN(A) 0-1',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -247,
    43,
    0],
   [u'23 Nov 15:00',
    12,
    u'SOU(H) 2-0',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -221,
    43,
    0],
   [u'30 Nov 15:00',
    13,
    u'CAR(A) 3-0',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -257,
    43,
    0],
   [u'04 Dec 19:45',
    14,
    u'HUL(H) 2-0',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -283,
    43,
    0],
   [u'08 Dec 16:00',
    15,
    u'EVE(H) 1-1',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -251,
    43,
    0],
   [u'14 Dec 12:45',
    16,
    u'MCI(A) 3-6',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -213,
    42,
    0],
   [u'23 Dec 20:00',
    17,
    u'CHE(H) 0-0',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -123,
    42,
    0],
   [u'26 Dec 15:00',
    18,
    u'WHU(A) 3-1',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -105,
    42,
    0],
   [u'29 Dec 13:30',
    19,
    u'NEW(A) 1-0',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -103,
    42,
    0],
   [u'01 Jan 15:00',
    20,
    u'CAR(H) 2-0',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -83,
    42,
    0],
   [u'13 Jan 20:00',
    21,
    u'AVL(A) 2-1',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -17,
    42,
    0],
   [u'18 Jan 15:00',
    22,
    u'FUL(H) 2-0',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -32,
    42,
    0],
   [u'28 Jan 19:45',
    23,
    u'SOU(A) 2-2',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -35,
    42,
    0],
   [u'02 Feb 16:00',
    24,
    u'CRY(H) 2-0',
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    -6,
    42,
    0]],
  u'summary': [[22, u'FUL (H)', 0], [23, u'SOU (A)', 0], [24, u'CRY (H)', 0]]},
 u'fixtures': {u'all': [[u'08 Feb 12:45', u'Gameweek 25', u'Liverpool (A)'],
   [u'12 Feb 19:45', u'Gameweek 26', u'Man Utd (H)'],
   [u'22 Feb 15:00', u'Gameweek 27', u'Sunderland (H)'],
   [u'01 Mar 15:00', u'Gameweek 28', u'Stoke City (A)'],
   [u'08 Mar 15:00', u'Gameweek 29', u'Swansea (H)'],
   [u'16 Mar 16:00', u'Gameweek 30', u'Tottenham (A)'],
   [u'22 Mar 12:45', u'Gameweek 31', u'Chelsea (A)'],
   [u'29 Mar 17:30', u'Gameweek 32', u'Man City (H)'],
   [u'05 Apr 15:00', u'Gameweek 33', u'Everton (A)'],
   [u'12 Apr 15:00', u'Gameweek 34', u'West Ham (H)'],
   [u'19 Apr 15:00', u'Gameweek 35', u'Hull City (A)'],
   [u'26 Apr 15:00', u'Gameweek 36', u'Newcastle (H)'],
   [u'03 May 15:00', u'Gameweek 37', u'West Brom (H)'],
   [u'11 May 15:00', u'Gameweek 38', u'Norwich (A)']],
  u'summary': [[25, u'LIV (A)', u'08 Feb 12:45'],
   [26, u'MUN (H)', u'12 Feb 19:45'],
   [27, u'SUN (H)', u'22 Feb 15:00']]},
 u'form': 0.0,
 u'id': 1,
 u'in_dreamteam': False,
 u'last_season_points': 0,
 u'max_cost': 45,
 u'min_cost': 42,
 u'news': u'',
 u'news_added': None,
 u'news_return': None,
 u'news_updated': None,
 u'next_fixture': u'Liverpool (A)',
 u'now_cost': 42,
 u'original_cost': 45,
 u'photo_mobile_url': u'http://cdn.ismfg.net/static/plfpl/img/shirts/photos/37096.jpg',
 u'points_per_game': 0.0,
 u'season_history': [[u'2007/08',
   248,
   0,
   0,
   0,
   2,
   0,
   0,
   0,
   0,
   0,
   8,
   0,
   0,
   0,
   46,
   15],
  [u'2008/09', 463, 0, 0, 0, 10, 0, 0, 0, 0, 0, 18, 0, 0, 0, 46, 20],
  [u'2009/10', 360, 0, 0, 0, 5, 0, 0, 0, 1, 0, 11, 0, 0, 0, 49, 15],
  [u'2010/11', 1260, 0, 1, 5, 14, 0, 0, 0, 0, 0, 33, 2, 0, 0, 45, 55],
  [u'2011/12', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 0],
  [u'2012/13', 360, 0, 0, 1, 3, 0, 0, 0, 0, 0, 7, 0, 57, 0, 42, 13]],
 u'second_name': u'Fabianski',
 u'selected': 17295,
 u'selected_by': u'0.5',
 u'shirt_image_url': u'http://cdn.ismfg.net/static/plfpl/img/shirts/shirt_1_1.png',
 u'shirt_mobile_image_url': u'http://cdn.ismfg.net/static/plfpl/img/shirts/mobile/shirt_1_1.png',
 u'squad_number': None,
 u'status': u'a',
 u'team_code': 37096,
 u'team_id': 1,
 u'team_name': u'Arsenal',
 u'total_points': 0,
 u'transfers_in': 4112,
 u'transfers_in_event': 31,
 u'transfers_out': 13666,
 u'transfers_out_event': 74,
 u'type_name': u'Goalkeeper',
 u'web_name': u'Fabianski'}
In [6]:
%matplotlib inline
matplotlib.rc('font', size=18)
figsize(12, 4)
import numpy as np
import re
import StringIO
#import prettyplotlib as ppl
#dir(ppl)
In [25]:
points = {}
for p in players.itervalues():
    for game in p['fixture_history']['all']:
        if game[3] > 0:
            points.setdefault(p['type_name'], []).append(game[19])

def poshist(axis, position):
    axis.hist(points[position])
    axis.set_title(position)
    return axis

fig, (ax0, ax1, ax2, ax3) = plt.subplots(ncols=4, sharey=True, figsize=(18,4))
poshist(ax0, "Forward")
poshist(ax1, "Midfielder")
poshist(ax2, "Defender")
poshist(ax3, "Goalkeeper")
fig.show()
In [8]:
opponents = {}
for player in players.itervalues():
    for game in player["fixture_history"]["all"]:
        #skip games where the player played 0 minutes
        if game[3] == 0: continue
        opp = game[2][:3]
        pts = game[19]
        opponents.setdefault(opp, [0,0])[0] += pts
        opponents[opp][1] += 1

from collections import OrderedDict
avgs = {}
for opponent, (score, n) in opponents.iteritems():
    avgs[opponent] = score/float(n)

sorted_avgs = OrderedDict(sorted(avgs.items(), key=lambda t: t[1]))

fig, ax = plt.subplots(figsize=(18,4))
x_pos = np.arange(0, len(sorted_avgs.keys()))
ax.set_xticks(x_pos)
ax.set_xticklabels(sorted_avgs.keys(), rotation=45)
ax.plot(x_pos, sorted_avgs.values(), linewidth=3)
fig.show()
In [27]:
avg_opponent = sum(avgs.values())/float(len(avgs))
In [29]:
team_abbreviations = {
    'Norwich': 'NOR',
    'Cardiff City': 'CAR',
    'Man City': 'MCI',
    'Newcastle': 'NEW',
    'West Brom': 'WBA',
    'West Ham': 'WHU',
    'Southampton': 'SOU',
    'Sunderland': 'SUN',
    'Stoke City': 'STK',
    'Crystal Palace': 'CRY',
    'Arsenal': 'ARS',
    'Swansea': 'SWA',
    'Liverpool': 'LIV',
    'Hull City': 'HUL',
    'Man Utd': 'MUN',
    'Everton': 'EVE',
    'Fulham': 'FUL',
    'Tottenham': 'TOT',
    'Aston Villa': 'AVL',
    'Chelsea': 'CHE',
}

class Game(object):
    def __init__(self, game_json):
        self.opp = game_json[2][:3]
        self.loc = game_json[2][4] # "A" for away, "H" for home
        self.points = game_json[19]
        self.minutes = game_json[3]
    
    def __repr__(self):
        return "Game vs. %s %s: %s pts" % (self.opp, self.loc, self.points)
In [30]:
class Player(object):
    def __init__(self, player_json):
        self.raw = player_json
        self.games = [Game(g) for g in player_json["fixture_history"]["all"]]
        self.name = u"{first_name} {second_name}".format(**player_json)
        self.cost = player_json["event_cost"]
        self.position = player_json["type_name"]
        self.team = team_abbreviations[player_json["team_name"]]
        self.idn = player_json["id"]
        self.news = player_json["news"]
        self.news_return = player_json["news_return"]
        self.pos = self.shortname(self.position)
        self.upcoming = self.get_upcoming_fixtures(player_json["fixtures"]["all"])
    
    def get_upcoming_fixtures(self, fixtures):
        upcoming = []
        for _, gameweek, opponent in fixtures:
            week = int(gameweek.split()[-1])
            if opponent == "-":
                continue
            opp, loc = opponent.split('(')
            opp = team_abbreviations[opp.strip()]
            loc = loc[0]
            upcoming.append((week, opp, loc))
        return upcoming
    
    def shortname(self, position):
        pos_abbreviations = {
            "Goalkeeper": "gk",
            "Defender": "d",
            "Midfielder": "m",
            "Forward": "f"
        }
        
        return pos_abbreviations[position]
    
    def __repr__(self):
        return "#%s %s %s £%s %s" % (self.idn, self.team, self.name.encode("ascii", "ignore"), self.cost, self.pos)

    def __unicode__(self):
        return "#%s %s £%s %s" % (self.idn, self.name, self.cost, self.pos)

player_objs = [Player(p) for p in players.itervalues()]

def find_player(needle):
    return [p for p in player_objs if needle.lower() in p.name.lower()]
In [31]:
p = find_player('Van Persie')[0]
print p.name
print p.position
print p.cost
print p.idn
print p.upcoming[:3]  # upcoming games
print p.games[:3]     # games he's already played
print p
Robin van Persie
Forward
136
264
[(25, 'FUL', u'H'), (26, 'ARS', u'A'), (27, 'CRY', u'A')]
[Game vs. SWA A: 12 pts, Game vs. CHE H: 2 pts, Game vs. LIV A: 1 pts]
#264 MUN Robin van Persie £136 f

In [11]:
homeaway = {"A": 0, "H": 0}
n = 0.
for player in player_objs:
    #only consider full games to eliminate minute bias
    for game in [p for p in player.games if p.minutes == 90]:
        homeaway[game.loc] += game.points
        n += 1

homeaway["A"] /= n
homeaway["H"] /= n

homefield = homeaway["H"] - homeaway["A"]

print homefield, homeaway

fig, ax = plt.subplots(figsize=(2,4))
x_pos = np.arange(0, len(homeaway.keys()))
ax.set_xticks(x_pos+.4)
ax.set_xticklabels(["Away", "Home"], rotation=45)
ax.bar(x_pos, homeaway.values())
fig.show()
0.32794340576 {'A': 1.6139464375947448, 'H': 1.9418898433552298}

\(ev = \sum_{i=0}^5 opp_i + home_i + adj\_avg\)

In [15]:
def adjusted_score(game):
    pts = game.points
    pts += homefield/2 if game.loc == "A" else -homefield/2
    pts += avg_opponent - avgs[game.opp]
    return pts
    
def adjusted_average(player):
    return sum(adjusted_score(g) for g in player.games) / len(player.games)

def game_value(game):
    adj = 0
    adj += homefield/2 if game[2] == "H" else -homefield/2
    adj += avgs[game[1]] - avg_opponent
    return adj
    
def expected_points(player, n=5):
    """return the number of expected points in the next n games"""
    av = adjusted_average(player)
    ev = 0.
    for game in player.upcoming[:n]:
        ev += av + game_value(game)
    return ev/n

print expected_points(find_player(u"Mutch")[0])
print expected_points(find_player(u"Suárez")[0])
print expected_points(find_player(u"Sanogo")[0])
3.41602554986
8.14701669057
-0.131252429959

In [16]:
# re-create our player objects, this time with our monkey-patched expected points function.
# In Real Life™, I would have just gone back to the player object and put it there, but that
# wouldn't make sense in this presentation, since I don't introduce the model until later.
Player.expected_points = expected_points
player_objs = [Player(p) for p in players.itervalues()]
player_objs[1].expected_points()
Out[16]:
4.3687475700409664

Given the constraints:

  • Total player cost < 100
  • 2 goalkeepers
  • 5 defenders
  • 5 midfielders
  • 3 forwards

Maximize expected team value

Given this simple optimization problem:

x1 >= 1
x2 >= 1
x1 + x2 >= 2
minimize x1 + x2 where x1 is an integer

lp_solve takes a file that looks like:

min: x1 + x2;
x1 >= 1;
x2 >= 1;
x1 + x2 >= 2;
int x1, x2;


Which, when run, results in:

$ lp_solve /tmp/simple_example
Value of objective function: 2.00000000
Actual values of the variables:
x1                              1
x2                              1
max: 5.6 gk1 + 4.3 mf2 + …;  /* maximize expected points */
3.7 gk1 + 9.3 mf2 + … < 100; /* team must cost <100£     */
gk1 + gk12 + gk34 + … = 2;   /* limit to 2 goalkeepers   */
d3 + d4 + d23 + … = 5;       /* limit to 5 defenders     */
…                            /* repeat for all positions */
bin gk1, mf2, d3, f4, d5, …; /* all variables are binary */
In [17]:
def objective_function():
    m = " + ".join("{ev} {p.pos}{p.idn}".format(p=p, ev=p.expected_points())
                   for p in player_objs)
    
    return "max: " + m + ";\n"
In [18]:
def cost_constraint(max_price):
    c = " + ".join("{p.cost} {p.pos}{p.idn}".format(p=p)
                   for p in player_objs)
    
    return "cost_constraint: " + c + " <= %s;\n" % max_price
In [19]:
def position_constraints():
    constraints = StringIO.StringIO()

    gks = [p for p in player_objs if p.position == "Goalkeeper"]
    gk_list = " + ".join(("gk{p.idn}".format(**locals()) for p in gks))
    constraints.write("gk_limit: " + gk_list + " = 2;\n")
    
    ds = [p for p in player_objs if p.position == "Defender"]
    d_list = " + ".join(("d{p.idn}".format(**locals()) for p in ds))
    constraints.write("d_limit: " + d_list + " = 5;\n")
    
    ms = [p for p in player_objs if p.position == "Midfielder"]
    m_list = " + ".join(("m{p.idn}".format(**locals()) for p in ms))
    constraints.write("m_limit: " + m_list + " = 5;\n")
    
    fs = [p for p in player_objs if p.position == "Forward"]
    f_list = " + ".join(("f{p.idn}".format(**locals()) for p in fs))
    constraints.write("f_limit: " + f_list + " = 3;\n")
    
    return constraints.getvalue()
In [20]:
#create a buffer to hold all the constraints
buf = StringIO.StringIO()
buf.write(objective_function())
buf.write(cost_constraint(1000))
buf.write(position_constraints())

# I've skipped this, it's probably easier to skip the declaration of all the variable names?
# not very exciting
def all_player_variables():
    variables = ", ".join("{p.pos}{p.idn}".format(**locals()) for p in player_objs)
    return "bin %s;\n" % variables

buf.write(all_player_variables())
In [24]:
import subprocess, re

def get_player(idn):
    """given an id, return a player"""
    for p in player_objs:
        if p.idn == idn: return p
    raise ValueError("Unable to find player")
    
def return_team(lp):
    """run lp_solve ands return a list of player objects"""
    cmd = "echo '%s' | lp_solve" % lp
    val = subprocess.check_output(cmd, shell=True).split('\n')
    get_id = lambda l: int(re.search("^\w+?(\d+)", l).group(1))
    team_ids = [get_id(l) for l in val if re.search(r" 1$", l)]
    return map(get_player, team_ids)

return_team(buf.getvalue())
Out[24]:
[#2 ARS Wojciech Szczesny £60 gk,
 #8 ARS Per Mertesacker £66 d,
 #46 AVL Leandro Bacuna £44 m,
 #63 CAR Pete Whittingham £53 m,
 #69 CAR Jordan Mutch £46 m,
 #82 CHE John Terry £67 d,
 #130 EVE Seamus Coleman £66 d,
 #214 LIV Luis Surez £134 f,
 #232 MCI Gnegneri Yaya Tour £101 m,
 #297 NOR John Ruddy £49 gk,
 #326 SOU Jose Fonte £52 d,
 #328 SOU Luke Shaw £49 d,
 #333 SOU Adam Lallana £77 m,
 #342 SOU Rickie Lambert £70 f,
 #343 SOU Jay Rodriguez £64 f]
In [34]:
%run talktools
In []: