1 from collections import namedtuple
2 import os
3 import os.path as path
4 import gzip
5 import json
6 import sys
7 import urllib2
8
9 from nflgame import OrderedDict
10 import nflgame.player
11 import nflgame.schedule
12 import nflgame.seq
13 import nflgame.statmap
14
15 _MAX_INT = sys.maxint
16
17 _jsonf = path.join(path.split(__file__)[0], 'gamecenter-json', '%s.json.gz')
18 _json_base_url = "http://www.nfl.com/liveupdate/game-center/%s/%s_gtd.json"
19
GameDiff = namedtuple('GameDiff', 'before after plays players')
"""
Describes how one game changed between two snapshots: the earlier and
later game objects, together with the plays and the player statistics
that separate them.
"""
25
TeamStats = namedtuple(
    'TeamStats',
    'first_downs total_yds passing_yds rushing_yds penalty_cnt '
    'penalty_yds turnovers punt_cnt punt_yds punt_avg pos_time')
"""Team-level statistics covering one whole game."""
32
33
35 """
36 Represents field position.
37
38 The representation here is an integer offset where the 50 yard line
39 corresponds to '0'. Being in the own territory corresponds to a negative
40 offset while being in the opponent's territory corresponds to a positive
41 offset.
42
43 e.g., NE has the ball on the NE 45, the offset is -5.
44 e.g., NE has the ball on the NYG 2, the offset is 48.
45 """
def __new__(cls, pos_team, yardline):
    """
    Allocate an instance only when a yardline is supplied.

    With an empty (falsy) yardline the constructor call evaluates to
    None; Python then skips __init__ because the result is not an
    instance of cls.
    """
    return object.__new__(cls) if yardline else None
50
52 """
53 pos_team is the team on offense, and yardline is a string formatted
54 like 'team-territory yard-line'. e.g., "NE 32".
55 """
56 if yardline == '50':
57 self.offset = 0
58 return
59
60 territory, yd_str = yardline.split()
61 yd = int(yd_str)
62 if territory == pos_team:
63 self.offset = -(50 - yd)
64 else:
65 self.offset = 50 - yd
66
68 return cmp(self.offset, other.offset)
69
71 return '%d' % self.offset
72
73
75 """
76 Represents the amount of time a drive lasted in (minutes, seconds).
77 """
79 self.clock = clock
80
81 try:
82 self.minutes, self.seconds = map(int, self.clock.split(':'))
83 except ValueError:
84 self.minutes, self.seconds = 0, 0
85
87 """
88 Returns the total number of seconds that this possession lasted for.
89 """
90 return self.seconds + self.minutes * 60
91
93 a, b = (self.minutes, self.seconds), (other.minutes, other.seconds)
94 return cmp(a, b)
95
103
112
115
116
118 """
119 Represents the current time in a game. Namely, it keeps track of the
120 quarter and clock time. Also, GameClock can represent whether
121 the game hasn't started yet, is half time or if it's over.
122 """
124 self.qtr = qtr
125 self.clock = clock
126
127 try:
128 self.__minutes, self.__seconds = map(int, self.clock.split(':'))
129 except ValueError:
130 self.__minutes, self.__seconds = 0, 0
131 except AttributeError:
132 self.__minutes, self.__seconds = 0, 0
133 try:
134 self.__qtr = int(self.qtr)
135 if self.__qtr >= 3:
136 self.__qtr += 1
137 except ValueError:
138 if self.is_pregame():
139 self.__qtr = 0
140 elif self.is_halftime():
141 self.__qtr = 3
142 elif self.is_final():
143 self.__qtr = sys.maxint
144 else:
145 assert False, 'Unknown QTR value: "%s"' % self.qtr
146
148 return self.qtr == 'Pregame'
149
151 return self.qtr == 'Halftime'
152
154 return 'final' in self.qtr.lower()
155
157 if self.__qtr != other.__qtr:
158 return cmp(self.__qtr, other.__qtr)
159 elif self.__minutes != other.__minutes:
160 return cmp(other.__minutes, self.__minutes)
161 return cmp(other.__seconds, self.__seconds)
162
164 """
165 Returns a nicely formatted string indicating the current time of the
166 game. Examples include "Q1 10:52", "Q4 1:25", "Pregame", "Halftime"
167 and "Final".
168 """
169 try:
170 q = int(self.qtr)
171 return 'Q%d %s' % (q, self.clock)
172 except ValueError:
173 return self.qtr
174
175
176 -class Game (object):
177 """
178 Game represents a single pre- or regular-season game. It provides a window
179 into the statistics of every player that played into the game, along with
180 the winner of the game, the score and a list of all the scoring plays.
181 """
182
def __new__(cls, eid=None, fpath=None):
    """
    Build a Game from gamecenter JSON, or return None when no usable
    data is available.

    The raw JSON text comes from _get_json_data(eid, fpath). None is
    returned (and, because the result is then not an instance of cls,
    __init__ is never run) when:

      * fetching the data raises urllib2.URLError,
      * there is no data, or it is the empty object '{}',
      * the text is not valid JSON,
      * the payload has no entry for the game.

    When eid is None the game identifier is discovered from the payload:
    it is the first key whose value is a dictionary.

    On success the new instance carries rawData (the JSON text), eid and
    data (the decoded entry for this game).
    """
    try:
        rawData = _get_json_data(eid, fpath)
    except urllib2.URLError:
        return None
    if rawData is None or rawData.strip() == '{}':
        return None

    try:
        payload = json.loads(rawData)
    except ValueError:
        return None

    if eid is not None:
        # A payload lacking the requested game is as unusable as an
        # empty one; report it the same way instead of leaking KeyError.
        try:
            found_eid, found_data = eid, payload[eid]
        except KeyError:
            return None
    else:
        # Only a file path was given, so look the identifier up in the
        # payload itself.
        found_eid, found_data = None, None
        for key, value in payload.iteritems():
            if isinstance(value, dict):
                found_eid, found_data = key, value
                break
        # An explicit check rather than an assert, which would vanish
        # under `python -O`.
        if found_eid is None:
            return None

    game = object.__new__(cls)
    game.rawData = rawData
    game.eid = found_eid
    game.data = found_data
    return game
211
def __init__(self, eid=None, fpath=None):
    """
    Fill in the game's attributes from the JSON attached by __new__.

    eid is the game identifier used by NFL.com's GameCenter live update
    pages and fpath is an optional path to a gzipped JSON file. Both are
    consumed by __new__, which stores self.eid, self.data and
    self.rawData before this method runs.

    Sets the schedule entry and gamekey (both None when the game is not
    in nflgame.schedule.games_byid), the team abbreviations and team
    statistics, the game clock, down and distance, the total and
    per-quarter scores (score_home_q1 ... score_away_q5), the winner and
    loser, and the list of scoring-play summaries.

    winner and loser are None while the game is not over. For a tie both
    are the string 'HOME/AWAY'.

    If the game is over and no readable cache file exists for it, the
    raw JSON is written to disk (gzip-compressed) via save() so that
    later loads can read it from disk.
    """
    self.schedule = nflgame.schedule.games_byid.get(self.eid, None)

    # Home and away team abbreviations plus team-level statistics.
    self.home = self.data['home']['abbr']
    self.away = self.data['away']['abbr']
    self.stats_home = _json_team_stats(self.data['home']['stats']['team'])
    self.stats_away = _json_team_stats(self.data['away']['stats']['team'])

    # Reuse the tolerant lookup above: a game missing from the schedule
    # gets gamekey None instead of raising KeyError.
    if self.schedule is None:
        self.gamekey = None
    else:
        self.gamekey = self.schedule['gamekey']

    # Clock, down/distance and scores.
    self.time = GameClock(self.data['qtr'], self.data['clock'])
    self.down = _tryint(self.data['down'])
    self.togo = _tryint(self.data['togo'])
    self.score_home = int(self.data['home']['score']['T'])
    self.score_away = int(self.data['away']['score']['T'])
    for q in (1, 2, 3, 4, 5):
        for team in ('home', 'away'):
            score = self.data[team]['score'][str(q)]
            self.__dict__['score_%s_q%d' % (team, q)] = int(score)

    # Winner and loser are always assigned so both attributes exist
    # whether or not the game has finished.
    if not self.game_over():
        self.winner = None
        self.loser = None
    elif self.score_home > self.score_away:
        self.winner = self.home
        self.loser = self.away
    elif self.score_away > self.score_home:
        self.winner = self.away
        self.loser = self.home
    else:
        self.winner = '%s/%s' % (self.home, self.away)
        self.loser = '%s/%s' % (self.home, self.away)

    # Scoring summary as a list of strings, ordered by numeric key.
    self.scores = []
    for k in sorted(map(int, self.data['scrsummary'])):
        play = self.data['scrsummary'][str(k)]
        s = '%s - Q%d - %s - %s' \
            % (play['team'], play['qtr'], play['type'], play['desc'])
        self.scores.append(s)

    # Cache finished games. The check uses self.eid, not the eid
    # argument, because the argument is None when the game was loaded
    # from fpath alone and would never match an existing cache file.
    if self.game_over() and not os.access(_jsonf % self.eid, os.R_OK):
        self.save()
271
273 """Returns true if team (i.e., 'NE') is the home team."""
274 return team == self.home
275
277 """Returns the year of the season this game belongs to."""
278 year = int(self.eid[0:4])
279 month = int(self.eid[4:6])
280 if month <= 3:
281 year -= 1
282 return year
283
285 """game_over returns true if the game is no longer being played."""
286 return self.time.is_final()
287
289 """playing returns true if the game is currently being played."""
290 return not self.time.is_pregame() and not self.time.is_final()
291
def save(self, fpath=None):
    """
    Write the game's raw JSON text to fpath as a gzip file.

    fpath defaults to the cache location derived from the game's eid.
    An IOError while writing is not raised; instead a message asking for
    the target directory to be made writable is printed to stderr.
    """
    if fpath is None:
        fpath = _jsonf % self.eid
    try:
        out = gzip.open(fpath, 'w+')
        try:
            out.write(self.rawData)
        finally:
            # Close explicitly so the compressed stream is finalized
            # now rather than whenever the handle is garbage collected.
            out.close()
    except IOError:
        sys.stderr.write("Could not cache JSON data. Please "
                         "make '%s' writable.\n"
                         % os.path.dirname(fpath))
305
307 """
308 Returns a string of the score of the game.
309 e.g., "NE (32) vs. NYG (0)".
310 """
311 return '%s (%d) vs. %s (%d)' \
312 % (self.home, self.score_home, self.away, self.score_away)
313
315 """
316 Returns a GenPlayers sequence of player statistics that combines
317 game statistics and play statistics by taking the max value of
318 each corresponding statistic.
319
320 This is useful when accuracy is desirable. Namely, using only
321 play-by-play data or using only game statistics can be unreliable.
322 That is, both are inconsistently correct.
323
324 Taking the max values of each statistic reduces the chance of being
325 wrong (particularly for stats that are in both play-by-play data
326 and game statistics), but does not eliminate them.
327 """
328 game_players = list(self.players)
329 play_players = list(self.drives.plays().players())
330 max_players = OrderedDict()
331
332
333
334
335
336
337 for pplay in play_players:
338 newp = nflgame.player.GamePlayerStats(pplay.playerid,
339 pplay.name, pplay.home,
340 pplay.team)
341 maxstats = {}
342 for stat, val in pplay._stats.iteritems():
343 maxstats[stat] = val
344
345 newp._overwrite_stats(maxstats)
346 max_players[pplay.playerid] = newp
347
348 for newp in max_players.itervalues():
349 for pgame in game_players:
350 if pgame.playerid != newp.playerid:
351 continue
352
353 maxstats = {}
354 for stat, val in pgame._stats.iteritems():
355 maxstats[stat] = max([val,
356 newp._stats.get(stat, -_MAX_INT)])
357
358 newp._overwrite_stats(maxstats)
359 break
360 return nflgame.seq.GenPlayerStats(max_players)
361
363 if name == 'players':
364 self.__players = _json_game_player_stats(self, self.data)
365 self.players = nflgame.seq.GenPlayerStats(self.__players)
366 return self.players
367 if name == 'drives':
368 self.__drives = _json_drives(self, self.home, self.data['drives'])
369 self.drives = nflgame.seq.GenDrives(self.__drives)
370 return self.drives
371
373 return diff(other, self)
374
377
378
def diff(before, after):
    """
    Returns the difference between two points of time in a game in terms
    of plays and player statistics.

    before and after must be snapshots of the same game (equal eid). The
    return value is a GameDiff namedtuple with four fields:

      * before and after: the two games exactly as passed in,
      * plays: a list of the plays in after that do not compare equal to
        any play in before,
      * players: a GenPlayerStats sequence keyed by player id. A player
        seen only in after contributes their full statistics; a player
        seen in both contributes `after_stats - before_stats`, and is
        left out when that subtraction yields None.

    This is useful for sending alerts where you're guaranteed to see each
    play statistic only once (assuming NFL.com behaves itself).
    """
    assert after.eid == before.eid

    after_plays = list(after.drives.plays())
    before_plays = list(before.drives.plays())
    # Membership relies on the play objects' own equality test.
    plays = [play for play in after_plays if play not in before_plays]

    _players = OrderedDict()
    after_players = list(after.max_player_stats())
    # Index the earlier snapshot by player id once instead of rescanning
    # it for every player in the later snapshot.
    before_by_id = dict((bplayer.playerid, bplayer)
                        for bplayer in before.max_player_stats())
    for aplayer in after_players:
        bplayer = before_by_id.get(aplayer.playerid)
        if bplayer is None:
            _players[aplayer.playerid] = aplayer
            continue
        pdiff = aplayer - bplayer
        if pdiff is not None:
            _players[aplayer.playerid] = pdiff
    players = nflgame.seq.GenPlayerStats(_players)

    return GameDiff(before=before, after=after, plays=plays, players=players)
419
420
422 """
423 Drive represents a single drive in an NFL game. It contains a list
424 of all plays that happened in the drive, in chronological order.
425 It also contains meta information about the drive such as the start
426 and stop times and field position, length of possession, the number
427 of first downs and a short descriptive string of the result of the
428 drive.
429
430 """
def __init__(self, game, drive_num, home_team, data):
    """
    Build a drive from its JSON entry.

    game is the owning game, drive_num the drive's number, home_team the
    home team's abbreviation and data the drive's JSON dictionary. When
    data is None nothing is set, leaving an empty shell for the caller
    to fill in (this is how two drives are added together).
    """
    if data is None:
        return

    self.game = game
    self.drive_num = drive_num
    self.team = data['posteam']
    self.home = self.team == home_team
    self.first_downs = int(data['fds'])
    self.result = data['result']
    self.penalty_yds = int(data['penyds'])
    self.total_yds = int(data['ydsgained'])
    self.pos_time = PossessionTime(data['postime'])
    self.play_cnt = int(data['numplays'])

    start = data['start']
    self.field_start = FieldPosition(self.team, start['yrdln'])
    self.time_start = GameClock(start['qtr'], start['time'])

    end_yardline = data['end']['yrdln']
    if end_yardline.strip():
        self.field_end = FieldPosition(self.team, end_yardline)
    else:
        # The drive's own end yardline is blank: walk the plays from the
        # highest play id downwards and take the first yardline that is
        # not blank, settling for midfield when every play is blank.
        self.field_end = None
        for pid in sorted(map(int, data['plays'].keys()), reverse=True):
            spot = data['plays'][str(pid)]['yrdln'].strip()
            if spot:
                self.field_end = FieldPosition(self.team, spot)
                break
        else:
            self.field_end = FieldPosition(self.team, '50')

    # The ending quarter is read from the play with the highest play id;
    # the ending clock time still comes from the drive's 'end' entry.
    play_numbers = sorted(map(int, data['plays'].keys()))
    final_play = data['plays'][str(play_numbers[-1])]
    self.time_end = GameClock(final_play['qtr'], data['end']['time'])

    self.__plays = _json_plays(self, data['plays'])
    self.plays = nflgame.seq.GenPlays(self.__plays)
472
474 """
475 Adds the statistics of two drives together.
476
477 Note that once two drives are added, the following fields
478 automatically get None values: result, field_start, field_end,
479 time_start and time_end.
480 """
481 assert self.team == other.team, \
482 'Cannot add drives from different teams "%s" and "%s".' \
483 % (self.team, other.team)
484 new_drive = Drive(None, 0, '', None)
485 new_drive.team = self.team
486 new_drive.home = self.home
487 new_drive.first_downs = self.first_downs + other.first_downs
488 new_drive.penalty_yds = self.penalty_yds + other.penalty_yds
489 new_drive.total_yds = self.total_yds + other.total_yds
490 new_drive.pos_time = self.pos_time + other.pos_time
491 new_drive.play_cnt = self.play_cnt + other.play_cnt
492 new_drive.__plays = self.__plays + other.__plays
493 new_drive.result = None
494 new_drive.field_start = None
495 new_drive.field_end = None
496 new_drive.time_start = None
497 new_drive.time_end = None
498 return new_drive
499
501 return '%s (Start: %s, End: %s) %s' \
502 % (self.team, self.time_start, self.time_end, self.result)
503
504
505 -class Play (object):
506 """
507 Play represents a single play. It contains a list of all players
508 that participated in the play (including offense, defense and special
509 teams). The play also includes meta information about what down it
510 is, field position, clock time, etc.
511
512 Play objects also contain team-level statistics, such as whether the
513 play was a first down, a fourth down failure, etc.
514 """
def __init__(self, drive, playid, data):
    """
    Build a play from its JSON entry.

    drive is the owning drive, playid the play's identifier and data the
    play's JSON dictionary. Besides the descriptive fields, every
    statistic found for the play is stored both as an attribute of the
    play and in the _stats dictionary.
    """
    self.data = data
    self.drive = drive
    self.playid = playid
    self.team = data['posteam']
    self.home = self.drive.home
    self.desc = data['desc']
    self.note = data['note']
    self.down = int(data['down'])
    self.yards_togo = int(data['ydstogo'])
    self.touchdown = 'touchdown' in self.desc.lower()
    self._stats = {}

    # Without a team in possession there is no clock or field position.
    if self.team:
        self.time = GameClock(data['qtr'], data['time'])
        self.yardline = FieldPosition(self.team, data['yrdln'])
    else:
        self.time = None
        self.yardline = None

    attrs = self.__dict__
    participants = data['players']

    # Entries filed under player id '0' are accumulated straight onto
    # the play, summing repeated statistics.
    for info in participants.get('0', ()):
        stat_id = info['statId']
        if stat_id not in nflgame.statmap.idmap:
            continue
        found = nflgame.statmap.values(stat_id, info['yards'])
        for key, amount in found.iteritems():
            total = attrs.get(key, 0) + amount
            attrs[key] = total
            self._stats[key] = total

    # Ordered list of the individual events that make up the play.
    self.events = _json_play_events(participants)

    # Per-player statistics; each player's values are also copied onto
    # the play, a later value simply replacing an earlier one.
    self.__players = _json_play_players(self, participants)
    self.players = nflgame.seq.GenPlayerStats(self.__players)
    for player in self.players:
        for key, amount in player.stats.iteritems():
            attrs[key] = amount
            self._stats[key] = amount
564
566 """Whether a player with id playerid participated in this play."""
567 return playerid in self.__players
568
570 if self.team:
571 if self.down != 0:
572 return '(%s, %s, %d and %d) %s' \
573 % (self.team, self.data['yrdln'],
574 self.down, self.yards_togo, self.desc)
575 else:
576 return '(%s, %s) %s' \
577 % (self.team, self.data['yrdln'], self.desc)
578 return self.desc
579
581 """
582 We use the play description to determine equality because the
583 play description can be changed. (Like when a play is reversed.)
584 """
585 return self.playid == other.playid and self.desc == other.desc
586
589
590
592 """
593 Takes a team stats JSON entry and converts it to a TeamStats namedtuple.
594 """
595 return TeamStats(
596 first_downs=int(data['totfd']),
597 total_yds=int(data['totyds']),
598 passing_yds=int(data['pyds']),
599 rushing_yds=int(data['ryds']),
600 penalty_cnt=int(data['pen']),
601 penalty_yds=int(data['penyds']),
602 turnovers=int(data['trnovr']),
603 punt_cnt=int(data['pt']),
604 punt_yds=int(data['ptyds']),
605 punt_avg=int(data['ptavg']),
606 pos_time=PossessionTime(data['top']))
607
608
610 """
611 Takes a home or away JSON entry and converts it to a list of Drive
612 objects.
613 """
614 drive_nums = []
615 for drive_num in data:
616 try:
617 drive_nums.append(int(drive_num))
618 except:
619 pass
620 drives = []
621 playids = set()
622 for i, drive_num in enumerate(sorted(drive_nums), 1):
623 repeat_drive = False
624 for playid in data[str(drive_num)]['plays']:
625 if playid in playids:
626 repeat_drive = True
627 break
628 playids.add(playid)
629 if repeat_drive:
630 continue
631 drives.append(Drive(game, i, home_team, data[str(drive_num)]))
632 return drives
633
634
636 """
637 Takes a single JSON drive entry (data) and converts it to a list
638 of Play objects.
639 """
640 plays = []
641 for playid in map(str, sorted(map(int, data))):
642 plays.append(Play(drive, playid, data[playid]))
643 return plays
644
645
647 """
648 Takes a single JSON play entry (data) and converts it to an OrderedDict
649 of player statistics.
650
651 play is the instance of Play that this data is part of. It is used
652 to determine whether the player belong to the home team or not.
653 """
654 players = OrderedDict()
655 for playerid, statcats in data.iteritems():
656 if playerid == '0':
657 continue
658 for info in statcats:
659 if info['statId'] not in nflgame.statmap.idmap:
660 continue
661 if playerid not in players:
662 home = play.drive.game.is_home(info['clubcode'])
663 if home:
664 team_name = play.drive.game.home
665 else:
666 team_name = play.drive.game.away
667 stats = nflgame.player.PlayPlayerStats(playerid,
668 info['playerName'],
669 home, team_name)
670 players[playerid] = stats
671 statvals = nflgame.statmap.values(info['statId'], info['yards'])
672 players[playerid]._add_stats(statvals)
673 return players
674
675
677 """
678 Takes a single JSON play entry (data) and converts it to a list of events.
679 """
680 temp = list()
681 for playerid, statcats in data.iteritems():
682 for info in statcats:
683 if info['statId'] not in nflgame.statmap.idmap:
684 continue
685 statvals = nflgame.statmap.values(info['statId'], info['yards'])
686 statvals['playerid'] = None if playerid == '0' else playerid
687 statvals['playername'] = info['playerName'] or None
688 statvals['team'] = info['clubcode']
689 temp.append((int(info['sequence']), statvals))
690 return [t[1] for t in sorted(temp, key=lambda t: t[0])]
691
692
694 """
695 Parses the 'home' and 'away' team stats and returns an OrderedDict
696 mapping player id to their total game statistics as instances of
697 nflgame.player.GamePlayerStats.
698 """
699 players = OrderedDict()
700 for team in ('home', 'away'):
701 for category in nflgame.statmap.categories:
702 if category not in data[team]['stats']:
703 continue
704 for pid, raw in data[team]['stats'][category].iteritems():
705 stats = {}
706 for k, v in raw.iteritems():
707 if k == 'name':
708 continue
709 stats['%s_%s' % (category, k)] = v
710 if pid not in players:
711 home = team == 'home'
712 if home:
713 team_name = game.home
714 else:
715 team_name = game.away
716 players[pid] = nflgame.player.GamePlayerStats(pid,
717 raw['name'],
718 home,
719 team_name)
720 players[pid]._add_stats(stats)
721 return players
722
723
725 """
726 Returns the JSON data corresponding to the game represented by eid.
727
728 If the JSON data is already on disk, it is read, decompressed and returned.
729
730 Otherwise, the JSON data is downloaded from the NFL web site. If the data
731 doesn't exist yet or there was an error, _get_json_data returns None.
732
733 If eid is None, then the JSON data is read from the file at fpath.
734 """
735 assert eid is not None or fpath is not None
736
737 if fpath is not None:
738 return gzip.open(fpath).read()
739
740 fpath = _jsonf % eid
741 if os.access(fpath, os.R_OK):
742 return gzip.open(fpath).read()
743 try:
744 return urllib2.urlopen(_json_base_url % (eid, eid), timeout=5).read()
745 except urllib2.HTTPError:
746 pass
747 return None
748
749
751 """
752 Tries to convert v to an integer. If it fails, return 0.
753 """
754 try:
755 return int(v)
756 except:
757 return 0
758