diff --git a/.env_template b/.env_template index 56b5add..7c5f29d 100644 --- a/.env_template +++ b/.env_template @@ -2,4 +2,6 @@ YOUTUBE_API_KEY= MONGO_URI= MONGO_DB= STAGE= -DAILY_SUN_URL= \ No newline at end of file +DAILY_SUN_URL= +GOOGLE_APPLICATION_CREDENTIALS= +FIREBASE_CREDENTIALS_HOST_PATH=./firebase-service-account-key.json \ No newline at end of file diff --git a/.gitignore b/.gitignore index 7e14a5b..76f90e5 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ __pycache__/ .env .envrc .DS_Store -ca-certificate.crt \ No newline at end of file +ca-certificate.crt +firebase-service-account-key.json \ No newline at end of file diff --git a/app.py b/app.py index 9720900..2e113b4 100644 --- a/app.py +++ b/app.py @@ -23,6 +23,27 @@ from src.utils.team_loader import TeamLoader from src.database import db +import os +import firebase_admin +from firebase_admin import credentials + +SERVICE_ACCOUNT_PATH = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") + + +def initialize_firebase(): + if not firebase_admin._apps: + if not SERVICE_ACCOUNT_PATH: + raise ValueError( + "GOOGLE_APPLICATION_CREDENTIALS is not set. Set it to your firebase-service-account-key.json path." + ) + cred = credentials.Certificate(SERVICE_ACCOUNT_PATH) + firebase_admin.initialize_app(cred) + logging.info("Firebase app initialized.") + return firebase_admin.get_app() + + +initialize_firebase() + app = Flask(__name__) # CORS: allow frontend (different origin) to call this API @@ -143,16 +164,6 @@ def signal_handler(sig, frame): signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) -# Only parse arguments when running directly (not when imported by gunicorn) -if __name__ == "__main__": - args = parse_args() -else: - # Default args when imported by gunicorn - class DefaultArgs: - no_scrape = False - no_daily_sun = False - args = DefaultArgs() - # Only run scraping tasks if not disabled if not args.no_scrape: from flask_apscheduler import APScheduler diff --git a/docker-compose.yml b/docker-compose.yml index 339a5f2..ce81025 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,10 +4,13 @@ services: app: image: cornellappdev/score-dev:${IMAGE_TAG} env_file: .env + environment: + GOOGLE_APPLICATION_CREDENTIALS: /app/secrets/firebase.json ports: - "8000:8000" volumes: - ./ca-certificate.crt:/etc/ssl/ca-certificate.crt:ro # Mount MongoDB cert inside the container, ro for read only + - ${FIREBASE_CREDENTIALS_HOST_PATH:-./firebase-service-account-key.json}:/app/secrets/firebase.json:ro scraper: image: cornellappdev/score-dev:${IMAGE_TAG} diff --git a/requirements.txt b/requirements.txt index 6ef629f..f4df598 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ Flask-APScheduler python-dotenv pytz gunicorn +firebase-admin==7.3.0 \ No newline at end of file diff --git a/src/database.py b/src/database.py index 20f1331..70d5f4b 100644 --- a/src/database.py +++ b/src/database.py @@ -70,6 +70,7 @@ def setup_database_indexes(): game_collection.create_index([("date", -1)], background=True) try: + # Ensure doubleheaders on the same day remain distinct by including `time`. game_collection.create_index( [ ("sport", 1), @@ -79,8 +80,10 @@ def setup_database_indexes(): ("city", 1), ("state", 1), ("location", 1), + ("time", 1), ], unique=True, + name="uniq_game_key_with_time", background=True ) except (DuplicateKeyError, OperationFailure) as e: @@ -101,6 +104,13 @@ def setup_database_indexes(): # JWT blocklist: fast lookup by jti db["token_blocklist"].create_index([("jti", 1)], background=True) + try: + db["users"].create_index( + [("firebase_uid", 1)], unique=True, sparse=True, background=True + ) + except (DuplicateKeyError, OperationFailure) as e: + print(f"Warning: Could not create unique index on users.firebase_uid: {e}") + print("✅ MongoDB indexes created successfully") except Exception as e: print(f"❌ Failed to create MongoDB indexes: {e}") diff --git a/src/mutations/login_user.py b/src/mutations/login_user.py index b606738..91591c3 100644 --- a/src/mutations/login_user.py +++ b/src/mutations/login_user.py @@ -1,19 +1,36 @@ from graphql import GraphQLError -from graphene import Mutation, String, Field +from graphene import Mutation, String +from firebase_admin import auth as firebase_auth from flask_jwt_extended import create_access_token, create_refresh_token from src.database import db +_TOKEN_ERRORS = ( + firebase_auth.InvalidIdTokenError, + firebase_auth.ExpiredIdTokenError, + firebase_auth.RevokedIdTokenError, +) + class LoginUser(Mutation): class Arguments: - net_id = String(required=True, description="User's net ID (e.g. Cornell netid).") + id_token = String(required=True, description="Firebase ID token from the client.") access_token = String() refresh_token = String() - def mutate(self, info, net_id): - user = db["users"].find_one({"net_id": net_id}) + def mutate(self, info, id_token): + try: + decoded = firebase_auth.verify_id_token(id_token) + except _TOKEN_ERRORS as err: + raise GraphQLError("Invalid or expired token.") from err + except ValueError as err: + raise GraphQLError("Invalid or expired token.") from err + + firebase_uid = decoded.get("uid") + if not firebase_uid: + raise GraphQLError("Invalid or expired token.") + user = db["users"].find_one({"firebase_uid": firebase_uid}) if not user: raise GraphQLError("User not found.") identity = str(user["_id"]) diff --git a/src/mutations/signup_user.py b/src/mutations/signup_user.py index eb6f6ae..d4e530d 100644 --- a/src/mutations/signup_user.py +++ b/src/mutations/signup_user.py @@ -1,31 +1,51 @@ from graphql import GraphQLError from graphene import Mutation, String +from firebase_admin import auth as firebase_auth from flask_jwt_extended import create_access_token, create_refresh_token +from pymongo.errors import DuplicateKeyError from src.database import db +_TOKEN_ERRORS = ( + firebase_auth.InvalidIdTokenError, + firebase_auth.ExpiredIdTokenError, + firebase_auth.RevokedIdTokenError, +) + class SignupUser(Mutation): class Arguments: - net_id = String(required=True, description="User's net ID (e.g. Cornell netid).") + id_token = String(required=True, description="Firebase ID token from the client.") name = String(required=False, description="Display name.") - email = String(required=False, description="Email address.") + email = String(required=False, description="Email (overrides token email if provided).") access_token = String() refresh_token = String() - def mutate(self, info, net_id, name=None, email=None): - if db["users"].find_one({"net_id": net_id}): - raise GraphQLError("Net ID already exists.") + def mutate(self, info, id_token, name=None, email=None): + try: + decoded = firebase_auth.verify_id_token(id_token) + except _TOKEN_ERRORS as err: + raise GraphQLError("Invalid or expired token.") from err + except ValueError as err: + raise GraphQLError("Invalid or expired token.") from err + + firebase_uid = decoded.get("uid") + if firebase_uid is None: + raise GraphQLError("Token missing uid") from KeyError("uid") + + email = email or decoded.get("email") user_doc = { - "net_id": net_id, + "firebase_uid": firebase_uid, + "email": email, "favorite_game_ids": [], } if name is not None: user_doc["name"] = name - if email is not None: - user_doc["email"] = email - result = db["users"].insert_one(user_doc) + try: + result = db["users"].insert_one(user_doc) + except DuplicateKeyError as err: + raise GraphQLError("User already exists.") from err identity = str(result.inserted_id) return SignupUser( access_token=create_access_token(identity=identity), diff --git a/src/repositories/game_repository.py b/src/repositories/game_repository.py index e531286..5de3269 100644 --- a/src/repositories/game_repository.py +++ b/src/repositories/game_repository.py @@ -11,6 +11,16 @@ logger = logging.getLogger(__name__) +def _time_for_lookup(time): + """True when `time` should be included in a query (aligned with uniq_game_key_with_time).""" + if time is None: + return False + s = str(time).strip() + if not s or s in ("TBD", "TBA"): + return False + return True + + class GameRepository: @staticmethod def find_all(limit=100, offset=0): @@ -103,24 +113,26 @@ def find_by_data(city, date, gender, location, opponent_id, sport, state, time): return Game.from_dict(game_data) if game_data else None @staticmethod - def find_by_key_fields(city, date, gender, location, opponent_id, sport, state): + def find_by_key_fields(city, date, gender, location, opponent_id, sport, state, time=None): """ - Find games without time for duplicate games + Find games by key fields. When `time` is a concrete value (not TBD/TBA), the query + includes it so doubleheaders resolve to a single row. Otherwise falls back to the + legacy filter without time (multiple rows possible). """ game_collection = db["game"] - games = list( - game_collection.find( - { - "city": city, - "date": date, - "gender": gender, - "location": location, - "opponent_id": opponent_id, - "sport": sport, - "state": state, - } - ) - ) + base = { + "city": city, + "date": date, + "gender": gender, + "location": location, + "opponent_id": opponent_id, + "sport": sport, + "state": state, + } + if _time_for_lookup(time): + games = list(game_collection.find({**base, "time": time})) + else: + games = list(game_collection.find(base)) if not games: return None @@ -131,7 +143,7 @@ def find_by_key_fields(city, date, gender, location, opponent_id, sport, state): return [Game.from_dict(game) for game in games] @staticmethod - def find_by_tournament_key_fields(city, date, gender, location, sport, state): + def find_by_tournament_key_fields(city, date, gender, location, sport, state, time=None): """ Find tournament games by location and date (excluding opponent_id). This is used when we need to find a tournament game that might have a placeholder team. @@ -145,6 +157,8 @@ def find_by_tournament_key_fields(city, date, gender, location, sport, state): "gender": gender, "sport": sport, } + if _time_for_lookup(time): + query["time"] = time # For city, state, and location, use flexible matching # This allows finding games even when TBD/TBA values change to real values diff --git a/src/schema.py b/src/schema.py index 70b5473..3bda2d0 100644 --- a/src/schema.py +++ b/src/schema.py @@ -31,9 +31,11 @@ class Mutation(ObjectType): create_team = CreateTeam.Field(description="Creates a new team.") create_youtube_video = CreateYoutubeVideo.Field(description="Creates a new youtube video.") create_article = CreateArticle.Field(description="Creates a new article.") - login_user = LoginUser.Field(description="Login by net_id; returns access_token and refresh_token.") + login_user = LoginUser.Field( + description="Login with Firebase ID token; returns access_token and refresh_token.", + ) signup_user = SignupUser.Field( - description="Create a new user by net_id; returns access_token and refresh_token (no separate login needed).", + description="Create a new user with Firebase ID token; returns access_token and refresh_token.", ) refresh_access_token = RefreshAccessToken.Field( description="Exchange a valid refresh token (in Authorization header) for a new access_token.", diff --git a/src/scrapers/game_details_scrape.py b/src/scrapers/game_details_scrape.py index 5f2f3b1..0f5a07c 100644 --- a/src/scrapers/game_details_scrape.py +++ b/src/scrapers/game_details_scrape.py @@ -53,6 +53,35 @@ def extract_teams_and_scores(box_score_section, sport): return team_names, period_scores +def softball_summary(box_score_section): + summary = [] + scoring_section = box_score_section.find(TAG_SECTION, {ATTR_ARIA_LABEL: LABEL_SCORING_SUMMARY}) + if scoring_section: + scoring_rows = scoring_section.find(TAG_TBODY) + if scoring_rows: + for row in scoring_rows.find_all(TAG_TR): + team = row.find_all(TAG_TD)[0].find(TAG_IMG)[ATTR_ALT] + inning = row.find_all(TAG_TD)[3].text.strip() + desc_cell = row.find_all(TAG_TD)[4] + span = desc_cell.find(TAG_SPAN) + if span: + span.extract() + desc = desc_cell.get_text(strip=True) + cornell_score = int(row.find_all(TAG_TD)[5].get_text(strip=True) or 0) + opp_score = int(row.find_all(TAG_TD)[6].get_text(strip=True) or 0) + summary.append({ + 'team': team, + 'period': inning, + 'inning': inning, + 'description': desc, + 'cor_score': cornell_score, + 'opp_score': opp_score + }) + if not summary: + summary = [{"message": "No scoring events in this game."}] + return summary + + def soccer_summary(box_score_section): summary = [] scoring_section = box_score_section.find(TAG_SECTION, {ATTR_ARIA_LABEL: LABEL_SCORING_SUMMARY}) @@ -124,7 +153,7 @@ def hockey_summary(box_score_section): scorer = row.find_all(TAG_TD)[4].text.strip() assist = row.find_all(TAG_TD)[5].text.strip() - if team == "COR" or team == "CU" or team == "Cornell": + if team == "COR" or team == "CU" or team == "Cornell" or team == "CORNELL": cornell_score += 1 else: opp_score += 1 @@ -272,6 +301,7 @@ def scrape_game(url, sport): 'field hockey': (lambda: extract_teams_and_scores(box_score_section, 'field hockey'), field_hockey_summary), 'lacrosse': (lambda: extract_teams_and_scores(box_score_section, 'lacrosse'), lacrosse_summary), 'baseball': (lambda: extract_teams_and_scores(box_score_section, 'baseball'), baseball_summary), + 'softball': (lambda: extract_teams_and_scores(box_score_section, 'softball'), softball_summary), 'basketball': (lambda: extract_teams_and_scores(box_score_section, 'basketball'), lambda _: []), } diff --git a/src/scrapers/games_scraper.py b/src/scrapers/games_scraper.py index 818760c..3392ee8 100644 --- a/src/scrapers/games_scraper.py +++ b/src/scrapers/games_scraper.py @@ -125,7 +125,8 @@ def parse_schedule_page(url, sport, gender): result_tag = game_item.select_one(RESULT_TAG) if result_tag: - game_data["result"] = result_tag.text.strip().replace("\n", "") + raw = result_tag.get_text(" ", strip=True) + game_data["result"] = re.sub(r"\s+", " ", raw).strip() else: game_data["result"] = None @@ -241,17 +242,17 @@ def process_game_data(game_data): if str(final_box_cor_score) != str(cor_final) or str(final_box_opp_score) != str(opp_final): game_data["score_breakdown"] = game_data["score_breakdown"][::-1] - # Try to find by tournament key fields to handle placeholder teams + # Try to find an existing game record to update. curr_game = GameService.get_game_by_tournament_key_fields( city, game_data["date"], game_data["gender"], location, game_data["sport"], - state + state, + game_time, ) - - # If no tournament game found, try the regular lookup with opponent_id + if not curr_game: curr_game = GameService.get_game_by_key_fields( city, @@ -260,7 +261,8 @@ def process_game_data(game_data): location, team.id, game_data["sport"], - state + state, + game_time, ) if isinstance(curr_game, list): @@ -268,6 +270,7 @@ def process_game_data(game_data): curr_game = curr_game[0] else: curr_game = None + if curr_game: updates = { "time": game_time, diff --git a/src/services/game_service.py b/src/services/game_service.py index 6fd3479..6580aea 100644 --- a/src/services/game_service.py +++ b/src/services/game_service.py @@ -74,22 +74,22 @@ def get_game_by_data(city, date, gender, location, opponent_id, sport, state, ti ) @staticmethod - def get_game_by_key_fields(city, date, gender, location, opponent_id, sport, state): + def get_game_by_key_fields(city, date, gender, location, opponent_id, sport, state, time=None): """ - Retrieve a game by its essential fields, ignoring time + Retrieve game(s) by key fields. Pass `time` when known so doubleheaders match one row. """ return GameRepository.find_by_key_fields( - city, date, gender, location, opponent_id, sport, state + city, date, gender, location, opponent_id, sport, state, time ) @staticmethod - def get_game_by_tournament_key_fields(city, date, gender, location, sport, state): + def get_game_by_tournament_key_fields(city, date, gender, location, sport, state, time=None): """ Retrieve a tournament game by location and date (excluding opponent_id). This is used when we need to find a tournament game that might have a placeholder team. """ return GameRepository.find_by_tournament_key_fields( - city, date, gender, location, sport, state + city, date, gender, location, sport, state, time ) @staticmethod diff --git a/src/types.py b/src/types.py index 7eb8fbe..15ce333 100644 --- a/src/types.py +++ b/src/types.py @@ -42,7 +42,8 @@ class BoxScoreEntryType(ObjectType): Attributes: - `team`: The team involved in the scoring event. - - `period`: The period or inning of the event. + - `period`: The period of the event (e.g. hockey period). + - `inning`: The inning of the event (e.g. baseball/softball). - `time`: The time of the scoring event. - `description`: A description of the play or scoring event. - `scorer`: The name of the scorer. @@ -54,6 +55,7 @@ class BoxScoreEntryType(ObjectType): team = String(required=False) period = String(required=False) + inning = String(required=False) time = String(required=False) description = String(required=False) scorer = String(required=False)