-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsheetutils.py
More file actions
140 lines (116 loc) · 4.52 KB
/
sheetutils.py
File metadata and controls
140 lines (116 loc) · 4.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from googleapiclient.discovery import build, Resource
from google.oauth2 import service_account
import os
import sys
import pandas as pd
from typing import TypeAlias, Any
import logging
logger = logging # use default logger
class GoogleSheet:
"""
represents a single spreadsheet (i.e. a single tab in sheets)
and allows read and update operations
"""
def __init__(self, sheet_obj: Resource):
self.sheet = sheet_obj
def read(self) -> pd.DataFrame:
"""
read spreadsheet from service resource object into dataframe
"""
sheet = self.sheet
data = sheet.get('values', [])
if not data:
logger.warn(f"read_google_sheet - No data found in the specified worksheet.")
# Return empty DataFrame
return pd.DataFrame()
else:
logger.info(f"read_google_sheet - Read of Google Sheet Successful.")
# Convert to DataFrame
# First row as headers, rest as data
headers = data[0]
rows = data[1:] if len(data) > 1 else []
# Pad rows with fewer columns with fill_value
fill_value = None
max_columns = len(headers)
padded_rows = [row + [fill_value] * (max_columns - len(row)) for row in rows]
# Create DataFrame
df = pd.DataFrame(padded_rows, columns=headers)
return df
def update(self, df: pd.DataFrame) -> tuple[bool, str]:
"""
write contents of df into spreadsheet. Note that this
overwrites the spreadsheet contents
"""
# Convert DataFrame to list of lists (including headers)
values = [df.columns.tolist()] + df.values.tolist()
# Prepare the body for the API request
body = {
'values': values
}
# Update the sheet with DataFrame contents
try:
result = sheet.values().update(
spreadsheetId=spreadsheet_id,
range=f'{sheet_name}!A1',
valueInputOption='RAW',
body=body
).execute()
except Exception as e:
logger.err(f"Failed to update {spreadsheet_id}:{sheet_name} due to {e}")
sys.exit(1)
updated_cells = result.get('updatedCells', 0)
logger.info(f"Successfully updated {updated_cells} cells")
return True, f"Successfully updated {updated_cells} cells"
class GoogleSheetManager:
"""
handles the boilerplate of creating an authenticated
service and returning a spreadsheet object.
"""
def __init__(self):
self._service = None
def connect(self, credentials_file:str) -> Resource:
"""
Connects to the Google Sheets API using service account credentials.
Args:
credentials_file (str): Path to the Google service account credentials file.
Returns:
build: The Google Sheets API service object.
"""
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
CONFIG_FILE = credentials_file
if not os.path.exists(CONFIG_FILE):
raise Exception(f"Configuration file not found: {CONFIG_FILE}")
try:
creds = service_account.Credentials.from_service_account_file(
CONFIG_FILE,
scopes=SCOPES
)
self._service = build('sheets', 'v4', credentials=creds)
return self.service
except Exception as e:
raise Exception(f"Failed to create Google Sheets service: {str(e)}")
@property
def service(self) -> Resource:
"""
getter for service.
Using service as property allows instantiation of object and
authentication to be separated, while also ensuring that all calls
to service are authenticated
"""
if self._service is None:
raise ValueError("Connect not executed")
return self._service
def sheet(self, spreadsheet_id:str, sheet_name:str) -> GoogleSheet:
"""
Uses instantiated service to fetch Google Sheet
`spreadsheet_id` and fetches the `sheet_name` spreadsheet
"""
try:
sheet = self.service.spreadsheets().values().get(
spreadsheetId=spreadsheet_id, # spreadsheet id is base64 in edit url
range=sheet_name
).execute()
except Exception as e:
logger.err(f"Failed to read {spreadsheet_id}:{sheet_name} due to {e}")
sys.exit(1)
return GoogleSheet(sheet)