1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
|
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division
from petl.util.base import Table, iterdata
from petl.compat import text_type
from petl.errors import ArgumentError as PetlArgError
def _get_gspread_client(auth_info):
import gspread
if isinstance(auth_info, gspread.Client):
return auth_info
if isinstance(auth_info, dict):
gd = gspread.service_account_from_dict(auth_info)
return gd
import google
if isinstance(auth_info, google.oauth2.service_account.Credentials):
gc = gspread.authorize(auth_info)
return gc
if auth_info is None:
ga = gspread.service_account()
return ga
raise PetlArgError("gspread: Invalid account credentials")
def _open_spreadsheet(gspread_client, spreadsheet, open_by_key=False):
if open_by_key:
from gspread.exceptions import SpreadsheetNotFound
try:
wb = gspread_client.open_by_key(spreadsheet)
except SpreadsheetNotFound:
wb = gspread_client.open(spreadsheet)
elif spreadsheet is not None:
wb = gspread_client.open(spreadsheet)
else:
raise PetlArgError("gspread requires argument spreadsheet")
return wb
def _select_worksheet(wb, worksheet, find_or_create=False):
# Allow for user to specify no sheet, sheet index or sheet name
if worksheet is None:
ws = wb.sheet1
elif isinstance(worksheet, int):
ws = wb.get_worksheet(worksheet)
elif isinstance(worksheet, text_type):
sheetname = text_type(worksheet)
if find_or_create:
if worksheet in [wbs.title for wbs in wb.worksheets()]:
ws = wb.worksheet(sheetname)
else:
ws = wb.add_worksheet(sheetname, 1, 1)
else:
# use text_type for cross version compatibility
ws = wb.worksheet(sheetname)
else:
raise PetlArgError("Only can find worksheet by name or by number")
return ws
def fromgsheet(
credentials_or_client, spreadsheet, worksheet=None, cell_range=None,
open_by_key=False
):
"""
Extract a table from a google spreadsheet.
The `credentials_or_client` are used to authenticate with the google apis.
For more info, check `authentication`_.
The `spreadsheet` can either be the key of the spreadsheet or its name.
The `worksheet` argument can be omitted, in which case the first
sheet in the workbook is used by default.
The `cell_range` argument can be used to provide a range string
specifying the top left and bottom right corners of a set of cells to
extract. (i.e. 'A1:C7').
Set `open_by_key` to `True` in order to treat `spreadsheet` as spreadsheet key.
.. note::
- Only the top level of google drive will be searched for the
spreadsheet filename due to API limitations.
- The worksheet name is case sensitive.
Example usage follows::
>>> from petl import fromgsheet
>>> import gspread # doctest: +SKIP
>>> client = gspread.service_account() # doctest: +SKIP
>>> tbl1 = fromgsheet(client, 'example_spreadsheet', 'Sheet1') # doctest: +SKIP
>>> tbl2 = fromgsheet(client, '9zDNETemfau0uY8ZJF0YzXEPB_5GQ75JV', credentials) # doctest: +SKIP
This functionality relies heavily on the work by @burnash and his great
`gspread module`_.
.. _gspread module: http://gspread.readthedocs.io/
.. _authentication: http://gspread.readthedocs.io/en/latest/oauth2.html
"""
return GoogleSheetView(
credentials_or_client,
spreadsheet,
worksheet,
cell_range,
open_by_key,
)
class GoogleSheetView(Table):
"""Conects to a worksheet and iterates over its rows."""
def __init__(
self, credentials_or_client, spreadsheet, worksheet, cell_range,
open_by_key
):
self.auth_info = credentials_or_client
self.spreadsheet = spreadsheet
self.worksheet = worksheet
self.cell_range = cell_range
self.open_by_key = open_by_key
def __iter__(self):
gspread_client = _get_gspread_client(self.auth_info)
wb = _open_spreadsheet(gspread_client, self.spreadsheet, self.open_by_key)
ws = _select_worksheet(wb, self.worksheet)
# grab the range or grab the whole sheet
if self.cell_range is not None:
return self._yield_by_range(ws)
return self._yield_all_rows(ws)
def _yield_all_rows(self, ws):
# no range specified, so return all the rows
for row in ws.get_all_values():
yield tuple(row)
def _yield_by_range(self, ws):
found = ws.get_values(self.cell_range)
for row in found:
yield tuple(row)
def togsheet(
table, credentials_or_client, spreadsheet, worksheet=None, cell_range=None,
share_emails=None, role="reader"
):
"""
Write a table to a new google sheet.
The `credentials_or_client` are used to authenticate with the google apis.
For more info, check `authentication`_.
The `spreadsheet` will be the title of the workbook created google sheets.
If there is a spreadsheet with same title a new one will be created.
If `worksheet` is specified, the first worksheet in the spreadsheet
will be renamed to its value.
The spreadsheet will be shared with all emails in `share_emails` with
`role` permissions granted. For more info, check `sharing`_.
Returns: the spreadsheet key that can be used in `appendgsheet` further.
.. _sharing: https://developers.google.com/drive/v3/web/manage-sharing
.. note::
The `gspread`_ package doesn't support serialization of `date` and
`datetime` types yet.
Example usage::
>>> from petl import fromcolumns, togsheet
>>> import gspread # doctest: +SKIP
>>> client = gspread.service_account() # doctest: +SKIP
>>> cols = [[0, 1, 2], ['a', 'b', 'c']]
>>> tbl = fromcolumns(cols)
>>> togsheet(tbl, client, 'example_spreadsheet') # doctest: +SKIP
"""
gspread_client = _get_gspread_client(credentials_or_client)
wb = gspread_client.create(spreadsheet)
ws = wb.sheet1
ws.resize(rows=1, cols=1) # make smallest table possible
# rename sheet if set
if worksheet is not None:
ws.update_title(title=worksheet)
# gspread indices start at 1, therefore row index insert starts at 1
ws.append_rows(table, table_range=cell_range)
# specify the user account to share to
if share_emails is not None:
for user_email in share_emails:
wb.share(user_email, perm_type="user", role=role)
return wb.id
def appendgsheet(
table, credentials_or_client, spreadsheet, worksheet=None,
open_by_key=False, include_header=False
):
"""
Append a table to an existing google shoot at either a new worksheet
or the end of an existing worksheet.
The `credentials_or_client` are used to authenticate with the google apis.
For more info, check `authentication`_.
The `spreadsheet` is the name of the workbook to append to.
The `worksheet` is the title of the worksheet to append to or create when it
does not exist yet.
Set `open_by_key` to `True` in order to treat `spreadsheet` as spreadsheet key.
Set `include_header` to `True` if you don't want omit fieldnames as the
first row appended.
.. note::
The sheet index cannot be used, and None is not an option.
"""
gspread_client = _get_gspread_client(credentials_or_client)
# be able to give filename or key for file
wb = _open_spreadsheet(gspread_client, spreadsheet, open_by_key)
# check to see if worksheet exists, if so append, otherwise create
ws = _select_worksheet(wb, worksheet, True)
rows = table if include_header else list(iterdata(table))
ws.append_rows(rows)
return wb.id
Table.togsheet = togsheet
Table.appendgsheet = appendgsheet
|