#!/usr/bin/python
#
# Copyright 2014 Larry Hosken
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Retrieves quip pages, saves tables as .tsv"""

# Mere days after I posted this, Quip added
# built-in support for spreadsheets. So... this is no
# longer useful. Oh well.

# NOTE: this is a Python 2 script (it relies on the `unicode` builtin).

# Along with this file, you'll also need token.py and quip.py :
import token  # one-line token.py that says TOKEN='1234adsf' , but using token
              # from https://quip.com/api/reference#authentication-personal
import quip   # copy https://github.com/quip/quip-api/blob/master/python/quip.py

import csv
import xml.etree.cElementTree
import xml.sax.saxutils


def main():
    """Create a Quip client from token.TOKEN and export its tables."""
    client = quip.QuipClient(access_token=token.TOKEN)
    fetch_tables(client)


def fetch_tables(client):
    """Run munge_thread on each thread from get_recent_threads(count=20)."""
    threads = client.get_recent_threads(count=20)
    for _key, thread in threads.items():
        munge_thread(thread, client)


def _to_utf8(text):
    """Return text as a UTF-8 byte string; byte strings pass through as-is."""
    if isinstance(text, unicode):
        return text.encode("utf-8")
    return text


def _write_rows(csvwriter, table_el, cell_tag):
    """Write one row per <tr> of table_el that has cell_tag cells.

    Each cell is the concatenated, whitespace-stripped text of the matching
    element. Rows without any matching cell are skipped.
    """
    for tr_el in table_el.iter('tr'):
        # Python 2's csv.writer cannot write non-ASCII unicode objects, so
        # every cell is encoded to UTF-8 before it is handed over.
        row = [_to_utf8(''.join(cell_el.itertext()).strip())
               for cell_el in tr_el.iter(cell_tag)]
        if row:
            csvwriter.writerow(row)


def munge_thread(thread, client):
    """Save a "tabular" thread's first table as .tsv and its body as .html.

    The thread is skipped when it has no "html" entry or when its title,
    reduced to alphanumeric characters, does not contain "tabular"
    (case-insensitive). Output files are named after that reduced title and
    are written to the current directory.

    Args:
        thread: mapping with thread["thread"]["title"] and, optionally,
            thread["html"].
        client: object providing parse_document_html(html).
    """
    title = thread["thread"]["title"]
    sanitized_title = ''.join(c for c in title if c.isalnum())
    if "html" not in thread:
        return
    if "tabular" not in sanitized_title.lower():
        return

    # Parse the document
    tree = client.parse_document_html(thread["html"])

    # Only the first <table> is exported. Compare against None explicitly:
    # an Element without children is falsy.
    table_el = next(iter(tree.iter('table')), None)
    if table_el is None:
        print('doc %s named "tabular" but has no table?' % _to_utf8(title))
        return

    # 'wb' is what the Python 2 csv module expects; the with-block makes sure
    # the file is flushed and closed.
    with open(sanitized_title + '.tsv', 'wb') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter='\t')
        # Header rows (<th> cells) first, then data rows (<td> cells).
        _write_rows(csvwriter, table_el, 'th')
        _write_rows(csvwriter, table_el, 'td')

    html = unicode(xml.etree.cElementTree.tostring(tree))
    # Strip the tags that were introduced in parse_document_html
    # (presumably a wrapping <html>...</html>: 6 and 7 characters -- verify
    # against quip.py).
    html = html[6:-7]

    document_file_name = sanitized_title + ".html"
    with open(document_file_name, "w") as document_file:
        document_file.write(html.encode("utf-8"))


if __name__ == '__main__':
    main()