#!/usr/bin/python
#
# Copyright 2014 Larry Hosken
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Retrieves quip pages, saves tables as .tsv"""

# Mere days after I posted this, Quip added
# built-in support for spreadsheets. So... this is no
# longer useful. Oh well.


# Along with this file, you'll also need token.py and quip.py :

import token # one-line token.py that says TOKEN='1234adsf' , but using token
             # from https://quip.com/api/reference#authentication-personal

import quip  # copy https://github.com/quip/quip-api/blob/master/python/quip.py

import csv
import xml.etree.cElementTree
import xml.sax.saxutils

def main():
    """Create a Quip client from the token in token.py and export tables."""
    fetch_tables(quip.QuipClient(access_token=token.TOKEN))

def fetch_tables(client, count=20):
    """Fetch recently-updated threads and export the tables found in them.

    Args:
        client: object providing get_recent_threads(count=...) and whatever
            munge_thread() needs (a quip.QuipClient in this script).
        count: how many recent threads to request; defaults to 20, the
            value that used to be hard-coded here.
    """
    threads = client.get_recent_threads(count=count)
    # Only the thread payloads are needed; the keys are not used.
    for thread in threads.values():
        munge_thread(thread, client)

def _cell_text(cell_el):
    """Return a cell's concatenated, stripped text as a UTF-8 byte string."""
    # The Python 2 csv module cannot write unicode objects containing
    # non-ASCII characters, so encode before handing the text to the writer.
    return ''.join(cell_el.itertext()).strip().encode('utf-8')


def _write_rows(csvwriter, table_el, cell_tag):
    """Write one row for every <tr> holding at least one cell_tag element."""
    for tr_el in table_el.iter('tr'):
        row = [_cell_text(cell_el) for cell_el in tr_el.iter(cell_tag)]
        if row:
            csvwriter.writerow(row)


def munge_thread(thread, client):
    """Export the first table of a "tabular" thread as .tsv, plus its HTML.

    Threads without an "html" entry, or whose alphanumeric-only title does
    not contain "tabular" (case-insensitive), are skipped. For the others,
    two files named after the sanitized title are written to the current
    directory: <title>.tsv with the table contents (header cells first,
    then data cells) and <title>.html with the document markup.

    Args:
        thread: dict with thread["thread"]["title"] and, optionally,
            thread["html"].
        client: object whose parse_document_html(html) returns an
            ElementTree element for the document.
    """
    title = thread["thread"]["title"]
    sanitized_title = ''.join([c for c in title if c.isalnum()])
    if "html" not in thread:
        return
    if "tabular" not in sanitized_title.lower():
        return

    # Parse the document and pick the first <table>, if there is one.
    tree = client.parse_document_html(thread["html"])
    table_el = next(iter(tree.iter('table')), None)
    # Compare against None: an Element without children is falsy.
    if table_el is None:
        # Encode so a non-ASCII title cannot break printing to a pipe.
        print('doc %s named "tabular" but has no table?'
              % title.encode('utf-8'))
        return

    # 'wb' is what the Python 2 csv module expects; the with-block makes
    # sure the file is flushed and closed.
    with open(sanitized_title + '.tsv', 'wb') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter='\t')
        _write_rows(csvwriter, table_el, 'th')
        _write_rows(csvwriter, table_el, 'td')

    html = unicode(xml.etree.cElementTree.tostring(tree))
    # Strip the <html> tags that were introduced in parse_document_html
    html = html[len("<html>"):-len("</html>")]

    document_file_name = sanitized_title + ".html"
    with open(document_file_name, "w") as document_file:
        document_file.write(html.encode("utf-8"))


# Run the export only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()