diff options
author | Yann Herklotz <git@yannherklotz.com> | 2019-05-30 16:00:57 +0100 |
---|---|---|
committer | Yann Herklotz <git@yannherklotz.com> | 2019-05-30 16:24:46 +0100 |
commit | a52240fe4f9a160f3fcd65217b7f7307fa13e820 (patch) | |
tree | d07713e8eb3c62550ebbf5ddd94f7504ba5f0e86 /scripts/convert.py | |
parent | 850877011c58199f40ba26c47a071d06f8816b89 (diff) | |
download | verismith-a52240fe4f9a160f3fcd65217b7f7307fa13e820.tar.gz verismith-a52240fe4f9a160f3fcd65217b7f7307fa13e820.zip |
Add conversion script
Diffstat (limited to 'scripts/convert.py')
-rw-r--r-- | scripts/convert.py | 21 |
1 files changed, 21 insertions, 0 deletions
#!/usr/bin/env python3
"""Convert the first HTML table found in a file into a CSV file."""

import sys
from bs4 import BeautifulSoup
import csv


def main(file_, out_path="out.csv"):
    """Extract the first ``<table>`` of an HTML file and write it as CSV.

    The header row of the CSV is built from every ``<th>`` cell found in
    the table's rows; each following CSV row holds the text of the
    ``<td>`` cells of one table row.

    Args:
        file_: Path of the HTML file to read.
        out_path: Path of the CSV file to write. Defaults to ``out.csv``
            in the current working directory.

    Raises:
        ValueError: If the document contains no ``<table>`` element.
        OSError: If the input cannot be read or the output cannot be
            written.
    """
    # The output is UTF-8, so read the input with the same explicit
    # encoding rather than whatever the locale default happens to be.
    with open(file_, "r", encoding="utf-8") as f:
        file_contents = f.read()

    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed and emits a warning, so results can differ between machines.
    soup = BeautifulSoup(file_contents, "html.parser")
    table = soup.select_one("table")
    if table is None:
        raise ValueError(f"no <table> element found in {file_!r}")

    # Keep cells as str. Encoding them to bytes makes csv.writer emit the
    # literal "b'...'" representation under Python 3.
    headers = [th.text for th in table.select("tr th")]

    # NOTE: "tr + tr" matches only rows that directly follow a sibling row,
    # which skips the first (header) row. If the header lives in a separate
    # <thead>, the first <tbody> row is skipped as well.
    rows = [
        [td.text for td in row.find_all("td")]
        for row in table.select("tr + tr")
    ]

    # newline="" lets the csv module control line endings itself, which
    # avoids blank lines between rows on Windows.
    with open(out_path, "w", newline="", encoding="utf-8") as f:
        wr = csv.writer(f)
        wr.writerow(headers)
        wr.writerows(rows)


if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} FILE.html")
    main(sys.argv[1])