From 80c46d29d2e83a17fcd986034e71e678b834b2b4 Mon Sep 17 00:00:00 2001
From: nitish-iiitd
Date: Mon, 15 Oct 2018 00:18:56 +0530
Subject: [PATCH] Added HTML_Table_to_List

---
 HTML_Table_to_List/HTMLTableToList.py         | 28 +++++++++++++++
 HTML_Table_to_List/README.md                  | 11 ++++++
 HTML_Table_to_List/__init__.py                |  0
 .../html_table_to_list_usage.py               | 35 +++++++++++++++++++
 4 files changed, 74 insertions(+)
 create mode 100644 HTML_Table_to_List/HTMLTableToList.py
 create mode 100644 HTML_Table_to_List/README.md
 create mode 100644 HTML_Table_to_List/__init__.py
 create mode 100644 HTML_Table_to_List/html_table_to_list_usage.py

diff --git a/HTML_Table_to_List/HTMLTableToList.py b/HTML_Table_to_List/HTMLTableToList.py
new file mode 100644
index 0000000..0653c20
--- /dev/null
+++ b/HTML_Table_to_List/HTMLTableToList.py
@@ -0,0 +1,28 @@
+from bs4 import BeautifulSoup
+
+
+class HTMLTableToList:
+    """Convert the first ``<table>`` found in an HTML string to a list of rows."""
+
+    def __init__(self, table_html):
+        """Store the HTML markup (a string containing a table) to convert."""
+        self.table_html = table_html
+
+    def get_list(self):
+        """Return the table as a list of rows, each a list of cell texts.
+
+        Header (``th``) and data (``td``) cells are collected together in
+        document order. An empty list is returned when the markup contains
+        no ``<table>`` element.
+        """
+        soup = BeautifulSoup(self.table_html, "lxml")
+        table = soup.find("table")
+        if table is None:
+            # find() returns None when nothing matches; avoid AttributeError.
+            return []
+        # Passing a list of names matches either tag, in document order, so a
+        # header cell that follows a data cell keeps its position in the row.
+        return [
+            [cell.text for cell in tr.find_all(["th", "td"])]
+            for tr in table.find_all("tr")
+        ]
\ No newline at end of file
diff --git a/HTML_Table_to_List/README.md b/HTML_Table_to_List/README.md
new file mode 100644
index 0000000..c02627a
--- /dev/null
+++ b/HTML_Table_to_List/README.md
@@ -0,0 +1,11 @@
+# HTML Table to Python List of List Converter
+A simple tool which takes an HTML table as a string, converts it to a Python list of lists, and returns it.
+
+## Libraries Required
+1. Beautiful Soup
+`$pip install bs4`
+2. lxml (the parser the converter passes to Beautiful Soup)
+`$pip install lxml`
+
+## Usage
+A sample script `html_table_to_list_usage.py` has been provided to show the usage of HTMLTableToList. It takes a string containing an HTML table and prints the corresponding list of lists.
\ No newline at end of file
diff --git a/HTML_Table_to_List/__init__.py b/HTML_Table_to_List/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/HTML_Table_to_List/html_table_to_list_usage.py b/HTML_Table_to_List/html_table_to_list_usage.py
new file mode 100644
index 0000000..d7f1b7b
--- /dev/null
+++ b/HTML_Table_to_List/html_table_to_list_usage.py
@@ -0,0 +1,35 @@
+from HTMLTableToList import HTMLTableToList
+from pprint import pprint
+# NOTE(review): tags in this literal are reconstructed around the surviving cell text; confirm against the original markup.
+html_table_string = """
+<table>
+<tr>
+<th>RGB</th><td>53</td><td>72</td><td>35</td>
+</tr>
+<tr>
+<th>HSL</th><td>0.25</td><td>0.35</td><td>0.21</td>
+</tr>
+<tr>
+<th>HSV</th><td>91°</td><td>51°</td><td>28°</td>
+</tr>
+<tr>
+<th>CMYK</th><td>0.26</td><td>0.00</td><td>0.51   0.72</td>
+</tr>
+<tr>
+<th>XYZ</th><td>4.0889</td><td>5.5130</td><td>2.4387</td>
+</tr>
+<tr>
+<th>Yxy</th><td>5.5130</td><td>0.3396</td><td>0.4579</td>
+</tr>
+<tr>
+<th>Hunter Lab</th><td>23.4798</td><td>-10.0046</td><td>10.2778</td>
+</tr>
+<tr>
+<th>CIE-Lab</th><td>28.1490</td><td>-15.1006</td><td>19.7427</td>
+</tr>
+</table>
+"""
+
+htmltabletolist = HTMLTableToList(html_table_string) ## args : HTML table as string
+list_of_list = htmltabletolist.get_list()
+pprint(list_of_list)
\ No newline at end of file