Added HTML_Table_to_List

This commit is contained in:
nitish-iiitd 2018-10-15 00:18:56 +05:30
parent 1ebff83516
commit 80c46d29d2
4 changed files with 66 additions and 0 deletions

View File

@ -0,0 +1,22 @@
from bs4 import BeautifulSoup
class HTMLTableToList:
    """Convert the first ``<table>`` in an HTML string into a list of rows.

    Each row is a list holding the text of that row's ``<th>`` and ``<td>``
    cells.
    """

    def __init__(self, table_html):
        """Store the markup to be parsed later by :meth:`get_list`.

        Args:
            table_html: HTML markup expected to contain a ``<table>``.
        """
        self.table_html = table_html

    def get_list(self):
        """Return the table as a list of rows, each a list of cell texts.

        Only the first ``<table>`` found in the markup is read. Cells are
        collected in document order, so a header cell that follows a data
        cell keeps its position within the row.

        Returns:
            A list with one inner list per ``<tr>``; an empty list when the
            markup contains no ``<table>`` element.
        """
        soup = BeautifulSoup(self.table_html, "lxml")
        table = soup.find('table')
        if table is None:
            # find() yields None when nothing matches; report "no rows"
            # instead of failing with AttributeError on the next call.
            return []
        # Passing both tag names in one find_all() call preserves the order
        # in which the cells appear in the row.
        return [
            [cell.text for cell in tr.find_all(['th', 'td'])]
            for tr in table.find_all('tr')
        ]

View File

@ -0,0 +1,9 @@
# HTML Table to Python List of List Converter
A simple tool that takes an HTML table as a string, converts it to a Python list of lists, and returns the result.
## Libraries Required
1. Beautiful Soup
`$ pip install bs4`
2. lxml (used as the parser by Beautiful Soup)
`$ pip install lxml`
## Usage
A sample script, `html_table_to_list_usage.py`, is provided to show how to use `HTMLTableToList`. It takes an HTML table as a string and prints the corresponding list of lists.

View File

View File

@ -0,0 +1,35 @@
from HTMLTableToList import HTMLTableToList
from pprint import pprint
# Sample markup: one <tr> per colour model, with a <th> label followed by
# <td> component values.
sample_table_html = """<table class="table table-condensed">
<tr>
<th>RGB</th>
<td>53</td><td>72</td><td>35</td>
</tr>
<tr>
<th>HSL</th><td>0.25</td><td>0.35</td><td>0.21</td>
</tr>
<tr>
<th>HSV</th><td>91&deg;</td><td>51&deg;</td><td>28&deg;</td>
</tr>
<tr>
<th>CMYK</th>
<td>0.26</td><td>0.00</td><td>0.51 &nbsp; 0.72</td>
</tr>
<tr>
<th>XYZ</th><td>4.0889</td><td>5.5130</td><td>2.4387</td>
</tr>
<tr>
<th>Yxy</th><td>5.5130</td><td>0.3396</td><td>0.4579</td>
</tr>
<tr>
<th>Hunter Lab</th><td>23.4798</td><td>-10.0046</td><td>10.2778</td>
</tr>
<tr>
<th>CIE-Lab</th><td>28.1490</td><td>-15.1006</td><td>19.7427</td>
</tr>
</table>"""

# The converter takes the HTML table as a string; get_list() returns the
# rows, which are pretty-printed here.
converter = HTMLTableToList(sample_table_html)
pprint(converter.get_list())