mirror of
https://github.com/rasbt/python_reference.git
synced 2024-12-18 08:10:24 +00:00
92 lines
2.4 KiB
Python
92 lines
2.4 KiB
Python
|
# Sebastian Raschka 2014
|
||
|
# Prints Information of a SQLite database.
|
||
|
|
||
|
# E.g.,
|
||
|
#
|
||
|
"""
|
||
|
Total rows: 1
|
||
|
|
||
|
Column Info:
|
||
|
ID, Name, Type, NotNull, DefaultVal, PrimaryKey
|
||
|
(0, 'id', 'TEXT', 0, None, 1)
|
||
|
(1, 'date', '', 0, None, 0)
|
||
|
(2, 'time', '', 0, None, 0)
|
||
|
(3, 'date_time', '', 0, None, 0)
|
||
|
|
||
|
Number of entries per column:
|
||
|
date: 1
|
||
|
date_time: 1
|
||
|
id: 1
|
||
|
time: 1
|
||
|
"""
|
||
|
|
||
|
import sqlite3
|
||
|
|
||
|
def connect(sqlite_file):
    """ Open the SQLite database file `sqlite_file`.

    Returns a (connection, cursor) tuple; the cursor is created from the
    returned connection.

    """
    connection = sqlite3.connect(sqlite_file)
    return connection, connection.cursor()
|
||
|
|
||
|
def close(conn):
    """ Close the connection to the database.

    No commit is issued here: callers that modified the database must call
    conn.commit() themselves before closing if the changes should persist.

    """
    conn.close()
|
||
|
|
||
|
def total_rows(cursor, table_name, print_out=False):
    """ Returns the total number of rows in a table.

    cursor     -- open sqlite3 cursor used to run the query
    table_name -- name of the table to count; it is inserted into the SQL
                  text as-is, so it must come from a trusted source
    print_out  -- if True, the count is also printed

    """
    # Run the query on the cursor that was passed in (not on a module-level
    # name), so the function works with any cursor and when imported.
    cursor.execute('SELECT COUNT(*) FROM {}'.format(table_name))
    # COUNT(*) without GROUP BY always yields exactly one row.
    count = cursor.fetchone()[0]
    if print_out:
        print('\nTotal rows: {}'.format(count))
    return count
|
||
|
|
||
|
def table_col_info(cursor, table_name, print_out=False):
    """
    Returns a list of tuples with column information:
    (id, name, type, notnull, default_value, primary_key)

    cursor     -- open sqlite3 cursor used to run the query
    table_name -- name of the table to inspect; it is inserted into the SQL
                  text as-is, so it must come from a trusted source
    print_out  -- if True, a header line and one line per column are printed

    """
    # Run the query on the cursor that was passed in (not on a module-level
    # name), so the function works with any cursor and when imported.
    cursor.execute('PRAGMA TABLE_INFO({})'.format(table_name))
    info = cursor.fetchall()

    if print_out:
        print("\nColumn Info:\nID, Name, Type, NotNull, DefaultVal, PrimaryKey")
        for col in info:
            print(col)
    return info
|
||
|
|
||
|
def values_in_col(cursor, table_name, print_out=True):
    """ Returns a dictionary with columns as keys and the number of not-null
        entries as associated values.

    cursor     -- open sqlite3 cursor used to run the queries
    table_name -- name of the table to inspect; it is inserted into the SQL
                  text as-is, so it must come from a trusted source
    print_out  -- if True (the default), one "name: count" line per column
                  is printed

    One query is issued per column and all matching rows are fetched, so
    this can be slow on large tables.

    """
    # Run the queries on the cursor that was passed in (not on a module-level
    # name), so the function works with any cursor and when imported.
    cursor.execute('PRAGMA TABLE_INFO({})'.format(table_name))
    col_names = [col[1] for col in cursor.fetchall()]

    col_dict = dict()
    for name in col_names:
        # Quote the column name so that names containing spaces or reserved
        # words still form valid SQL; embedded double quotes are doubled.
        quoted = '"{}"'.format(name.replace('"', '""'))
        cursor.execute('SELECT ({0}) FROM {1} WHERE {0} IS NOT NULL'
                       .format(quoted, table_name))
        # In my case this approach resulted in a better performance than using COUNT
        col_dict[name] = len(cursor.fetchall())

    if print_out:
        print("\nNumber of entries per column:")
        for name, number_rows in col_dict.items():
            print('{}: {}'.format(name, number_rows))
    return col_dict
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':

    # Example run: print row count, column info and per-column entry counts
    # for one table of a local SQLite file.
    sqlite_file = 'my_first_db.sqlite'
    table_name = 'my_table_3'

    conn, c = connect(sqlite_file)
    try:
        total_rows(c, table_name, print_out=True)
        table_col_info(c, table_name, print_out=True)
        values_in_col(c, table_name, print_out=True)  # slow on large data bases
    finally:
        # Close the connection even if one of the queries fails
        # (e.g. because the table does not exist).
        close(conn)
|
||
|
|