2014-04-01 19:18:34 +00:00
|
|
|
# Sebastian Raschka 03/2014
|
|
|
|
|
|
|
|
import gzip
|
|
|
|
import os
|
|
|
|
|
|
|
|
def conc_gzip_files(in_dir, out_file, append=False):
    """ Concatenates the decompressed contents of gzip files into one file.

    Every entry in `in_dir` whose name ends with '.gz' is decompressed and
    its bytes are written, unchanged, to `out_file`. The data is copied in
    binary mode, so no text decoding or re-encoding takes place.

    Files are processed in sorted file name order so that the result is
    reproducible; os.listdir() alone returns entries in arbitrary order.

    Keyword arguments:
        in_dir (str): Path of the directory with the gzip-files
        out_file (str): Path to the resulting file
        append (bool): If true, it appends contents to an existing file,
                       else creates a new output file (overwriting any
                       existing one).

    """
    write_mode = 'ab' if append else 'wb'

    # Sort the names: the listing order is otherwise platform dependent.
    gzips = [os.path.join(in_dir, name)
             for name in sorted(os.listdir(in_dir))
             if name.endswith('.gz')]

    with open(out_file, write_mode) as ofile:
        for path in gzips:
            with gzip.open(path, 'rb') as gzipf:
                # Stream line by line so large archives are never held
                # in memory as a whole.
                ofile.writelines(gzipf)
|
2014-04-01 19:18:34 +00:00
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Example run: merge every '.gz' file found in the source directory
    # into a single, newly created output file.
    source_dir = '/home/usr/my_dir'
    target_file = '/home/usr/test.txt'
    conc_gzip_files(in_dir=source_dir, out_file=target_file)
|