Update conc_gzip_files.py

This commit is contained in:
Sebastian Raschka 2014-04-01 16:08:26 -04:00
parent 86c23d5123
commit a2f6f2c4b3

View File

@@ -1,9 +1,12 @@
# Sebastian Raschka 03/2014 # Sebastian Raschka 03/2014
import gzip import gzip
import shutil
import os import os
def conc_gzip_files(in_dir, out_file, append=False): #import pyprind
def conc_gzip_files(in_dir, out_file, append=False, print_progress=True):
""" Reads contents from gzipped ASCII or UTF-8 files, decodes them, and """ Reads contents from gzipped ASCII or UTF-8 files, decodes them, and
appends the lines to one output file. appends the lines to one output file.
@@ -12,17 +15,19 @@ def conc_gzip_files(in_dir, out_file, append=False):
out_file (str): Path to the resulting file out_file (str): Path to the resulting file
append (bool): If true, it appends contents to an existing file, append (bool): If true, it appends contents to an existing file,
else creates a new output file. else creates a new output file.
print_progress (bool): prints progress bar if true.
""" """
write_mode = 'wb' write_mode = 'wb'
if append:
write_mode = 'ab'
gzips = [os.path.join(in_dir, i) for i in os.listdir(in_dir) if i.endswith('.gz')] gzips = [os.path.join(in_dir, i) for i in os.listdir(in_dir) if i.endswith('.gz')]
with open(out_file, write_mode) as ofile: #if print_progress:
# pbar = pyprind.ProgBar(len(gzips))
with open(out_file, 'ab' if append else 'wb') as ofile:
for f in gzips: for f in gzips:
with gzip.open(f, 'rb') as gzipf: with gzip.open(f, 'rb') as gzipf:
for line in gzipf: shutil.copyfileobj(gzipf, ofile)
ofile.write(line) #if print_progress:
# pbar.update()
if __name__ == '__main__': if __name__ == '__main__':
conc_gzip_files('/home/usr/my_dir', '/home/usr/test.txt') conc_gzip_files('/home/usr/my_dir', '/home/usr/test.txt')