-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcsv_filter.py
More file actions
executable file
·55 lines (46 loc) · 1.85 KB
/
csv_filter.py
File metadata and controls
executable file
·55 lines (46 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
"""
csv_filter: filter a csv file by one or more other csv files
@author: mitzip
@contact: http://github.com/mitzip/csv_filter
@license: Public Domain
@version: 1.0.0
@todo: add argument to toggle case-sensitivity
"""
import sys
import csv
import argparse
# Python 2 needs the 'U' flag to read files with universal newlines; Python 3
# text mode already does that and rejects 'U' from version 3.11 onwards.
_READ_MODE = 'rU' if sys.version_info[0] < 3 else 'r'


def _build_parser():
    """Return the parser describing the accepted command-line arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--filter', type=argparse.FileType(_READ_MODE),
                        required=True, help='CSV file to be filtered')
    parser.add_argument('--by', type=argparse.FileType(_READ_MODE),
                        required=True, action='append',
                        help='CSV file to filter by')
    # const covers a bare "--output" with no filename; without it argparse
    # would hand back None and csv.writer(None) would fail.
    parser.add_argument('--output', nargs='?', const=sys.stdout,
                        type=argparse.FileType('w'), default=sys.stdout,
                        help='Filtered output CSV filename, omit for stdout')
    parser.add_argument('--filter-col', default=3, type=int,
                        help='Column in CSV to filter to match, default 3')
    parser.add_argument('--by-col', default=0, type=int,
                        help='Column in CSV to filter by to match, default 0')
    return parser


def _cell(row, col):
    """Return the lower-cased field at index col, or None if row lacks it."""
    try:
        return row[col].lower()
    except IndexError:
        # Blank rows and rows with too few columns have no comparable value.
        return None


def _load_removal_keys(by_files, col):
    """Collect the unique lower-cased values of column col from by_files.

    The first row of every file is skipped as a header. Rows that do not
    have column col (including blank rows) are ignored.
    """
    keys = set()
    for by_file in by_files:
        by_csv = csv.reader(by_file)
        # The default stops an empty file from raising StopIteration.
        next(by_csv, None)
        for row in by_csv:
            key = _cell(row, col)
            if key is not None:
                keys.add(key)
    return keys


def _filter_rows(rows, writer, to_remove, col):
    """Write rows whose column col is not in to_remove; return removed count.

    Blank rows are dropped from the output but are not counted as removed.
    Rows too short to have column col cannot match and are written as-is.
    """
    removed = 0
    for row in rows:
        if not row:
            continue
        if _cell(row, col) in to_remove:
            # TODO: Add option to output removed rows
            removed += 1
        else:
            writer.writerow(row)
    return removed


def main(argv=None):
    """Filter one CSV file by the values found in one or more other CSV files.

    argv is the list of command-line arguments; None makes argparse read
    sys.argv[1:]. Returns the number of rows removed from the filtered file.
    """
    args = _build_parser().parse_args(argv)
    csv_to_stdout = args.output is sys.stdout
    try:
        # Field values for rows you do NOT want in the filtered output.
        to_remove = _load_removal_keys(args.by, args.by_col)
        total_removed = _filter_rows(csv.reader(args.filter),
                                     csv.writer(args.output),
                                     to_remove, args.filter_col)
    finally:
        # argparse opened these files; close them so output is flushed.
        for handle in [args.filter, args.output] + args.by:
            if handle is not sys.stdin and handle is not sys.stdout:
                handle.close()
    # Keep the summary out of the CSV stream when the CSV goes to stdout.
    summary = sys.stderr if csv_to_stdout else sys.stdout
    summary.write("Total Removed: %d\n" % total_removed)
    return total_removed


if __name__ == '__main__':
    main()