Script to sort lines into different files
author Alex Dehnert <adehnert@mit.edu>
Thu, 2 Feb 2012 04:33:55 +0000 (23:33 -0500)
committer Alex Dehnert <adehnert@mit.edu>
Thu, 2 Feb 2012 04:33:55 +0000 (23:33 -0500)
sort-lines [new file with mode: 0755]

diff --git a/sort-lines b/sort-lines
new file mode 100755 (executable)
index 0000000..8f3b466
--- /dev/null
@@ -0,0 +1,71 @@
+#!/usr/bin/python
+import collections
+import csv
+import sys
+
+class SortFile(object):
+    def __init__(self, code, descr, ):
+        self._fd = None
+        self.code = code
+        self.descr = descr
+
+    def filename(self, ):
+        return "%s.txt" % (self.descr, )
+
+    def getFD(self, ):
+        if not self._fd:
+            self._fd = open(self.filename(), 'a')
+        return self._fd
+
+    def close(self, ):
+        if self._fd:
+            self._fd.close()
+            self._fd = None
+
+
+def read_options(filename):
+    fd = open(filename, 'r')
+    reader = csv.DictReader(fd, dialect='excel-tab', )
+    cats = {}
+    for line in reader:
+        cats[line['code']] = SortFile(code=line['code'], descr=line['descr'])
+    return cats
+
+def process_line(line, cats, dest):
+    print line,
+    count = 0
+    for code, dest in cats.items():
+        if count % 4 == 0: print ""
+        print "(%s) %16s\t\t" % ( code, dest.descr, ),
+        count += 1
+    print ""
+    need_choice = True
+    while need_choice:
+        choice = raw_input("Enter your choice:")
+        if choice == " ":
+            if dest:
+                dest.getFD().write(line)
+                need_choice = False
+        elif choice in cats:
+            dest = cats[choice]
+            dest.getFD().write(line)
+            need_choice = False
+
+def process(infile, cats):
+    fd = open(infile, 'r')
+    dest = None
+    lineno = 0
+    for line in fd:
+        lineno += 1
+        try:
+            dest = process_line(line, cats, dest)
+        except KeyboardInterrupt:
+            print "\n\nCurrent line number:", lineno
+            raise
+
+if __name__ == "__main__":
+    cats_file = sys.argv[1]
+    data_file = sys.argv[2]
+    cats = read_options(cats_file)
+    print cats
+    process(data_file, cats)