• R/O
  • SSH

chkcsv: Commit

Default repository for chkcsv.py.


Commit MetaInfo

Revision: 1e00f4280f2f667f5e9d52f5886805e38c5e848c (tree)
Zeit: 2019-01-04 22:33:56
Autor: Dreas Nielsen <dreas.nielsen@gmai...>
Committer: Dreas Nielsen

Log Message

Added a check, and error report, if any data row has more columns than the number of column headers.

Ändern Zusammenfassung

Diff

diff -r b20d06375291 -r 1e00f4280f2f chkcsv/chkcsv.py
--- a/chkcsv/chkcsv.py Wed Jan 02 20:30:53 2019 -0800
+++ b/chkcsv/chkcsv.py Fri Jan 04 05:33:56 2019 -0800
@@ -39,10 +39,12 @@
3939 # 2011-09-25 First version. Version 0.8.0.0. RDN.
4040 # 2018-10-27 Converted to run under both Python 2 and 3. Version 1.0.0. RDN.
4141 # 2019-01-02 Corrected handling of next() for csv library. Version 1.0.1. RDN.
42+# 2019-01-04 Added check for data rows with more columns than column headers.
43+# Version 1.1.0. RDN.
4244 # ============================================================================
4345
44-_version = "1.0.1"
45-_vdate = "2019-01-02"
46+_version = "1.1.0"
47+_vdate = "2019-01-04"
4648
4749 import sys
4850 from optparse import OptionParser
@@ -447,6 +449,7 @@
447449 :param caseinsensitive: Whether column names in the specifications and CSV file should be compared case-insensitively.
448450 :param encoding: The character encoding of the CSV file.
449451 """
452+ errorlist = []
450453 dialect = csv.Sniffer().sniff(open(csv_fname, "rt").readline())
451454 encoding = "utf-8" if not encoding else encoding
452455 if sys.version_info < (3,):
@@ -462,7 +465,8 @@
462465 else:
463466 req_missing = [ col for col in req_cols if not (col in colnames) ]
464467 if len(req_missing) > 0:
465- raise ChkCsvError("The following columns are required, but are not present in the CSV file: %s." % ", ".join(req_missing), csv_fname, 1)
468+ errorlist.append(("The following columns are required, but are not present in the CSV file: %s." % ", ".join(req_missing), csv_fname, 1))
469+ return errorlist
466470 # Exit if there are extra columns and the option to exit is set.
467471 if columnexit:
468472 if caseinsensitive:
@@ -471,7 +475,8 @@
471475 else:
472476 extra = [ col for col in colnames if not (col in cols) ]
473477 if len(extra) > 0:
474- raise ChkCsvError("The following columns have no format specifications but are in the CSV file: %s." % ", ".join(extra), csv_fname, 1)
478+ errorlist.append(("The following columns have no format specifications but are in the CSV file: %s." % u", ".join(extra), csv_fname, 1))
479+ return errorlist
475480 # Column names common to specifications and data file. These will be used
476481 # to index the cols dictionary to get the appropriate check method
477482 # and to index the CSV column name list (colnames) to get the column position.
@@ -489,12 +494,17 @@
489494 maxindex = max(dataindex) if len(dataindex) > 0 else 0 # 0 if format file is empty
490495 colloc = dict(zip([ chkcols[c] for c in chkcols ], dataindex))
491496 # Read and check the CSV file until done (or until an error).
492- errorlist = []
493497 row_no = 1 # Header is row 1.
494498 for datarow in inf:
495499 row_no += 1
496500 if (len(datarow) > 0) and (len(datarow) < len(colnames)) and linelength:
497501 errorlist.append(("fewer data values than column headers", csv_fname, row_no))
502+ if halt_on_err:
503+ return errorlist
504+ if (len(datarow) > len(colnames)):
505+ errorlist.append(("more data values than column headers", csv_fname, row_no))
506+ if halt_on_err:
507+ return errorlist
498508 if len(datarow) < maxindex + 1:
499509 if len(datarow) > 0:
500510 errorlist.append(("fewer data values than columns in the format specification", csv_fname, row_no))
@@ -548,19 +558,18 @@
548558
549559
550560 if __name__=='__main__':
551- status = main()
552- #try:
553- # status = main()
554- #except ChkCsvError as msg:
555- # show_errors( [ (msg.errmsg, msg.infile, msg.line, msg.column) ] )
556- # exit(1)
557- #except SystemExit as x:
558- # sys.exit(x)
559- #except Exception:
560- # strace = traceback.extract_tb(sys.exc_info()[2])[-1:]
561- # lno = strace[0][1]
562- # src = strace[0][3]
563- # sys.stderr.write("%s: Uncaught exception %s (%s) on line %s (%s)." % (os.path.basename(sys.argv[0]), str(sys.exc_info()[0]), sys.exc_info()[1], lno, src))
564- # sys.exit(1)
561+ try:
562+ status = main()
563+ except ChkCsvError as msg:
564+ show_errors( [ (msg.errmsg, msg.infile, msg.line, msg.column) ] )
565+ exit(1)
566+ except SystemExit as x:
567+ sys.exit(x)
568+ except Exception:
569+ strace = traceback.extract_tb(sys.exc_info()[2])[-1:]
570+ lno = strace[0][1]
571+ src = strace[0][3]
572+ sys.stderr.write("%s: Uncaught exception %s (%s) on line %s (%s)." % (os.path.basename(sys.argv[0]), str(sys.exc_info()[0]), sys.exc_info()[1], lno, src))
573+ sys.exit(1)
565574 sys.exit(status)
566575
diff -r b20d06375291 -r 1e00f4280f2f doc/source/conf.py
--- a/doc/source/conf.py Wed Jan 02 20:30:53 2019 -0800
+++ b/doc/source/conf.py Fri Jan 04 05:33:56 2019 -0800
@@ -48,7 +48,7 @@
4848
4949 # General information about the project.
5050 project = u'chkcsv'
51-copyright = u'2011, Dreas Nielsen'
51+copyright = u'2011-2019, Dreas Nielsen'
5252 author = u'Dreas Nielsen'
5353
5454 # The version info for the project you're documenting, acts as replacement for
@@ -56,9 +56,9 @@
5656 # built documents.
5757 #
5858 # The short X.Y version.
59-version = u'1.0'
59+version = u'1.1'
6060 # The full version, including alpha/beta/rc tags.
61-release = u'1.0.0'
61+release = u'1.1.0'
6262
6363 # The language for content autogenerated by Sphinx. Refer to documentation
6464 # for a list of supported languages.
@@ -156,7 +156,7 @@
156156 # dir menu entry, description, category)
157157 texinfo_documents = [
158158 (master_doc, 'chkcsv', u'chkcsv Documentation',
159- author, 'chkcsv', 'One line description of project.',
159+ author, 'chkcsv', 'Check the format of a CSV file.',
160160 'Miscellaneous'),
161161 ]
162162
diff -r b20d06375291 -r 1e00f4280f2f doc/source/index.rst
--- a/doc/source/index.rst Wed Jan 02 20:30:53 2019 -0800
+++ b/doc/source/index.rst Fri Jan 04 05:33:56 2019 -0800
@@ -332,7 +332,7 @@
332332 Copyright and License
333333 ================================
334334
335-Copyright (c) 2011, 2018, R.Dreas Nielsen
335+Copyright (c) 2011-2019, R.Dreas Nielsen
336336
337337 This program is free software: you can redistribute it and/or modify it
338338 under the terms of the GNU General Public License as published by the
@@ -342,3 +342,4 @@
342342 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
343343 the GNU General Public License for more details. The GNU General Public
344344 License is available at http://www.gnu.org/licenses/.
345+
diff -r b20d06375291 -r 1e00f4280f2f setup.py
--- a/setup.py Wed Jan 02 20:30:53 2019 -0800
+++ b/setup.py Fri Jan 04 05:33:56 2019 -0800
@@ -1,7 +1,7 @@
11 from distutils.core import setup
22
33 setup(name='chkcsv',
4- version='1.0.1',
4+ version='1.1.0',
55 description="Checks the format of a CSV file with respect to a specifed set of column names and types.",
66 author='Dreas Nielsen',
77 author_email='dreas.nielsen@gmail.com',
Show on old repository browser