• R/O
  • SSH

chkcsv: Commit

Default repository for chkcsv.py.


Commit MetaInfo

Revision: 0bcecc677719af348eea7a9b4da4d9b9e845859e (tree)
Zeit: 2018-10-28 05:44:49
Autor: Dreas Nielsen <dreas.nielsen@gmai...>
Committer: Dreas Nielsen

Log Message

Modified to run under Python 3 as well as 2, and factored the code for easier use as a library module.

Ändern Zusammenfassung

Diff

diff -r 1a7ea99a560b -r 0bcecc677719 .hgignore
--- a/.hgignore Sat Jul 28 16:59:29 2018 -0700
+++ b/.hgignore Sat Oct 27 13:44:49 2018 -0700
@@ -2,6 +2,8 @@
22 MANIFEST
33 chkcsv.htm
44 .pypirc
5+.*~
6+.*\.pyc
57 dist/*
68 doc/build/*
79 test/*
diff -r 1a7ea99a560b -r 0bcecc677719 chkcsv/__init__.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chkcsv/__init__.py Sat Oct 27 13:44:49 2018 -0700
@@ -0,0 +1,1 @@
1+from .chkcsv import *
diff -r 1a7ea99a560b -r 0bcecc677719 chkcsv/chkcsv.py
--- a/chkcsv/chkcsv.py Sat Jul 28 16:59:29 2018 -0700
+++ b/chkcsv/chkcsv.py Sat Oct 27 13:44:49 2018 -0700
@@ -20,7 +20,7 @@
2020 # though a section name for them is reserved.
2121 #
2222 # COPYRIGHT:
23-# Copyright (c) 2011, R.Dreas Nielsen (RDN)
23+# Copyright (c) 2011,2018 R.Dreas Nielsen (RDN)
2424 #
2525 # LICENSE:
2626 # GPL v.3
@@ -37,14 +37,20 @@
3737 # Date Remarks
3838 # ---------- --------------------------------------------------------------
3939 # 2011-09-25 First version. Version 0.8.0.0. RDN.
40+# 2018-10-27 Converted to run under both Python 2 and 3. Version 1.0.0. RDN.
4041 # ============================================================================
4142
42-_version = "0.8.0.0"
43-_vdate = "2011-09-24"
43+_version = "1.0.0"
44+_vdate = "2018-10-27"
4445
4546 import sys
4647 from optparse import OptionParser
47-import ConfigParser
48+try:
49+ # Py2
50+ from ConfigParser import SafeConfigParser as ConfigParser
51+except:
52+ # Py3
53+ from configparser import ConfigParser
4854 import codecs
4955 import os.path
5056 import csv
@@ -71,15 +77,23 @@
7177 self.column = column
7278
7379 class CsvChecker():
74- """Object to check a specific column of a defined type. After initialization, the 'check()'
75- method will return a boolean indicating whether a data value is acceptable."""
80+ """Create an object to check a specific column of a defined type.
81+
82+ :param fmt_spec: A ConfigParser object.
83+ :param colname: The name of the data column.
84+ :param column_required_default: A Boolean indicating whether the column is required by default.
85+ :param data_required_default: A Boolean indicating whether data values are required (non-null) by default.
86+
87+ After initialization, the 'check()'
88+ method will return a boolean indicating whether a data value is acceptable.
89+ """
7690 get_fn = {
77- 'column_required' : ConfigParser.SafeConfigParser.getboolean,
78- 'data_required' : ConfigParser.SafeConfigParser.getboolean,
79- 'type' : ConfigParser.SafeConfigParser.get,
80- 'minlen' : ConfigParser.SafeConfigParser.getint,
81- 'maxlen' : ConfigParser.SafeConfigParser.getint,
82- 'pattern' : ConfigParser.SafeConfigParser.get
91+ 'column_required' : ConfigParser.getboolean,
92+ 'data_required' : ConfigParser.getboolean,
93+ 'type' : ConfigParser.get,
94+ 'minlen' : ConfigParser.getint,
95+ 'maxlen' : ConfigParser.getint,
96+ 'pattern' : ConfigParser.get
8397 }
8498 datetime_fmts = ("%x",
8599 "%c",
@@ -214,7 +228,7 @@
214228 return None
215229 if type(data) == type(datetime.date.today()):
216230 return None
217- if type(data) != types.StringType:
231+ if type(data) != type(""):
218232 if data==None:
219233 return "missing date/time"
220234 try:
@@ -235,7 +249,7 @@
235249 return None
236250 if type(data) == type(datetime.date.today()):
237251 return None
238- if type(data) != types.StringType:
252+ if type(data) != type(""):
239253 if data==None:
240254 return "missing date"
241255 try:
@@ -354,49 +368,76 @@
354368 self.reader = codecs.getreader(encoding)(f)
355369 def __iter__(self):
356370 return self
357- def next(self):
358- return self.reader.next().encode('utf-8')
371+ def __next__(self):
372+ return next(self.reader).encode('utf-8')
359373
360374 class UnicodeReader:
361375 """A CSV reader which will iterate over lines in the CSV file "f",
362- which is encoded in the given encoding."""
376+ which is encoded in the given encoding.
377+ """
363378 def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
364379 f = UTF8Recoder(f, encoding)
365380 self.reader = csv.reader(f, dialect=dialect, **kwds)
366- def next(self):
367- row = self.reader.next()
381+ def __next__(self):
382+ row = next(self.reader)
368383 return [unicode(s, "utf-8") for s in row]
369384 def __iter__(self):
370385 return self
371386
372387 def show_errors(errlist):
373- """Items in errlist are a tuple of a narrative message, the name of the file
374- in which the error occurred, the line number of the file, and the column
375- name of the file. All but the first may be null."""
388+ """Write a list of error messages to stderr.
389+
390+ :param errlist: A tuple of a narrative message, the name of the file
391+ in which the error occurred, the line number of the file, and the column
392+ name of the file. All but the first may be null.
393+ """
376394 for err in errlist:
377395 sys.stderr.write("%s.\n" % " ".join([ "%s %s" % em for em in [ e for e in
378396 zip(("Error:", "in file", "on line", "in column"), err) if e[1]]]))
379397
380398
399+def read_format_specs(fmt_file, column_required, data_required, chkopts="chkcsvoptions"):
400+ """Read format specifications from a file.
401+
402+ :param fmt_file: The name of the file containing format specifications.
403+ :param column_required: Whether or not the column must be in the CSV file to be checked.
404+ :param data_required: Whether or not a data value is required on every row of the CSV file.
405+ :param chkopts: The name of a section in the format specification file containing additional options.
406+ """
407+ fmtspecs = ConfigParser()
408+ try:
409+ files_read = fmtspecs.read([fmt_file])
410+ except configparser.Error:
411+ raise ChkCsvError("Error reading format specification file.", fmt_file)
412+ if len(files_read) == 0:
413+ raise ChkCsvError("Error reading format specification file.", fmt_file)
414+ # Convert ConfigParser object into a list of CsvChecker objects
415+ speccols = [ sect for sect in fmtspecs.sections() if sect != chkopts ]
416+ cols = {}
417+ for col in speccols:
418+ cols[col] = CsvChecker(fmtspecs, col, column_required, data_required)
419+ return cols
420+
421+
381422 def check_csv_file(csv_fname, cols, halt_on_err, columnexit, \
382423 linelength, caseinsensitive, encoding=None):
383424 """Check that all of the required columns and data are present in the CSV file, and that
384- the data conform to the appropriate type and other specification.
385- Arguments are: 1. The name of the CSV file to check; 2. A dictionary of
386- specifications (ChkCsv objects) indexed by column name; 3. Whether to exit
387- on the first error; 4. Whether to exit if the CSV file doesn't have
388- exactly the same columns in the format specifications; 5. Whether to
389- report an error if any data row has a different number of items than indicated
390- by the column headers; 6. Whether column names in the specifications and
391- CSV file should be compared case-insensitive; 7. The character encoding of
392- the CSV file.
425+ the data conform to the appropriate type and other specifications.
426+
427+ :param csv_fname: The name of the CSV file to check.
428+ :param cols: A dictionary of specifications (CsvChecker objects) indexed by column name.
429+ :param halt_on_err: Whether to exit on the first error.
430+ :param columnexit: Whether to exit if the CSV file doesn't have exactly the same columns in the format specifications.
431+ :param linelength: Whether to report an error if any data row has a different number of items than indicated by the column headers.
432+ :param casesensitive: Whether column names in the specifications and CSV file should be compared case-insensitively.
433+ :param encoding: The character encoding of the CSV file.
393434 """
394435 dialect = csv.Sniffer().sniff(open(csv_fname, "rt").readline())
395436 if encoding:
396437 inf = UnicodeReader(open(csv_fname, "rt"), dialect, encoding)
397438 else:
398439 inf = csv.reader(open(csv_fname, "rt"), dialect)
399- colnames = inf.next()
440+ colnames = next(inf)
400441 req_cols = [ c for c in cols if cols[c].column_required ]
401442 # Exit if all required columns are not present
402443 if caseinsensitive:
@@ -462,7 +503,7 @@
462503 if len(args)==0:
463504 parser.print_help()
464505 return 0
465- if len(args) <> 1:
506+ if len(args) != 1:
466507 raise ChkCsvError("A single argument, the name of the CSV file to check, must be provided.")
467508 csv_file = args[0]
468509 if not os.path.exists(csv_file):
@@ -474,22 +515,12 @@
474515 fmt_file = "%s.fmt" % fn
475516 if not os.path.exists(fmt_file):
476517 raise ChkCsvError("The format file does not exist.", fmt_file)
477- fmtspecs = ConfigParser.SafeConfigParser()
478- try:
479- files_read = fmtspecs.read([fmt_file])
480- except ConfigParser.Error:
481- raise ChkCsvError("Error reading format specification file.", fmt_file)
482- if len(files_read) == 0:
483- raise ChkCsvError("Error reading format specification file.", fmt_file)
518+ # Get format specifications as a list of ChkCsv objects from the configuration file.
484519 if opts.optsection:
485520 chkopts = opts.optsection
486521 else:
487522 chkopts = "chkcsvoptions"
488- # Convert ConfigParser object into a list of CsvChecker objects
489- speccols = [ sect for sect in fmtspecs.sections() if sect <> chkopts ]
490- cols = {}
491- for col in speccols:
492- cols[col] = CsvChecker(fmtspecs, col, opts.column_required, opts.data_required)
523+ cols = read_format_specs(fmt_file, opts.column_required, opts.data_required, chkopts)
493524 # Check the file
494525 errorlist = check_csv_file(csv_file, cols, opts.haltonerror,
495526 opts.columnexit, opts.linelength, opts.caseinsensitive, opts.encoding)
@@ -503,10 +534,10 @@
503534 if __name__=='__main__':
504535 try:
505536 status = main()
506- except ChkCsvError, msg:
537+ except ChkCsvError as msg:
507538 show_errors( [ (msg.errmsg, msg.infile, msg.line, msg.column) ] )
508539 exit(1)
509- except SystemExit, x:
540+ except SystemExit as x:
510541 sys.exit(x)
511542 except Exception:
512543 strace = traceback.extract_tb(sys.exc_info()[2])[-1:]
diff -r 1a7ea99a560b -r 0bcecc677719 doc/source/conf.py
--- a/doc/source/conf.py Sat Jul 28 16:59:29 2018 -0700
+++ b/doc/source/conf.py Sat Oct 27 13:44:49 2018 -0700
@@ -16,9 +16,10 @@
1616 # add these directories to sys.path here. If the directory is relative to the
1717 # documentation root, use os.path.abspath to make it absolute, like shown here.
1818 #
19-# import os
20-# import sys
19+import os
20+import sys
2121 # sys.path.insert(0, os.path.abspath('.'))
22+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__name__), '../../chkcsv')))
2223
2324
2425 # -- General configuration ------------------------------------------------
@@ -55,9 +56,9 @@
5556 # built documents.
5657 #
5758 # The short X.Y version.
58-version = u'0.8'
59+version = u'1.0'
5960 # The full version, including alpha/beta/rc tags.
60-release = u'0.8.0'
61+release = u'1.0.0'
6162
6263 # The language for content autogenerated by Sphinx. Refer to documentation
6364 # for a list of supported languages.
diff -r 1a7ea99a560b -r 0bcecc677719 doc/source/index.rst
--- a/doc/source/index.rst Sat Jul 28 16:59:29 2018 -0700
+++ b/doc/source/index.rst Sat Oct 27 13:44:49 2018 -0700
@@ -81,6 +81,24 @@
8181 -x, --exitonerror Exit when the first error is found.
8282
8383
84+.. _availability:
85+
86+Installation
87+================================
88+
89+
90+The chkcsv program is available on `PyPi <https://pypi.org/project/chkcsv/>`_.
91+It can be installed with:
92+
93+.. code-block:: none
94+
95+ pip install chkcsv
96+
97+By default, chkcsv.py will be installed as an executable script.
98+To use it as a library module, it must be copied to
99+Python's *site-packages* directory.
100+
101+
84102 Format Specifications
85103 ============================
86104
@@ -284,24 +302,37 @@
284302 pattern=(?i)(FT|M|CM)$
285303
286304
287-.. _availability:
288-
289-Availability
305+Module Documentation
290306 ================================
291307
308+To use chkcsv.py as a library module, it must be placed in either the
309+global or personal *site-packages* directory. The module provides the
310+following classes and functions to support reading of format specifications
311+from a file and checking a CSV file.
292312
293-The chkcsv program is available on `PyPi <https://pypi.org/project/chkcsv/>`_.
294-It can be installed with:
313+Classes
314+--------------------------------
295315
296-.. code-block:: none
316+.. module:: chkcsv
317+.. autoclass:: CsvChecker
318+ :members:
319+
297320
298- pip install chkcsv
321+Functions
322+-------------------------------
323+
324+.. autofunction:: read_format_specs
325+
326+.. autofunction:: check_csv_file
327+
328+.. autofunction:: show_errors
329+
299330
300331
301332 Copyright and License
302333 ================================
303334
304-Copyright (c) 2011, R.Dreas Nielsen
335+Copyright (c) 2011, 2018, R.Dreas Nielsen
305336
306337 This program is free software: you can redistribute it and/or modify it
307338 under the terms of the GNU General Public License as published by the
diff -r 1a7ea99a560b -r 0bcecc677719 setup.py
--- a/setup.py Sat Jul 28 16:59:29 2018 -0700
+++ b/setup.py Sat Oct 27 13:44:49 2018 -0700
@@ -1,21 +1,28 @@
11 from distutils.core import setup
22
33 setup(name='chkcsv',
4- version='0.8.0.2',
5- description="Check the format of a CSV file",
4+ version='1.0.0',
5+ description="Checks the format of a CSV file with respect to a specifed set of column names and types.",
66 author='Dreas Nielsen',
77 author_email='dreas.nielsen@gmail.com',
88 url='https://bitbucket.org/rdnielsen/chkcsv/',
99 scripts=['chkcsv/chkcsv.py'],
10+ requires=[],
11+ python_requires = '>=2.7',
1012 classifiers=[
1113 'Development Status :: 5 - Production/Stable',
1214 'Environment :: Console',
1315 'Intended Audience :: End Users/Desktop',
1416 'License :: OSI Approved :: GNU General Public License (GPL)',
1517 'Natural Language :: English',
18+ 'Programming Language :: Python :: 3',
19+ 'Programming Language :: Python :: 2.7',
1620 'Operating System :: OS Independent',
1721 'Topic :: Text Processing :: General',
18- 'Topic :: Office/Business'
22+ 'Topic :: Office/Business',
23+ 'Topic :: Scientific/Engineering',
24+ 'Topic :: Text Processing',
25+ 'Topic :: Utilities'
1926 ],
2027 long_description="""``chkcsv.py`` is a Python module and program
2128 that checks the format of data in a CSV file. It can check whether required
Show on old repository browser