Skip to content

mxm, IT's mad science

Sections
Personal tools
You are here: Home » Papers » csv_import.py - A Simple Yet Very Practical CSV import module
Downloads
You can download mxm products here.

Due to its technical and international nature, this section is in English.

Max M has a blog too.

og er glad for mad

 

csv_import.py - A Simple Yet Very Practical CSV import module

I have made this simple module for converting a csv file to a list of dicts with unicode keys and values.

The module expects the csv files to be in a directory named "data" located at the same level as the module itself.

Like: specific_importer.py, csv_import.py and data/some_data.csv

This is the code:

    """
    It seems that I have a bunch of data import projects.
    I have made this simple module for converting a csv file to a list of dicts with unicode keys and values.
    keys must be on first line.
    """

    import csv
    from csv import DictReader

    import os, os.path, sys, shutil
    from os.path import normpath, dirname, exists, abspath, join
    from os import makedirs

    # filesystem helpers
    def mydir():
        """
        Return the absolute path of the directory this code lives in.

        When the module is executed as a script the location is taken from
        sys.argv[0]; when it is imported, from the module's __file__.
        """
        running_as_script = (__name__ == '__main__')
        source = sys.argv[0] if running_as_script else __file__
        return abspath(dirname(source))

    def makepath(path):
        """
        Make sure the directory ``path`` exists and return it as a
        normalised absolute path.

        Missing intermediate directories are created as well (os.makedirs).
        """
        missing = not exists(path)
        if missing:
            makedirs(path)
        absolute = abspath(path)
        return normpath(absolute)

    def rmdir(path):
        """
        Delete all files and subdirectories below ``path``, then ``path`` itself.

        The tree is walked bottom-up so every directory is already empty when
        it is removed. A ``path`` that does not exist is silently ignored.
        If ``path`` itself is a symbolic link, the contents of the directory it
        points to are removed but the link is left in place.

        Raises OSError if a file or directory cannot be removed.
        """
        for root, dirs, files in os.walk(path, topdown=False):
            for name in files:
                os.remove(join(root, name))
            for name in dirs:
                entry = join(root, name)
                # os.walk lists symlinks to directories among the dirs, but
                # os.rmdir refuses to remove a link; unlink it instead.
                if os.path.islink(entry):
                    os.remove(entry)
                else:
                    os.rmdir(entry)
        # The walk only removes what is *inside* path; finish with the
        # (now empty) directory itself.
        if os.path.isdir(path) and not os.path.islink(path):
            os.rmdir(path)

    def emptydir(path):
        "Leave path as an existing directory with its previous contents removed"
        rmdir(path)
        # Create the directory (and any missing parents) if it is not there.
        if not exists(path):
            makedirs(path)

    class DataReader(object):

        """
        Iterates over the rows of a csv file as dicts with unicode keys and values.

        The file is looked up as ``data/<fname>`` next to this module (see mydir()).
        The csv dialect is sniffed from the first lines of the file.

        fname      -- name of the csv file inside the data directory.
        fieldnames -- optional sequence of keys; when None the first line of the
                      file supplies the keys (csv.DictReader behaviour).
        encoding   -- encoding of the file, used to turn the content into unicode.

        Works as an iterator and as a context manager; call close() (or leave the
        ``with`` block) to release the file. Runs on Python 2.6+ and Python 3.
        """

        def __init__(self, fname, fieldnames=None, encoding='utf-8'):
            self.encoding = encoding
            self.fieldnames = fieldnames
            fpath = join(mydir(), 'data', fname)
            if sys.version_info[0] >= 3:
                # Python 3's csv module works on text: decode while reading and
                # let the csv module handle the line endings itself.
                self.csvfile = open(fpath, 'r', encoding=encoding, newline='')
            else:
                # Python 2's csv module expects a file opened in binary mode;
                # the byte strings are decoded later in d2u().
                self.csvfile = open(fpath, 'rb')
            try:
                dialect = self.sniffDialect()
                self.dictreader = DictReader(self.csvfile, fieldnames=self.fieldnames, dialect=dialect)
            except Exception:
                # Do not leak the open file when the dialect cannot be determined.
                self.csvfile.close()
                raise

        def sniffDialect(self):
            """
            Guess the csv dialect and rewind the file.

            The second line is preferred as the sample, as before; when the file
            has only one non-blank line among its first two, that line is used.
            Raises csv.Error when no dialect can be determined (e.g. empty file).
            """
            sample = ''
            for _ in range(2):
                line = self.csvfile.readline()
                if line.strip():
                    sample = line
            self.csvfile.seek(0)
            dialect = csv.Sniffer().sniff(sample)
            dialect.skipinitialspace = True
            return dialect

        def _decode(self, value):
            # Byte strings are decoded; lists (surplus fields collected by
            # DictReader) are decoded item by item; anything else -- None for
            # missing fields, or text that is already unicode -- passes through.
            if isinstance(value, bytes):
                return value.decode(self.encoding)
            if isinstance(value, (list, tuple)):
                return [self._decode(item) for item in value]
            return value

        def d2u(self, d):
            """
            Return a copy of the row dict ``d`` with byte strings decoded to unicode.

            Values that are None (row shorter than the header) and the None key
            with its list of surplus fields (row longer than the header) are kept.
            """
            return dict((self._decode(key), self._decode(val)) for key, val in d.items())

        def close(self):
            "Close the underlying csv file"
            self.csvfile.close()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            self.close()
            return False

        def __iter__(self):
            return self

        def next(self):
            "Return the next row as a dict; raises StopIteration at end of file"
            return self.d2u(next(self.dictreader))

        # Python 3 spelling of the iterator protocol.
        __next__ = next

    if __name__ == '__main__':

        # Demo: print at most N_MAX rows of a csv file from the data directory.
        # A false N_MAX (0 or None) means "no limit".
        dr = DataReader('some-file.csv')
        N_MAX = 100
        try:
            for i, data in enumerate(dr):
                # Stop reading as soon as the limit is reached instead of
                # silently consuming the rest of the file.
                if N_MAX and i >= N_MAX:
                    break
                print(data)
        finally:
            # Release the file even if reading or decoding a row fails.
            dr.close()

        print('done')

Created by maxm
Last modified 2008-05-23 11:16 PM