import sys
import struct

# Embedded sample input: a whitespace-separated table.  The first non-blank
# line is the header; every following non-blank line is one record.  The
# 'location' column holds an "x,y" pair of integers.
data='''
location       job          time      count
2,3            designer     1         3
1,8            manager      1         3
3,11           programmer   1         6
5,6            manager      1         8
11,10          designer     1         5
12,4           programmer   1         5
13,4           designer     1         6
'''

# Integer code for each value of the categorical 'job' column.  The parsing
# and writing code finds this map by name: a categorical column called
# <name> requires a module-level map called <name>_code.
job_code = { 'designer':0, 'programmer':1, 'manager':2 }

# Parsed header as a list of (column_name, column_type) pairs; stays None
# until the header line has been read.
columns = None

class Record(object):
    """Attribute bag for one parsed data row.

    Instances start out empty; the parsing loop stores one attribute per
    column directly in the instance ``__dict__``.
    """

    def __init__(self):
        """Create a record with no fields set."""


# every parsed row, in input order
records = list()

# Three column names are special: 'location', 'time' and 'count'.
# Every other column is treated as a categorical dimension
# of up to 255 different values.

LOCATION, TIME, COUNT, CATEGORICAL = 0, 1, 2, 3


def type_from_name(c):
    """Return the column type constant for the column named ``c``.

    'location', 'time' and 'count' map to LOCATION, TIME and COUNT;
    any other name maps to CATEGORICAL.
    """
    reserved_names = (
        ('location', LOCATION),
        ('time', TIME),
        ('count', COUNT),
    )
    for reserved, kind in reserved_names:
        if c == reserved:
            return kind
    return CATEGORICAL

# collect records
#
# The first non-blank line of `data` is the header: it fixes the column
# names and, through type_from_name, their types.  Every later non-blank
# line is parsed into a Record and appended to `records`.
#
# Raises Exception when a row does not have exactly one token per column,
# or when a categorical column has no matching <name>_code map.
for line_no, line in enumerate(data.split('\n'), 1):
    line = line.strip()
    if not line:
        continue

    # debug trace; the single-argument form prints the same on Python 2 and 3
    print(line)
    tokens = line.split()  # whitespace split never yields empty tokens
    print(tokens)

    # read header
    if columns is None:
        columns = [(name, type_from_name(name)) for name in tokens]
        continue

    if len(tokens) != len(columns):
        # the line number is actually substituted into the message here
        raise Exception("Problem on line %d: tokens and columns are different" % line_no)

    record = Record()
    for (column_name, column_type), value in zip(columns, tokens):
        if column_type == LOCATION: # x,y
            value = [int(x) for x in value.split(',')]
        elif column_type == TIME or column_type == COUNT:
            value = int(value)
        elif column_type == CATEGORICAL:
            # Each categorical column needs a module-level map named
            # <column>_code.  It is looked up by name rather than with
            # eval() so that a header token is never executed as code.
            codes = globals().get("%s_code" % column_name)
            if codes is None:
                raise Exception("Problem on line %d: no %s_code map for column '%s'"
                                % (line_no, column_name, column_name))
            value = codes[value]

        # fields are stored under the column's own name
        record.__dict__[column_name] = value

    records.append(record)
    print(record.__dict__)


# write nanocube-ready .dmp file
# Opened in binary mode: the records written after the text header are raw
# struct.pack bytes, which text mode can corrupt on platforms that
# translate newline bytes.
ostream = open('floorplan-nanocube-ready.dmp','wb')

def field_type_for(column_type):
    """Return the .dmp header field type string for a column type constant.

    Returns None when ``column_type`` is not one of LOCATION, TIME, COUNT
    or CATEGORICAL.
    """
    quadtree_depth = 4 # grid of 2^4 to 2^4
    spec_by_type = (
        (LOCATION, "nc_dim_quadtree_" + str(quadtree_depth)),
        # 2 bytes: time bins can go from 0 to 2^16-1
        (TIME, "nc_dim_time_2"),
        # 4 bytes: counts can go from 0 to 2^32-1
        (COUNT, "nc_var_uint_4"),
        (CATEGORICAL, "nc_dim_cat_1"),
    )
    for known_type, spec in spec_by_type:
        if column_type == known_type:
            return spec
    return None

# Emit the .dmp file: a text header (name, one "field:" line per column,
# one "valname:" line per categorical code), a blank line, then one packed
# binary record per parsed row.  The stream is closed even if a write or a
# pack fails, so buffered output is flushed and the handle is released.
try:
    ostream.write("name: floorplan\n")
    for column_name, column_type in columns:
        ostream.write("field: %s %s\n" % (column_name, field_type_for(column_type)))

    for column_name, column_type in columns:
        if column_type == CATEGORICAL:
            # Each categorical column has a module-level map named
            # <column>_code; look it up by name instead of eval() so the
            # column name is never executed as code.
            codes = globals()["%s_code" % column_name]
            inverted_codes = dict((code, label) for label, code in codes.items())
            # list the value names in ascending code order
            for code in sorted(inverted_codes):
                ostream.write("valname: %s %d %s\n" % (column_name, code, inverted_codes[code]))

    ostream.write("\n") # end of header indication

    for r in records: # write binary records
        for column_name, column_type in columns:
            if column_type == LOCATION:
                # x and y as two little-endian unsigned 32-bit integers
                ostream.write(struct.pack("<II", r.location[0], r.location[1]))
            elif column_type == TIME:
                # little-endian unsigned 16-bit integer
                ostream.write(struct.pack("<H", r.time))
            elif column_type == COUNT:
                # little-endian unsigned 32-bit integer
                ostream.write(struct.pack("<I", r.count))
            elif column_type == CATEGORICAL:
                # category code as a single unsigned byte
                ostream.write(struct.pack("<B", r.__dict__[column_name]))
finally:
    ostream.close()