from bs4 import BeautifulSoup
from string import capwords
import csv
import unicodedata
from enum import Enum

# Output tables filled in by write_csvs(): one row per permit in each file.
buildings_table = open("Buildings.csv", "w")
permits_table = open("Permits.csv", "w")

# Additional output files; how they are used is not visible in this part of
# the file.
tokens_output_file = open("tokens_file.txt", "w")
descriptions_eliminated_file = open("descriptions_eliminated.txt", "w")
output_file = open("numbers.txt", "w")

row_count = 1

# NOTE(review): presumably a description containing any of these phrases is
# excluded from processing -- confirm against the code that reads this list.
# Matching appears to be case-sensitive, hence the several spellings of
# "canceled".
blocker_phrases = [
    'subdivide',
    'sq ft addition to',
    'adjust the boundary',
    'Shoreline exemption',
    'contaminated soil',
    'change the use of',
    'REZONE',
    'homeless encampment',
    'communications facility',
    'communication utility',
    'cancelled',
    'CANCELLED',
    'CANCELED',
    'canceled',
    'minor communications utility',
    'change of use',
    'Shoreline Substantial',
]
class token_type(Enum):
    """Kinds of classified tokens produced from a permit description.

    Classified tokens are stored as two-element lists ``[token_type, text]``
    where ``text`` is either a digit string (for counted things such as
    stories or units) or an empty string (for pure markers such as
    ``hotel`` or ``existing``).  ``write_csvs`` sums the counted kinds and
    ``pretty_print`` turns each kind into a readable label.
    """
    # --- counted quantities and generic numbers ---
    story = 1
    cars = 2
    bicycles = 3
    number_before_a_number = 4  # printed as "multiplier" by pretty_print
    building_type = 5
    sq_ft = 6
    unknown_number = 7
    unknown_other = 8
    # --- unit counts and dwelling kinds ---
    residential_units = 9
    live_work_units = 10
    unknown_unit_type = 11
    townhouse = 12
    total_units = 13  # write_csvs overwrites the running unit count with this
    family_residence = 14
    env_crit_area = 15
    rowhouse = 16
    floating_house = 17
    # --- qualifiers and miscellaneous quantities ---
    existing = 18
    total_is = 19
    removal = 20
    trees = 21
    houses = 22
    cu_ft = 23  # despite the name, matched on cubic *yards* and printed as "cuyds"
    parking_spot = 24
    hotel_rooms = 25
    hotel = 26
    untyped_rooms = 27
    congregate_residence = 28
    remain = 29
    apartments = 30
    retail = 31
    feet = 32
    assisted_living = 33
    beds = 34
    canceled = 35  # makes write_csvs record the permit as canceled
    floors = 36
    restaurant = 37
    # --- structural / punctuation markers ---
    and_token = 38
    addition = 39
    office = 40
    towers = 41
    office_building = 42
    sentence_end = 43
    each = 44
    open_paren = 45
    close_paren = 46
    educational_institution = 47
    commercial = 48
    num_sign = 49
    date = 50
    townhouse_unit = 51
    efficiency_unit = 52
    duplex = 53
    apartment_building = 54
    residential_building = 55
    # Numbered apart from the contiguous 1-55 run; its use is not visible in
    # this part of the file.
    hotel_segment = 101

def write_csvs(token_list, permit_num):
    """Aggregate one permit's classified tokens and append a row to each table.

    token_list -- sequence of [token_type, text] pairs; ``text`` must be an
                  integer string for every counted kind handled below.
    permit_num -- permit identifier (string) written as the first column.

    Writes "permit,stories,sqft,units" to ``buildings_table`` and
    "permit,canceled,cars,bikes" to ``permits_table``; the canceled column
    is the literal text "true" or "false".
    """
    # Every one of these kinds contributes its number to the unit count.
    additive_unit_kinds = (
        token_type.residential_units,
        token_type.live_work_units,
        token_type.unknown_unit_type,
        token_type.townhouse_unit,
        token_type.efficiency_unit,
        token_type.hotel_rooms,
        token_type.untyped_rooms,
        token_type.apartments,
        token_type.beds,
    )

    stories = 0
    sqft = 0
    units = 0
    cars = 0
    bikes = 0
    canceled_flag = "false"

    for entry in token_list:
        kind = entry[0]
        if kind == token_type.story:
            stories += int(entry[1])
        elif kind == token_type.sq_ft:
            sqft += int(entry[1])
        elif kind == token_type.cars:
            cars += int(entry[1])
        elif kind == token_type.bicycles:
            bikes += int(entry[1])
        elif kind in additive_unit_kinds:
            units += int(entry[1])
        elif kind == token_type.total_units:
            # An explicit total replaces whatever has been counted so far;
            # unit tokens that come after it are still added on top.
            units = int(entry[1])
        elif kind == token_type.canceled:
            canceled_flag = "true"

    # Columns: permit number, stories, square feet, units.
    building_fields = [permit_num, str(stories), str(sqft), str(units)]
    buildings_table.write(",".join(building_fields) + "\n")

    # Columns: permit number, canceled flag, cars, bikes.
    permit_fields = [permit_num, canceled_flag, str(cars), str(bikes)]
    permits_table.write(",".join(permit_fields) + "\n")

def pretty_print(token_list):
    """Render classified tokens as a single human-readable line.

    token_list -- sequence of [token_type, text] pairs.

    Each token contributes its text followed by a label for its kind; kinds
    without a label contribute only their text.  Contributions are joined
    with ", " (the separator is only inserted once some text has already
    been produced) and the result ends with a newline.
    """
    # Label appended after the token text.  A leading space is present only
    # for kinds that normally carry a number in front of the label.
    label_for = {
        token_type.story: " stories",
        token_type.cars: " cars",
        token_type.bicycles: " bikes",
        token_type.number_before_a_number: " multiplier",
        token_type.sq_ft: " sqft",
        token_type.unknown_number: " is an untyped number",
        token_type.residential_units: " residential units",
        token_type.live_work_units: " live-work units",
        token_type.unknown_unit_type: " untyped units",
        token_type.townhouse: " townhouse buildings",
        token_type.total_units: " total units",
        token_type.family_residence: " single family residences",
        token_type.env_crit_area: "environmentally critical area",
        token_type.rowhouse: " rowhouse",
        token_type.floating_house: "floating house",
        token_type.total_is: "total =",
        token_type.existing: "existing",
        token_type.removal: "to be removed",
        token_type.trees: " trees",
        token_type.houses: " houses",
        token_type.cu_ft: " cuyds",
        token_type.parking_spot: " parking spots",
        token_type.hotel_rooms: " hotel rooms",
        token_type.hotel: "hotel",
        token_type.untyped_rooms: " rooms",
        token_type.congregate_residence: "congregate residence",
        token_type.remain: "to remain",
        token_type.apartments: " apartments",
        token_type.retail: "retail space",
        token_type.feet: " ft",
        token_type.assisted_living: "retirement home",
        token_type.beds: " beds",
        token_type.canceled: "canceled",
        token_type.floors: " floors",
        token_type.restaurant: "restaurant space",
        token_type.and_token: "and",
        token_type.addition: "addition",
        token_type.office: "office space",
        token_type.office_building: "office building",
        token_type.towers: " towers",
        token_type.sentence_end: "//",
        token_type.each: "each",
        token_type.open_paren: "(",
        token_type.close_paren: ")",
        token_type.educational_institution: "educational building",
        token_type.commercial: "commercial building",
        token_type.num_sign: " #",
        token_type.date: "",  # a date is shown as its text only
        token_type.townhouse_unit: " townhouse units",
        token_type.efficiency_unit: " small efficiency units",
        token_type.duplex: " duplex building",
        token_type.apartment_building: "apartment building",
        token_type.residential_building: "residential building",
    }

    rendered = ""
    for entry in token_list:
        if rendered:
            rendered = rendered + ", "
        rendered = rendered + entry[1] + label_for.get(entry[0], "")

    return rendered + "\n"

#take in a row from the CSV and return the description cell
def get_description(line):
    """Return the description cell (the fourth column) of one CSV row.

    line -- a single row of CSV text.

    The row is parsed with the csv module rather than a plain split so that
    a quoted description containing commas (e.g. "4-story, 20 unit ...")
    comes back whole instead of being cut at its first comma.  A trailing
    line terminator is not included in the returned cell.

    Raises IndexError when the row has fewer than four columns.
    """
    # An empty line parses to an empty row, which raises IndexError below.
    columns = next(csv.reader([line]), [])
    return columns[3]

#break string into tokens
def lexing(description_string):
    """Split a description string into a flat list of string tokens.

    Three kinds of token are produced, scanning left to right:
      * each of the characters . , - ( ) # / as a one-character token;
      * a run of digits, where a comma followed by exactly-checked three
        digits is treated as a thousands separator and dropped
        ("1,000" -> "1000");
      * a run of letters, passed through convert_num_str_to_num before
        being stored.
    Any other character (spaces, other punctuation) is skipped.

    When a digit starts what check_for_ordinal recognises as an ordinal,
    the value it returns is stored instead and the two following
    characters are skipped (presumably the ordinal suffix -- confirm
    against check_for_ordinal).
    """
    single_char_tokens = ".,-()#/"
    length = len(description_string)
    found = []
    pos = 0
    while pos < length:
        ch = description_string[pos]
        if ch in single_char_tokens:
            found.append(ch)
        elif ch.isdigit():
            ordinal = check_for_ordinal(ch, description_string, pos)
            if ordinal is not None:
                found.append(ordinal)
                pos += 2
            else:
                digits = ch
                while pos < length:
                    upcoming = char_peek(description_string, pos + 1)
                    if upcoming is None:
                        break
                    if upcoming == ",":
                        # Only swallow the comma when three digits follow it.
                        group = description_string[pos + 2:pos + 5]
                        if len(group) == 3 and group.isdigit():
                            pos += 1
                        else:
                            break
                    elif upcoming.isdigit():
                        digits = digits + upcoming
                        pos += 1
                    else:
                        break
                found.append(digits)
        elif ch.isalpha():
            # Find the end of the run of letters starting at pos.
            stop = pos + 1
            while stop < length and description_string[stop].isalpha():
                stop += 1
            word = description_string[pos:stop]
            found.append(convert_num_str_to_num(word))
            pos = stop - 1
        pos += 1
    return found

def char_peek(my_str, counter):
    """Return my_str[counter], or None when counter is at or past the end."""
    return my_str[counter] if counter < len(my_str) else None

def meaning_tokens(token_list):
    token_defs = []
    counter = 0
    while (len(token_list)>0):
        current = token_list[counter]
        #print current
        if (current.isdigit()):
            next = token_peek(token_list, counter+1)
            if (next=="(" and token_peek(token_list, counter+2).isdigit() and token_peek(token_list, counter+3)==")"):
                counter+=3
                next= token_peek(token_list, counter+1)
            if (next=="additional" or next=="attached"):
                counter+=1
                next = token_peek(token_list, counter+1)
            if (next=="new"):
                counter+=1
                next = token_peek(token_list, counter+1)
            if (next==","):
                counter+=1
                next = token_peek(token_list, counter+1)
            if (next==None):
                counter+=1
            elif (next=="-"):
                next = token_peek(token_list, counter+2)
                if (next=="story" or next=="Story"):
                    token_defs.append([token_type.story, current])
                    counter+=3
                elif (next=="units"):
                    token_defs.append([token_type.unknown_unit_type, current])
                    counter+=3
                elif (next=="unit"):
                    next = token_peek(token_list, counter+3)
                    if (next=="townhouse" or next=="townhouses"):
                        counter+=4
                        token_defs.append([token_type.townhouse_unit, current])
                    elif (next=="apartment"):
                        counter+=4
                        token_defs.append([token_type.apartments, current])
                    elif (next=="rowhouse"):
                        counter+=4
                        token_defs.append([token_type.rowhouse, current])
                    elif (next=="duplex"):
                        counter+=4
                        token_defs.append([token_type.unknown_unit_type, current])
                        token_defs.append([token_type.duplex, ""])
                    elif (next=="residential"):
                        counter+=4
                        token_defs.append([token_type.residential_units, current])
                        if (next=="building"):
                            counter+=1
                    else:
                        counter+=3
                        token_defs.append([token_type.unknown_unit_type, current])
                elif (next=="rooms"):
                    token_defs.append([token_type.untyped_rooms, current])
                    counter+=3
                elif (next=="car"):
                    token_defs.append([token_type.cars, current])
                    counter+=3
                elif (next=="room"):
                    token_defs.append([token_type.untyped_rooms, current])
                    counter+=3
                elif (next=="room"):
                    token_defs.append([token_type.untyped_rooms, current])
                    counter+=3
                elif (next=="single"):
                    next = token_peek(token_list, counter+3)
                    if (next=="family" or next=="Family"):
                        token_defs.append([token_type.family_residence, current])
                        counter+=4
                    else:
                        counter+=3
                elif (next=="live"):
                    live_work_counter = 3
                    next = token_peek(token_list, counter+live_work_counter)
                    if (next=="-" or next=="/"):
                        live_work_counter+=1
                        next = token_peek(token_list, counter+live_work_counter)
                    if (next=="work"):
                        token_defs.append([token_type.live_work_units, current])
                        live_work_counter+=1
                        next = token_peek(token_list, counter+live_work_counter)
                    if (next=="units" or next=="unit"):
                        live_work_counter+=1
                    counter = counter+live_work_counter
                else:
                    token_defs.append([token_type.unknown_number, current])
                    counter+=2
            elif (next.isdigit()):
                token_defs.append([token_type.number_before_a_number, current])
                counter+=1
            elif (next=="/"):
                d2 = token_peek(token_list, counter+2)
                d3 = token_peek(token_list, counter+3)
                d4 = token_peek(token_list, counter+2)
                if (d2!=None and d3!=None and d4!=None):
                    if (d2.isdigit() and d3=="/" and d4.isdigit()):
                        token_defs.append([token_type.date, current+"/"+d2+d3+d4])
                        counter+=5
                    else:
                        counter+=2
                else:
                    counter+=2
            elif (next == "containing"):
                token_defs.append([token_type.number_before_a_number, current])
                counter+=1
            elif (next=="apartment"):
                token_defs.append([token_type.apartments, current])
                counter+=2
                if (token_peek(token_list, counter)=="units"):
                    counter+=1
            elif (next == "houses"):
                token_defs.append([token_type.houses,current])
                counter+=2
            elif (next == "existing"):
                token_defs.append([token_type.number_before_a_number, current])
                counter+=1
            elif (next == "tower" or next=="towers"):
                token_defs.append([token_type.towers, current])
                counter+=2
            elif (next == "single" or next=="Single"):
                next = token_peek(token_list, counter+2)
                if (next=="family" or next=="Family"):
                    token_defs.append([token_type.family_residence, current])
                    counter+=3
                else:
                    counter+=2
            elif (next==","):
                next = token_peek(token_list, counter+2)
                if (next.isdigit()):
                    token_defs.append([token_type.number_before_a_number, current])
                    counter+=2
                else:
                    token_defs.append([token_type.unknown_number, current])
                    counter+=2
            elif (next=="story" or next=="STORY" or next=="stories"):
                token_defs.append([token_type.story, current])
                counter+=2
            elif (next=="sf" or next=="SF"):
                token_defs.append([token_type.sq_ft, current])
                counter+=2
            elif (next=="sq" or next=="Sq" or next=="SQ" or next=="sa" or next=="square" or next=="sqare"):
                sqft_counter = 2
                sqft_next = token_peek(token_list, counter+sqft_counter)
                if (sqft_next =="."):
                    sqft_counter+=1
                    sqft_next = token_peek(token_list, counter+sqft_counter)
                if (sqft_next =="ft" or sqft_next =="Ft" or sqft_next =="FT" or sqft_next=="feet"):
                    token_defs.append([token_type.sq_ft, current])
                    sqft_counter+=1
                    sqft_next = token_peek(token_list, counter+sqft_counter)
                if  (sqft_next=="."):
                    sqft_counter+=1
                counter = counter+sqft_counter
            elif (next=="cu" or next=="Cu" or next=="CU" or next=="cubic"):
                cuft_counter = 2
                cuft_next = token_peek(token_list, counter+cuft_counter)
                if (cuft_next =="."):
                    cuft_counter+=1
                    cuft_next = token_peek(token_list, counter+cuft_counter)
                if (cuft_next =="yd" or cuft_next =="Yd" or cuft_next =="YD" or cuft_next =="yds" or cuft_next =="Yds" or cuft_next =="YDS" or cuft_next=="yards"):
                    token_defs.append([token_type.cu_ft, current])
                    cuft_counter+=1
                    cuft_next = token_peek(token_list, counter+cuft_counter)
                if  (cuft_next=="."):
                    cuft_counter+=1
                counter = counter+cuft_counter
            elif (next=="feet" or next=="ft"):
                token_defs.append([token_type.feet, current])
                counter+=2
                if (token_peek(token_list, counter)=="."):
                    counter+=1
            elif (next=="vehicles" or next=="vehicle" or next=="car"):
                token_defs.append([token_type.cars, current])
                counter+=2
            elif (next=="bicycles"):
                token_defs.append([token_type.bicycles, current])
                counter+=2
            elif (next=="unit"):
                next = token_peek(token_list, counter+2)
                if (next=="residential"):
                    token_defs.append([token_type.residential_units, current])
                    counter+=3
                    if (token_peek(token_list, counter)=="building"):
                        counter+=1
                elif (next=="townhouse" or next=="townhouses"):
                    token_defs.append([token_type.townhouse_unit, current])
                    counter+=3
                    if (token_peek(token_list, counter)=="structures"):
                        counter+=1
                elif (next=="-"):
                    if (token_peek(token_list, counter+3)=="townhouse"):
                        counter+=4
                        token_defs.append([token_type.townhouse_unit, current])
                elif (next=="apartment"):
                    token_defs.append([token_type.apartments, current])
                    counter+=3
                    if (token_peek(token_list, counter)=="building"):
                        counter+=1
                #elif (next=="'" and token_peek(token_list, counter+3)=="s" and token_peek(token_list, counter+4)=="total"):
                #    token_defs.append([token_type.total_units, current])
                #    counter+=5
                elif (next=="structure"):
                    token_defs.append([token_type.unknown_unit_type, current])
                    counter+=3
                elif (next=="affordable" and token_peek(token_list, counter+3)=="housing" and token_peek(token_list, counter+4)=="residential"):
                    token_defs.append([token_type.residential_units, current])
                    counter+=5
                elif (next==","):
                    if (token_peek(token_list, counter+3)=="residential"):
                        counter+=4
                        token_defs.append([token_type.residential_units, current])
                    else:
                        counter+=3
                        token_defs.append([token_type.unknown_number, current])
                elif (next=="rowhouse"):
                    token_defs.append([token_type.rowhouse, current])
                    counter+=3
                else:
                    counter+=2
                    token_defs.append([token_type.unknown_number, current])
            elif (next=="units"):
                next = token_peek(token_list, counter+2)
                if (next=="total"):
                    token_defs.append([token_type.total_units, current])
                    counter+=3
                else:
                    token_defs.append([token_type.unknown_unit_type, current])
                    counter+=2
            elif (next=="dwelling"):
                next = token_peek(token_list, counter+2)
                if (next=="units"):
                    token_defs.append([token_type.residential_units, current])
                    counter+=3
                else:
                    token_defs.append([token_type.unknown_unit_type, current])
                    counter+=2
            elif (next=="residential"):
                counter+=2
                if (token_peek(token_list, counter)=="units"):
                    counter+=1
                    if (token_peek(token_list,counter)=="total"):
                        token_defs.append([token_type.total_units, current])
                        counter+=1
                    else:
                        token_defs.append([token_type.residential_units, current])
                else:
                    token_defs.append([token_type.residential_units, current])
            elif (next=="live"):
                live_work_counter = 2
                next = token_peek(token_list, counter+live_work_counter)
                if (next=="-" or next=="/"):
                    live_work_counter+=1
                    next = token_peek(token_list, counter+live_work_counter)
                if (next=="work"):
                    token_defs.append([token_type.live_work_units, current])
                    live_work_counter+=1
                    next = token_peek(token_list, counter+live_work_counter)
                if (next=="units" or next=="unit"):
                    live_work_counter+=1
                counter = counter+live_work_counter
            elif (next=="sleeping"):
                next = token_peek(token_list, counter+2)
                counter+=2
                if (next=="room" or next=="rooms"):
                    counter+=1
                    token_defs.append([token_type.untyped_rooms, current])
            elif (next=="total"):
                next = token_peek(token_list, counter+2)
                if (next=="units"):
                    token_defs.append([token_type.total_units, current])
                    counter+=2
                else:
                    token_defs.append([token_type.unknown_number, current])
                    counter+=2
            elif (next=="new"):
                next = token_peek(token_list, counter+2)
                if (next=="trees"):
                    token_defs.append([token_type.trees, current])
                    counter+=2
                else:
                    counter+=1
            elif (next=="surface" or next=="vehicular"):
                next = token_peek(token_list, counter+2)
                if (next=="parking"):
                    token_defs.append([token_type.parking_spot, current])
                    counter+=3
                    next = token_peek(token_list, counter)
                    if (next=="spot" or next=="spots"):
                        counter+=1
                else:
                    counter+=2
            elif (next=="parking"):
                next = token_peek(token_list, counter+2)
                token_defs.append([token_type.parking_spot, current])
                counter+=2
                if (next=="space" or next=="spaces"):
                    counter+=1
            elif (next=="spaces"):
                token_defs.append([token_type.parking_spot, current])
                counter+=2
            elif (next=="hotel" or next=="HOTEL"):
                if (token_peek(token_list, counter+2)=="rooms"):
                    counter+=3
                else:
                    counter+=2
                token_defs.append([token_type.hotel, ""])
                token_defs.append([token_type.hotel_rooms, current])
            elif (next=="room" or next=="ROOM"):
                next=token_peek(token_list, counter+2)
                if (next=="hotel" or next=="HOTEL"):
                    counter+=3
                    token_defs.append([token_type.hotel, ""])
                    token_defs.append([token_type.hotel_rooms, current])
                else:
                    counter+=2
                    token_defs.append([token_type.untyped_rooms, current])
            elif (next=="rooms"):
                counter+=2
                token_defs.append([token_type.untyped_rooms, current])
            elif (next=="bedrooms"):
                counter+=2
                token_defs.append([token_type.untyped_rooms, current])
            elif (next=="beds"):
                counter+=2
                token_defs.append([token_type.beds, current])
            elif (next=="floors"):
                counter+=2
                token_defs.append([token_type.floors, current])
            elif (next=="townhouse"):
                counter+=2
                if (token_peek(token_list, counter)=="units"):
                    counter+=1
                    token_defs.append([token_type.townhouse_unit, current])
                else:
                    token_defs.append([token_type.townhouse, current])
            elif (next=="rowhouses"):
                counter+=2
                token_defs.append([token_type.rowhouse, current])
            elif (next=="duplex"):
                counter+=2
                token_defs.append([token_type.duplex, current])
            elif (next=="small" and token_peek(token_list, counter+2)=="efficiency" and token_peek(token_list, counter+3)=="dwelling" and token_peek(token_list, counter+4)=="units"):
                counter+=5
                token_defs.append([token_type.efficiency_unit, current])
            elif (next=="efficiency" and token_peek(token_list, counter+2)=="units"):
                counter+=3
                token_defs.append([token_type.efficiency_unit, current])
            elif (next=="below" and token_peek(token_list, counter+2)=="-" and token_peek(token_list, counter+3)=="grade" and token_peek(token_list, counter+4)=="parking"):
                counter+=5
                token_defs.append([token_type.parking_spot, current])
            elif (next=="assisted"):
                counter+=2
                next = token_peek(token_list, counter)
                if (next=="living"):
                    counter+=1
                    next = token_peek(token_list, counter)
                    token_defs.append([token_type.unknown_unit_type, current])
                    token_defs.append([token_type.assisted_living, ""])
                    if (next=="facility"):
                        counter+=1
                counter+=1
            elif (next=="congregate"):
                token_defs.append([token_type.congregate_residence, current])
                counter+=2
                next=token_peek(token_list, counter+1)
                if (next=="residence" or next=="residences"):
                    counter+=1

            else:
                token_defs.append([token_type.unknown_number, current])
                counter+=1
        #----------------------------Alpha------------------------------
        elif (current.isalpha()):
            if (current=="single"):
                next = token_peek(token_list, counter+1)
                if (next=="family"):
                    next = token_peek(token_list, counter+2)
                    if (next=="residence" or next=="residences" or next=="structures" or next=="structures"):
                        token_defs.append([token_type.family_residence, ""])
                        counter+=3
                    elif (next=="dwelling"):
                        token_defs.append([token_type.family_residence, ""])
                        counter+=3
                        if (token_peek(token_list, counter)=="unit"):
                            counter+=1
                    else:
                        token_defs.append([token_type.family_residence, ""])
                        counter+=2
                else:
                    #token_defs.append([token_type.unknown_other, str(1)])
                    counter+=1
            elif (current=="environmentally"):
                next = token_peek(token_list, counter+1)
                if (next=="critical"):
                    next = token_peek(token_list, counter+2)
                    if (next=="area"):
                        token_defs.append([token_type.env_crit_area, ""])
                        counter+=3
                    else:
                        counter+=1
                else:
                    counter+=1
            elif (current=="rowhouse"):
                token_defs.append([token_type.rowhouse, ""])
                counter+=1
            elif (current=="floating"):
                next = token_peek(token_list, counter+1)
                if (next=="home"):
                    token_defs.append([token_type.floating_house, ""])
                    counter+=2
                else:
                    counter+=1
            elif (current=="existing" or current=="Existing"):
                token_defs.append([token_type.existing, ""])
                counter+=1
            elif (current=="totaling"):
                token_defs.append([token_type.total_is, ""])
                counter+=1
            elif (current=="apartment"):
                token_defs.append([token_type.apartment_building, ""])
                counter+=1
            elif (current=="be" or current=="BE"):
                next = token_peek(token_list, counter+1)
                if (next=="fully"):
                    counter+=1
                    next = token_peek(token_list, counter+1)
                if (next=="removed" or current=="REMOVED"):
                    token_defs.append([token_type.removal, ""])
                    counter+=2
                elif (next=="demolished" or current=="DEMOLISHED"):
                    token_defs.append([token_type.removal, ""])
                    counter+=2
                else:
                    counter+=1
            elif (current=="total"):
                next = token_peek(token_list, counter+1)
                counter+=1
                if (next=="of"):
                    token_defs.append([token_type.total_is, ""])
                    counter+=1
                elif (next.isdigit() and token_peek(token_list, counter+2)=="units"):
                    counter+=2
                    token_defs.append([token_type.total_units, next])
                else:
                    token_defs.append([token_type.total_is, ""])
            elif (current=="hotel"):
                token_defs.append([token_type.hotel, ""])
                counter+=1
            elif (current=="congregate"):
                token_defs.append([token_type.congregate_residence, ""])
                counter+=1
                next=token_peek(token_list, counter+1)
                if (next=="residence" or next=="residences"):
                    counter+=1
            elif (current=="demolition"):
                token_defs.append([token_type.removal, ""])
                counter+=1
            elif (current=="each"):
                token_defs.append([token_type.each, ""])
                counter+=1
            elif (current=="remain"):
                token_defs.append([token_type.remain, ""])
                counter+=1
            elif (current=="retail" or current=="retai" or current=="Retail"):
                token_defs.append([token_type.retail, ""])
                counter+=1
            elif (current=="restaurant" or current=="restaurants" or current=="bakery"):
                token_defs.append([token_type.restaurant, ""])
                counter+=1
            elif (current=="office"):
                counter+=1
                if (token_peek(token_list, counter)=="building"):
                    counter+=1
                    token_defs.append([token_type.office_building, ""])
                else:
                    token_defs.append([token_type.office, ""])
            elif (current=="assisted"):
                counter+=1
                next = token_peek(token_list, counter)
                if (next=="living"):
                    counter+=1
                    next = token_peek(token_list, counter)
                    token_defs.append([token_type.assisted_living, ""])
                    if (next=="facility"):
                        counter+=1
            elif (current=="cancelled" or current=="CANCELLED" or current=="canceled" or current=="CANCELED" or current=="Canceld" or current=="Canceled" or current=="Cancelled"):
                counter+=1
                token_defs.append([token_type.canceled, ""])
            elif (current=="and"):
                counter+=1
                token_defs.append([token_type.and_token, ""])
            elif (current=="addition" or current=="Addition"):
                counter+=1
                token_defs.append([token_type.addition, ""])
            elif (current=="live" and token_peek(token_list, counter+1)=="-" and token_peek(token_list, counter+2)=="work" and token_peek(token_list, counter+3)=="unit"):
                counter+=4
                token_defs.append([token_type.live_work_units, "1"])
            elif (current=="live"):
                    next_lw = token_peek(token_list, counter+1)
                    if (next_lw =="/" or next_lw=="-"):
                        counter+=1
                    if (token_peek(token_list, counter+1)=="work"):
                        token_defs.append([token_type.live_work_units, "1"])
                        counter+=2
                        if (token_peek(token_list, counter+2)=="unit"):
                            counter+=1
                    else:
                        counter+=1
            elif (current=="residential"):
                if (token_peek(token_list, counter+1)=="building" and token_peek(token_list, counter+2)=="with" and token_peek(token_list, counter+3).isdigit()):
                    if token_peek(token_list, counter+4)=="sq":
                        token_defs.append([token_type.residential_building, ""])
                        counter+=3
                    else:
                        token_defs.append([token_type.residential_units, token_peek(token_list, counter+3)])
                        counter+=4
                elif (token_peek(token_list, counter+1)=="building"):
                    token_defs.append([token_type.residential_building, ""])
                    counter+=2
                else:
                    counter+=1
            elif (current=="townhouse"):
                counter+=1
                token_defs.append([token_type.townhouse, ""])
            elif (current=="educational"):
                counter+=1
                token_defs.append([token_type.educational_institution, ""])
            elif (current=="commercial"):
                counter+=1
                token_defs.append([token_type.commercial, ""])
            else:
                counter+=1
        elif (current=="."):
            counter+=1
            token_defs.append([token_type.sentence_end, ""])
        elif (current=="("):
            counter+=1
            token_defs.append([token_type.open_paren, ""])
        elif (current==")"):
            counter+=1
            token_defs.append([token_type.close_paren, ""])
        elif (current=="#"):
            next = token_peek(token_list, counter+1)
            if (next.isdigit()):
                counter+=2
                token_defs.append([token_type.num_sign, next])
            else:
                counter+=1
                token_defs.append([token_type.num_sign, ""])
        else:
            #token_defs.append([token_type.unknown_other, token_list[counter]])
            counter+=1
        if (counter>=len(token_list)):
            break
    return token_defs


class Building_instance(object):
    """Per-building record filled in while interpreting a permit's tokens.

    Every field is a class-level default; assigning to a field on an
    instance shadows the default for that instance only.
    """
    # Kind of building; the string "none" marks "not yet determined".
    bldg_type = "none"
    # Numeric fields start at 0, which interpret_tokens treats as "unset".
    count_stories = hotel_units = total_unit_count = 0
    # Becomes True once a hotel-related token is attached to the building.
    has_hotel = False

class Permit(object):
    """Aggregate record for one permit.

    The class-level attributes are kept as defaults so that existing
    references such as ``Permit.mixed_use`` keep working.  The two list
    fields are additionally re-created per instance in ``__init__``:
    a list stored only on the class is shared by every instance, so an
    ``append`` through one permit would show up on all the others.
    """
    building_array = []
    existing_to_be_removed = False
    environmentally_critical_area = False
    vehicles = []
    mixed_use = False

    def __init__(self):
        # Give each permit its own containers instead of the shared
        # class-level lists.
        self.building_array = []
        self.vehicles = []


#translate token arrays into objects
def _claim_building(buildings, attribute, unset_value):
    """Return the first building whose ``attribute`` still equals ``unset_value``.

    When every existing building already has that attribute filled in, a new
    ``Building_instance`` is appended to ``buildings`` and returned instead.
    """
    for building in buildings:
        if getattr(building, attribute) == unset_value:
            return building
    building = Building_instance()
    buildings.append(building)
    return building


def interpret_tokens(token_array):
    """Build ``Building_instance`` objects from a list of ``[type, value]`` tokens.

    Only three token types are interpreted; all others are ignored:

    * ``token_type.story``       -- stores the token value in ``count_stories``
      of the first building that has no story count yet.
    * ``token_type.hotel``       -- sets ``has_hotel`` on the first building
      that is not yet flagged as a hotel.
    * ``token_type.hotel_rooms`` -- stores the token value in ``hotel_units``
      of the first building with no hotel units yet and flags it as a hotel.

    In each case a new building is created when no existing one qualifies.
    Token values are stored exactly as they appear in the token (no numeric
    conversion is applied).

    Returns the list of buildings in creation order.
    """
    building_array = []
    for token in token_array:
        # Tokens are [type, value] pairs; the type lives in slot 0.
        kind = token[0]
        if kind == token_type.story:
            story_b = _claim_building(building_array, "count_stories", 0)
            story_b.count_stories = token[1]
        elif kind == token_type.hotel:
            hotel_b = _claim_building(building_array, "has_hotel", False)
            hotel_b.has_hotel = True
        elif kind == token_type.hotel_rooms:
            hotel_b = _claim_building(building_array, "hotel_units", 0)
            hotel_b.has_hotel = True
            hotel_b.hotel_units = token[1]
    return building_array

#class Permit_node(Object):
#    children = []
#    mixed_use=False

#class Building_node(Object):
#    children = []
#    stories = null
#    total_units = null
#    mixed_use=False

#class Hotel_node(object, n):
#    num_rooms = n

#def token_list_to_tree(token_array):
#    new_token_array = []
#    t_counter = 0
#    for token in token_array:
#        if token[0]==token_type.hotel:
#            room_search=get_hotel_rooms(token_array, t_counter+1)
#            if (room_search!=null):
#                hotel_class_token = Hotel_node(room_search)
#                new_token_array.append(
#    return new_token_array

def get_hotel_rooms(my_tokens, counter):
    """Return the value of the first room-count token at or after ``counter``.

    Scans ``my_tokens`` (a list of ``[type, value]`` pairs) starting at index
    ``counter`` and returns the value of the first token whose type is
    ``token_type.hotel_rooms`` or ``token_type.untyped_rooms``.

    Returns ``None`` when no such token exists (including when ``counter`` is
    already at or past the end of the list).
    """
    for position in range(counter, len(my_tokens)):
        candidate = my_tokens[position]
        if (candidate[0] == token_type.hotel_rooms
                or candidate[0] == token_type.untyped_rooms):
            return candidate[1]
    # Nothing found: Python's "no value" is None (the previous `null` was an
    # undefined name and raised NameError on this path).
    return None

def token_peek(my_tokens, counter):
    """Look at ``my_tokens[counter]`` without running off the end.

    Returns ``None`` when ``counter`` is at or beyond ``len(my_tokens)``;
    otherwise returns the element at that index.  Only the upper bound is
    checked, so a negative ``counter`` is handed straight to normal indexing.
    """
    within_bounds = counter < len(my_tokens)
    return my_tokens[counter] if within_bounds else None

def check_for_ordinal(first_digit, my_str, counter):
    """Spell out a single-digit ordinal such as "1st" or "4th".

    ``first_digit`` is compared against "1".."9"; the two items of ``my_str``
    at ``counter+1`` and ``counter+2`` (fetched with ``token_peek``) are
    compared against the matching suffix letters ("st", "nd", "rd", "th").

    Returns the ordinal word ("first" .. "ninth") on a match, otherwise
    ``None`` -- including when either suffix position is past the end of
    ``my_str``.
    """
    suffix_head = token_peek(my_str, counter+1)
    suffix_tail = token_peek(my_str, counter+2)
    if suffix_head is None or suffix_tail is None:
        return None

    # (digit, first suffix letter, second suffix letter, spelled-out word)
    ordinal_table = (
        ("1", "s", "t", "first"),
        ("2", "n", "d", "second"),
        ("3", "r", "d", "third"),
        ("4", "t", "h", "fourth"),
        ("5", "t", "h", "fifth"),
        ("6", "t", "h", "sixth"),
        ("7", "t", "h", "seventh"),
        ("8", "t", "h", "eighth"),
        ("9", "t", "h", "ninth"),
    )
    for digit, letter_one, letter_two, word in ordinal_table:
        if first_digit == digit and suffix_head == letter_one and suffix_tail == letter_two:
            return word
    return None


# Lookup from a spelled-out number ("zero".."ten") to its digit string.
# Each word is accepted in lowercase, Capitalized and UPPERCASE form.
_NUMBER_WORD_DIGITS = {
    form: str(value)
    for value, word in enumerate((
        "zero", "one", "two", "three", "four", "five",
        "six", "seven", "eight", "nine", "ten",
    ))
    for form in (word, word.capitalize(), word.upper())
}


def convert_num_str_to_num(my_str):
    """Translate a spelled-out number into its digit string.

    "zero" through "ten" (lowercase, Capitalized or UPPERCASE) map to
    "0" through "10".  Any other input is returned unchanged.

    Note: "ten" now maps to "10"; it previously returned "9" because of a
    copy-paste slip.
    """
    return _NUMBER_WORD_DIGITS.get(my_str, my_str)

def parse_with_csv_module ():
    """Read "Land_Use_Permits.csv", echo its header and lex every description.

    The first row is written through a ``csv.writer`` on
    ``parse_output_file`` with four extra column names appended
    ('Story 1' .. 'Story 4').  For each later row, column 3 is passed to
    ``lexing``; the result is discarded.

    NOTE(review): ``parse_output_file`` is not among the file handles opened
    at the top of the visible file -- confirm it is defined before calling.
    """
    with open("Land_Use_Permits.csv") as permits_csv:
        for row_index, row in enumerate(csv.reader(permits_csv)):
            if row_index == 0:
                # Header row: extend it with the extra story columns.
                header_writer = csv.writer(parse_output_file)
                header_writer.writerow(row+['Story 1', 'Story 2', 'Story 3', 'Story 4'])
            else:
                lexing(row[3])

def free_from_blocked_lines (description):
    """Tell whether ``description`` is free of every blocker phrase.

    Each entry of the module-level ``blocker_phrases`` list is searched for
    in ``description`` as a case-insensitive substring.  Returns ``False``
    as soon as one is found, ``True`` when none match.
    """
    return not any(
        phrase.lower() in description.lower()
        for phrase in blocker_phrases
    )

def parse_for_descriptions ():
    """Drive the whole run over "Land_Use_Permits.csv".

    On the first row (the input header) the header lines of the two output
    tables are written to ``buildings_table`` and ``permits_table``.  Every
    later row is handed to ``parse_descriptions`` with column 3 as the
    description text and column 0 as the permit number.
    """
    with open("Land_Use_Permits.csv") as permits_csv:
        for row_index, record in enumerate(csv.reader(permits_csv)):
            if row_index > 0:
                parse_descriptions(record[3], record[0])
                continue
            # First row of the input: emit the output headers instead.
            buildings_table.write("Permit Number,Stories,SqFt,Units\n")
            permits_table.write("Permit Number,Canceled,Cars,Bikes\n")



def parse_descriptions(row, permit_num):
    """Tokenize one permit description and record the results.

    ``row`` is the description text and ``permit_num`` the permit identifier.
    Descriptions containing a blocker phrase are only logged to
    ``descriptions_eliminated_file``.  All others are lexed and converted to
    meaning tokens, dumped to ``tokens_output_file`` together with a running
    row banner, and passed to ``write_csvs``.  The module-level ``row_count``
    is incremented for accepted descriptions only.
    """
    global row_count

    if not free_from_blocked_lines(row):
        descriptions_eliminated_file.write(row+"\n")
        return

    token_defs = meaning_tokens(lexing(row))

    banner = "Row "+str(row_count) +" ---------------------------\n"
    tokens_output_file.write(banner)
    tokens_output_file.write(row +"\n\n")
    tokens_output_file.write(pretty_print(token_defs)+"\n\n")

    write_csvs(token_defs, permit_num)

    # NOTE(review): interpret_tokens runs for the second accepted row only and
    # its result is discarded -- looks like debugging scaffolding; confirm.
    if row_count == 2:
        interpret_tokens(token_defs)
    row_count += 1

# Disabled manual check: lex and tokenize a single sample description and
# print the resulting tokens.
#print pretty_print(meaning_tokens(lexing("Land Use Application to allow a four-story 88 unit residential building with 4 live-work units and 3,229 sq. ft. of retail use located at street level. Parking for 67 vehicles will be provided below grade.")))

# Entry point: there is no __main__ guard, so the full CSV run starts as soon
# as this module is executed or imported.
parse_for_descriptions()