#!/usr/bin/env python
"""
Author   : freeman
date     : 2019.02.17
Purpose  : To understand the effects of opioid prescribing rates in each US state.
         :  
Comments : If you plan to run it, it will take a while to display the 700k rows of data.
         : There are 250k rows of data which are missing pertinent information!
         : This isn't a completed project, just my preliminary sketch of data processing.
data     : https://data.cms.gov/Medicare-Claims/Medicare-Part-D-Opioid-Prescriber-Summary-File-201/e4ka-3ncx/data        
example  : python parsedata.py Medicare_Part_D_Opioid_Prescriber_Summary_File_2014.csv
"""


import csv
import os
import sys


def getdata(path=None):
    """Count the CSV rows that have both column 7 and column 8 filled in.

    The file is streamed row by row through the ``csv`` module, so quoted
    fields that contain commas stay in one column and the line terminator
    is never mistaken for data in the last column.  A row is counted as
    unused when it has fewer than nine columns, or when column 7 or
    column 8 (0-based) is empty or whitespace-only.  Every row is treated
    the same way: a header row, if the file has one, is counted like any
    other row.

    Args:
        path: CSV file to read.  When omitted, the first command-line
            argument (``sys.argv[1]``, stripped) is used.

    Returns:
        A ``(unused, usable)`` tuple of row counts.  The same numbers,
        plus their total, are printed as a three-line summary.

    Raises:
        SystemExit: if ``path`` is omitted and no command-line argument
            was supplied.
        IOError/OSError: if the file cannot be opened.
    """
    if path is None:
        if len(sys.argv) < 2:
            # Fail with a readable message instead of a bare IndexError.
            sys.exit("usage: python %s <csv-file>" % sys.argv[0])
        path = sys.argv[1].strip()

    unused = usable = 0
    with open(path) as handle:
        # Stream the rows; the file can be several hundred thousand lines
        # long, so nothing is kept in memory beyond the current row.
        for row in csv.reader(handle):
            # Short rows (including blank lines, which csv yields as [])
            # are missing the columns of interest, so they are unused too.
            if len(row) <= 8 or not row[7].strip() or not row[8].strip():
                unused += 1
            else:
                usable += 1

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("the numbers of records in this file which i didnt use are: " + str(unused))
    print("the numbers of records in this file i might use       are: " + str(usable))
    print("the total records in this file                        are: " +  str(unused + usable))

    return unused, usable

	
def main():
    """Script entry point: run the record-count report via getdata()."""
    getdata()


# Run the report only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()