#!/usr/bin/env python3
"""
Author   : freeman
date     : 2019.02.17
Purpose  : To understand the effects of opioid prescribing rates in each US state.

Comments : If you plan to run it, it will take a while to display the 700k rows of data.
         : There are 250k rows of data which are missing pertinent information!
         : This isn't a completed project, just my preliminary sketch of data processing.

data     : https://data.cms.gov/Medicare-Claims/Medicare-Part-D-Opioid-Prescriber-Summary-File-201/e4ka-3ncx/data
example  : python3 parsedata.py Medicare_Part_D_Opioid_Prescriber_Summary_File_2014.csv
"""

import csv
import os
import sys

# Zero-based indexes of the columns that must be non-empty for a record to be
# counted as usable.
_REQUIRED_COLUMNS = (7, 8)


def _count_records(rows):
    """Return ``(unused, usable)`` counts for an iterable of parsed CSV rows.

    A row is unused when any required column holds an empty string or is
    absent because the row is too short (this includes blank lines, which the
    csv reader yields as empty rows). Every other row is usable.
    """
    unused = usable = 0
    for row in rows:
        if any(idx >= len(row) or row[idx] == "" for idx in _REQUIRED_COLUMNS):
            unused += 1
        else:
            usable += 1
    return unused, usable


def getdata(path=None):
    """Count the records of a CSV file and print a three-line summary.

    The file is streamed through ``csv.reader`` instead of splitting each line
    on commas, so quoted fields that contain commas do not shift the columns
    and the trailing newline never ends up inside the last field.

    Every row is counted, including a header row if the file has one.

    Args:
        path: Path of the CSV file. When omitted, the first command-line
            argument (``sys.argv[1]``) is used. Surrounding whitespace is
            stripped from the path.

    Returns:
        None. The counts of unused, usable and total records are printed to
        standard output.

    Raises:
        IndexError: if ``path`` is omitted and no command-line argument exists.
        OSError: if the file cannot be opened.
    """
    if path is None:
        path = sys.argv[1]
    path = path.strip()

    # newline="" is what the csv module requires for correct handling of
    # embedded line breaks. errors="replace" keeps an undecodable byte from
    # aborting the count; only the emptiness of two fields is examined, so a
    # substituted character cannot change the result.
    with open(path, newline="", encoding="utf-8", errors="replace") as f:
        unused, usable = _count_records(csv.reader(f))

    print("the numbers of records in this file which i didnt use are: " + str(unused))
    print("the numbers of records in this file i might use are: " + str(usable))
    print("the total records in this file are: " + str(unused + usable))


def main():
    """Script entry point: validate the command line, then run ``getdata``."""
    if len(sys.argv) < 2:
        prog = os.path.basename(sys.argv[0]) if sys.argv else "parsedata.py"
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("usage: " + prog + " <csv-file>")
    getdata()


if __name__ == "__main__":
    main()