#!/usr/bin/env python """ Prerequisites: mysql connector from mysql.com and requests module Mysql DB plugin http://dev.mysql.com/downloads/connector/python/2.0.html Requests plugin http://docs.python-requests.org/ linux: sudo pip install requests windose: c:\python27\scripts\pip.exe install requests This software assume you have either mysql or mariaDB installed and with the ability to create Databases. I have mainly tested this software on windose and worked every time. I am not sure about if this software would function on any distros of linux. This code should be generic enough which will most likly works well in linux than windose. There are a few methods to rewrite this software into something a bit more robust as well as a bit less software agnostic. Next version, i can try using python sqlalchemy instead of the plugin from mysql. rewrite this entire ode to be object oriented along with seperate the username and password to an outside file. Usage: Fire up a linux cmd prompt or a DOS cmd prompt python ICD9Parser.py """ from __future__ import print_function import base64 import sys import os def printAuthor(): author = "YXNjaGVuYmFjaEBnbWFpbC5jb20=" print(base64.b64decode(author)) try: import mysql.connector from mysql.connector import errorcode except ImportError as err: print(err) print("Missing Mysql Connector plugin.") print("Please install Mysql Connector module.") exit(1) # Restructure the Mysql Connector Example from Mysql.com def createICD9Schema(): DB_NAME='icd9' icd9Tables = {} icd9Tables['icd9Code'] = ( "CREATE TABLE IF NOT EXISTS `icd9Code` (" " `ID` INT(11) NOT NULL AUTO_INCREMENT," " `icd9CodeID` VARCHAR(50) NULL DEFAULT NULL," " `icd9Disease` VARCHAR(250) NULL DEFAULT NULL," " `icd9Keyword` VARCHAR(250) NULL DEFAULT NULL," " `icd9Value` VARCHAR(1) NULL DEFAULT NULL," " `icd9Desc` VARCHAR(250) NULL DEFAULT NULL," " PRIMARY KEY (`ID`)" ") ENGINE=InnoDB") config = { 'user': 'USERNAME', 'password': 'YOURPASSWORD', 'host':'MYSQLServerHostName', 'database': 'mysql', 'raise_on_warnings': True} conn = mysql.connector.connect(**config) cur = conn.cursor(buffered=True) try: cur.execute("CREATE DATABASE {} DEFAULT CHARACTER SET 'latin1'".format(DB_NAME)) except mysql.connector.Error as err: print("Failed Creating Database: {}".format(err)) exit(1) try: conn.database = DB_NAME except mysql.connector.Error as err: if err.errno == errorcode.ER_BAD_DB_ERROR: createICD9Schema(cur) conn.database = DB_NAME else: print(err) exit(1) for name, ddl in icd9Tables.iteritems(): try: print("Creating table {}: ".format(name), end='') cur.execute(ddl) except mysql.connector.Error as err: if err.errno == errorcode.ER_TABLE_EXISTS_ERROR: print("Already Exists.") else: print(err.msg) else: print("OK") conn.commit() cur.close() conn.close() def getData(): try: import requests except ImportError as err: print(err) print("Missing requests plugin: Please Install requests module") exit(1) url = 'https://www.section111.cms.hhs.gov/MRA/help/ICD9_DX_Codes.txt' fn = os.path.basename(url) data = requests.get(url, verify=True) if data.ok: # DUMP Internet Data to local fs if sys.platform == "win32": path = os.getcwd() path=path + "\\" +fn f = open(path,'a') f.write(data.text) else: path = os.getcwd() path=path + "/" +fn f = open(path,'a') f.write(data.text) f.close() mess = data.text mess = mess.encode("utf8") #ascii will work too ascii < utf8 < unicode else: print("This ICD9 DX file doesnt seem to be in this location.") exit(1) return mess def parseData(data): import re icd9List=[] icd9Code=[] result = [] sql,tmp1,tmp2 = "","","" cnt = 1 # parsing each line for special SQL characters for line in data.splitlines(): line = re.sub(r"\'", "\\'", line) line = re.sub(r"\"", "\\\"", line) icd9List.append(line) # parsing each line to split it on spacing # then add it to a list for a in icd9List: cl = re.findall(r"\S+(?:\s\S+)*", a) icd9Code.append(cl) # generating SQL statements for each row of data # deciding the number of columns in that hhs.gov code file. for b in icd9Code: if len(b) == 3: for c in b: tmp1 = tmp1 + '\'' + c + '\'' + ',' tmp1 = tmp1 + 'NULL' + ',' + 'NULL' sql = 'INSERT INTO `icd9Code` VALUES ( ' + str(cnt) + ',' + tmp1 + ');' + '\n' result.append(sql) # very important be needing this line below tmp1,c,sql = "","","" cnt = cnt + 1 elif len(b) == 4: for d in b: tmp2 = tmp2 + '\'' + d + '\'' + ',' tmp2 = tmp2 + 'NULL' sql = 'INSERT INTO `icd9Code` VALUES ( ' + str(cnt) + ',' + tmp2 + ');' + '\n' result.append(sql) tmp2,d,sql = "","","" cnt = cnt + 1 else: print("Help, What is infinity + infinity ?!") exit(1) return result def insertDataToDB(data): tmp = data config = { 'user': 'USERNAME', 'password': 'YOURPASSWORD', 'host':'MYSQLServerHostName', 'database': 'icd9', 'raise_on_warnings': True} conn = mysql.connector.connect(**config) cur = conn.cursor(buffered=True) for sql in data: cur.execute(sql) conn.commit() cur.close() conn.close() return tmp def writeToFile(icd9Data): file = "icd9SQLData.sql" if sys.platform == "win32": path = os.getcwd() path=path + "\\" + file f = open(path,'a') for i in icd9Data: f.write(i) else: path = os.getcwd() path=path + "/" + file f = open(path,'a') for i in icd9Data: f.write(i) f.close() def main(): createICD9Schema() print("Downloading data file from hhs.gov.") a = getData() print("Done Downloading Data file from hhs.gov.") print("Parsing Data file to escape single and double quotes.") b = parseData(a) print("Done parsing data.") print("Inserting Data into Database. ") c = insertDataToDB(b) print("Done inserting Data to Database.") print("Saving ICD9_DX_Codes.txt and icd9SQLData.sql to your computer.") writeToFile(c) print("Done Saving files to your computer.") exit() main()