#!/bin/env python
# Instructions:
#   Download Python 2.7.x first on Windows.
#   On Windows, grab the file called get-pip.py from
#   https://pip.pypa.io/en/latest/installing.html
#   and follow the instructions on that web page to install Python pip.
#   Installing python-pip on Linux is super easy: yum install python-pip
#   Then, for both OSes, at the DOS/KDE shell prompt:
#     linux:   pip install ipwhois
#     windows: c:\python27\scripts\pip.exe install ipwhois
#
# The idea was: obtaining an accurate count of user clicks from internal and
# external IP addresses on a particular data set/study without creating an
# account for a random user in ether space.
# DVN does not provide or separate internal and external file download counts.
# To get around this issue, one must crawl the log files to gather the data.
# In other words, data mining at its finest. DVN logs are weird: they are
# Apache-like but not exactly, otherwise I would just use the Apache Python
# log-processing module instead of writing something to remedy this issue.
#
# The following code can be run on Windows and Linux OSes, but not the first part.
#
# Part 1: on a DVN instance, figure out the number of total clicks from a
# year's worth of logs.
#
# Grepping external IP addresses:
#   cd $GFLOG/access
#   cat server_access_log.*.* | grep StudyID | grep -v "10\." | grep studyListingIndex | grep 200 > /var/tmp/data.txt
# For example:
#   cat server_access_log.*.* | grep 10015 | grep -v "10\." | grep studyListingIndex | grep 200 > /var/tmp/data.txt
#
# Grepping internal IP addresses:
#   cd $GFLOG/access
#   cat server_access_log.*.* | grep fileId | grep "SubstituteYourInternalIP" | grep 200 > /var/tmp/data.txt
# For example:
#   cat server_access_log.*.* | grep fileId | grep "10\." | grep 200 > /var/tmp/data.txt
#
# Not really a solution but more of a hack.
# DVN version 3.6.x+ does have this file-download tracking feature, but
# isolating external and internal IP addresses was the key to this hack/code.
# The code below does a reverse lookup, using the ipwhois Python module, to
# identify who clicked on a download file. It is a bit slow at looking up IP
# addresses, but one can enhance this code to keep a local repository and
# query that first before looking an address up on the internet.
#
# How to use this software at the DOS command prompt or KDE Konsole:
#   ./processDVNLog.py InputFileFromAbove InsertYourOwnOutputFileName
# Example:
#   ./processDVNLog.py data.txt study10014.txt

from pprint import pprint
from sys import argv

from ipwhois import IPWhois

# https://pypi.python.org/pypi/ipwhois


def _describe_ip(address):
    """Return the whois description of the first network for *address*.

    Returns an empty string when the whois answer lists no networks or the
    first network carries no description, so the caller always has a string
    it can write.
    """
    client = IPWhois(address)
    # Newer ipwhois releases expose lookup_whois(); older ones only have
    # lookup(). Prefer the new name and fall back so both keep working.
    lookup = getattr(client, 'lookup_whois', None) or client.lookup
    answer = lookup()
    nets = answer.get('nets') or []
    if not nets:
        return ''
    return nets[0].get('description') or ''


def ingestData():
    """Resolve the first field of every input line to a whois description.

    Reads the log extract named by ``argv[1]``, treats the first
    whitespace-separated field of each line (with surrounding double quotes
    stripped) as an IP address, and writes one description per record to the
    file named by ``argv[2]``. The last line of the output is the number of
    records processed.

    Blank input lines are skipped instead of raising IndexError. Any error
    raised by the whois lookup itself propagates to the caller; the files are
    closed either way.
    """
    # Read the input completely before touching the output file, so a missing
    # input does not leave behind an empty or truncated output file.
    with open(argv[1]) as source:
        records = [fields for fields in (line.split() for line in source)
                   if fields]

    with open(argv[2], 'w') as sink:
        for fields in records:
            sink.write(_describe_ip(fields[0].strip('"')))
            sink.write('\n')
        sink.write(str(len(records)))
        sink.write("\n")


def main():
    """Check the command line and run the log ingestion."""
    if len(argv) < 3:
        raise SystemExit('usage: processDVNLog.py INPUT_FILE OUTPUT_FILE')
    ingestData()


if __name__ == '__main__':
    main()