| 1 | #! /usr/bin/python |
|---|
| 2 | # |
|---|
| 3 | # $Id: $ |
|---|
| 4 | |
|---|
| 5 | ''' This script pulls information about user activities from the |
|---|
| 6 | dCache billing database and logs. This will be useful if there is a |
|---|
| 7 | security incident at your site and you want to pick out all relevant |
|---|
| 8 | information about the activity of a particular user DN. |
|---|
| 9 | |
|---|
| 10 | Usage |
|---|
| 11 | ----- |
|---|
| 12 | |
|---|
| 13 | $ python dcache-billing-query.py -s "2008-01-23 12:00:00" -e "2008-01-24 23:00:00" -i "User DN" |
|---|
| 14 | |
|---|
| 15 | Once you have pinned down the start and end times to within a day of |
|---|
| 16 | each other, run the script again with the -l option to query the |
|---|
| 17 | billing log files (with out -l it just looks at the billing |
|---|
| 18 | database). |
|---|
| 19 | |
|---|
| 20 | References |
|---|
| 21 | ---------- |
|---|
| 22 | |
|---|
| 23 | Script: http://trac.dcache.org/trac.cgi/browser/contributed/trunk/billing-query |
|---|
| 24 | PyGreSQL: http://www.pygresql.org/ |
|---|
| 25 | ''' |
|---|
| 26 | |
|---|
| 27 | __author__ = 'Greig A Cowan' |
|---|
| 28 | __date__ = 'March 2008' |
|---|
| 29 | __version__ = 0.1 |
|---|
| 30 | |
|---|
| 31 | from time import strptime |
|---|
| 32 | import sys |
|---|
| 33 | import commands |
|---|
| 34 | import re |
|---|
| 35 | import pg |
|---|
| 36 | from optparse import OptionParser |
|---|
| 37 | |
|---|
| 38 | def main(): |
|---|
| 39 | '''Parse command line options, call methods''' |
|---|
| 40 | parser = OptionParser( |
|---|
| 41 | usage = 'usage: %prog [options]') |
|---|
| 42 | parser.add_option('-d', '--debug', dest='debug', action='store_true', |
|---|
| 43 | help='use debug flag only for testing.') |
|---|
| 44 | parser.add_option('-s', '--start', dest='start', |
|---|
| 45 | default='2008-03-03 00:00:00', |
|---|
| 46 | help='start timestamp') |
|---|
| 47 | parser.add_option('-e', '--end', dest='end', |
|---|
| 48 | default='2008-03-05 00:00:00', |
|---|
| 49 | help='end timestamp') |
|---|
| 50 | parser.add_option('-b', '--billinghost', dest='host', |
|---|
| 51 | default='localhost', |
|---|
| 52 | help='host where the database is running') |
|---|
| 53 | parser.add_option('-l', '--logs', dest='log', |
|---|
| 54 | action='store_true', |
|---|
| 55 | help='set this option if you want to parse the \ |
|---|
| 56 | billing logs on the day of the START and END times') |
|---|
| 57 | parser.add_option('-i', '--id', dest='identity', |
|---|
| 58 | default='/C=UK/O=eScience/OU=Edinburgh/L=NeSC/CN=greig cowan', |
|---|
| 59 | help='user DN') |
|---|
| 60 | |
|---|
| 61 | # Parse commane-line input |
|---|
| 62 | (options, args) = parser.parse_args() |
|---|
| 63 | try: |
|---|
| 64 | input_filename = args |
|---|
| 65 | except ValueError: |
|---|
| 66 | parser.print_help() |
|---|
| 67 | |
|---|
| 68 | if len(args) > 1: |
|---|
| 69 | parser.error('incorrect number of arguments') |
|---|
| 70 | |
|---|
| 71 | db = pg.connect(dbname='billing', host=options.host, user='srmdcache') |
|---|
| 72 | |
|---|
| 73 | transactions = get_doorinfo( db, options.identity, |
|---|
| 74 | options.start, options.end) |
|---|
| 75 | initiators = {} |
|---|
| 76 | pnfsids = [] |
|---|
| 77 | for keys, values in transactions.iteritems(): |
|---|
| 78 | init = get_billinginfo( db, keys, options.start, options.end) |
|---|
| 79 | initiators.update( init) |
|---|
| 80 | pnfsids.append( values[8]) |
|---|
| 81 | |
|---|
| 82 | display_details( transactions, initiators) |
|---|
| 83 | |
|---|
| 84 | if options.log: |
|---|
| 85 | if len( pnfsids) != 0: |
|---|
| 86 | for id in uniq(pnfsids): |
|---|
| 87 | check_billing_logs( id, options.start, options.end) |
|---|
| 88 | |
|---|
| 89 | db.close() |
|---|
| 90 | |
|---|
| 91 | def display_details( transactions, initiators): |
|---|
| 92 | '''Display the information from the billinginfo and doorinfo tables.''' |
|---|
| 93 | |
|---|
| 94 | initiatorStrings = ['\t isnew: \t', '\t datestamp: \t',\ |
|---|
| 95 | '\t pool cell: \t',\ |
|---|
| 96 | '\t pnfsid: \t', '\t fullsize (B): \t', \ |
|---|
| 97 | '\t transfersize: \t',\ |
|---|
| 98 | '\t storageclass: \t', '\t client: \t',\ |
|---|
| 99 | '\t errorcode: \t', '\t errormessage: \t',\ |
|---|
| 100 | '\t protocol: \t'] |
|---|
| 101 | transactionStrings = ['\t datestamp: \t', '\t door cell: \t',\ |
|---|
| 102 | '\t uid: \t\t',\ |
|---|
| 103 | '\t gid: \t\t', '\t client: \t', \ |
|---|
| 104 | '\t errorcode: \t', '\t errormessage: \t',\ |
|---|
| 105 | '\t path: \t'] |
|---|
| 106 | |
|---|
| 107 | for key, value in initiators.iteritems(): |
|---|
| 108 | for i in range( len( initiatorStrings)): |
|---|
| 109 | if i == 0: |
|---|
| 110 | if value[0] == 't': |
|---|
| 111 | print '#######' |
|---|
| 112 | print '# PUT #' |
|---|
| 113 | print '#######' |
|---|
| 114 | elif value[0] == 'f': |
|---|
| 115 | print '#######' |
|---|
| 116 | print '# GET #' |
|---|
| 117 | print '#######' |
|---|
| 118 | print 'Pool information:' |
|---|
| 119 | else: |
|---|
| 120 | print initiatorStrings[ i], value[ i] |
|---|
| 121 | print 'Door information:' |
|---|
| 122 | for i in range( len( transactionStrings)): |
|---|
| 123 | print transactionStrings[ i], transactions[key][i] |
|---|
| 124 | |
|---|
| 125 | |
|---|
| 126 | def get_doorinfo( db, owner, start, end): |
|---|
| 127 | '''Query for the transactions that the doors processed.''' |
|---|
| 128 | |
|---|
| 129 | query = 'select datestamp, cellname, mappeduid, mappedgid, client, \ |
|---|
| 130 | errorcode, errormessage, path, pnfsid, transaction \ |
|---|
| 131 | from doorinfo where owner=\'%s\' \ |
|---|
| 132 | and datestamp >= \'%s\'\ |
|---|
| 133 | and datestamp <= \'%s\';' % (owner, start, end) |
|---|
| 134 | |
|---|
| 135 | out = db.query( query).getresult() |
|---|
| 136 | |
|---|
| 137 | trans = {} |
|---|
| 138 | |
|---|
| 139 | for entry in out: |
|---|
| 140 | trans[ entry[9]] = entry |
|---|
| 141 | |
|---|
| 142 | return trans |
|---|
| 143 | |
|---|
| 144 | def get_billinginfo( db, transaction, start, end): |
|---|
| 145 | '''Get more information about this pnfsid.''' |
|---|
| 146 | |
|---|
| 147 | query = 'select isnew, datestamp, cellname, pnfsid, fullsize,\ |
|---|
| 148 | transfersize, storageclass, client, errorcode, errormessage, \ |
|---|
| 149 | protocol, initiator \ |
|---|
| 150 | from billinginfo where initiator=\'%s\' \ |
|---|
| 151 | and datestamp >= \'%s\'\ |
|---|
| 152 | and datestamp <= \'%s\';' % (transaction, start, end) |
|---|
| 153 | |
|---|
| 154 | out = db.query( query).getresult() |
|---|
| 155 | |
|---|
| 156 | trans = {} |
|---|
| 157 | |
|---|
| 158 | for entry in out: |
|---|
| 159 | trans[ entry[11]] = entry |
|---|
| 160 | |
|---|
| 161 | return trans |
|---|
| 162 | |
|---|
| 163 | |
|---|
| 164 | def fix_time( time): |
|---|
| 165 | '''If the day or month number is < 10, we need to add a leading 0.''' |
|---|
| 166 | if int( time) < 10: |
|---|
| 167 | return "0%s" % time |
|---|
| 168 | else: |
|---|
| 169 | return time |
|---|
| 170 | |
|---|
| 171 | |
|---|
| 172 | def check_billing_logs( pnfsID, start, end): |
|---|
| 173 | '''grep billing logs for information. Need both start and end times |
|---|
| 174 | in case the billing logs are split over two days.''' |
|---|
| 175 | |
|---|
| 176 | # need to deal with this |
|---|
| 177 | start_list = strptime(start, "%Y-%m-%d %H:%M:%S") |
|---|
| 178 | end_list = strptime(end, "%Y-%m-%d %H:%M:%S") |
|---|
| 179 | |
|---|
| 180 | s_year = start_list[0] |
|---|
| 181 | s_month = start_list[1] |
|---|
| 182 | s_day = start_list[2] |
|---|
| 183 | |
|---|
| 184 | e_year = end_list[0] |
|---|
| 185 | e_month = end_list[1] |
|---|
| 186 | e_day = end_list[2] |
|---|
| 187 | |
|---|
| 188 | grep = "grep %s /opt/d-cache/billing/%s/%s/billing-%s.%s.%s" \ |
|---|
| 189 | % ( pnfsID, s_year, fix_time( s_month), s_year, \ |
|---|
| 190 | fix_time( s_month), fix_time( s_day)) |
|---|
| 191 | |
|---|
| 192 | command = [] |
|---|
| 193 | command.append( grep) |
|---|
| 194 | |
|---|
| 195 | if s_day != e_day: |
|---|
| 196 | # Check if the start and end times are on the same day |
|---|
| 197 | grep2 = "grep %s /opt/d-cache/billing/%s/%s/billing-%s.%s.%s" \ |
|---|
| 198 | % ( pnfsID, e_year, fix_time( e_month), \ |
|---|
| 199 | e_year, fix_time( e_month), fix_time( e_day)) |
|---|
| 200 | command.append( grep2) |
|---|
| 201 | |
|---|
| 202 | reg = re.compile( "RemoveFiles") |
|---|
| 203 | print "\n################\n# BILLING LOGS #\n################" |
|---|
| 204 | for c in command: |
|---|
| 205 | ( stat, out) = commands.getstatusoutput( c) |
|---|
| 206 | print out |
|---|
| 207 | for line in out.split(' '): |
|---|
| 208 | if reg.search( line): |
|---|
| 209 | print "\n######################" |
|---|
| 210 | print "# SUMMARY OF BILLING #" |
|---|
| 211 | print "######################" |
|---|
| 212 | print "%s was deleted\n" % pnfsID |
|---|
| 213 | print "NOTE: dCache does not record an attempt to delete a file that is already deleted" |
|---|
| 214 | |
|---|
| 215 | def uniq( seq): |
|---|
| 216 | # Not order preserving |
|---|
| 217 | keys = {} |
|---|
| 218 | for e in seq: |
|---|
| 219 | keys[e] = 1 |
|---|
| 220 | return keys.keys() |
|---|
| 221 | |
|---|
| 222 | |
|---|
| 223 | def getCredentialID( db, identity): |
|---|
| 224 | '''Map DN to credential ID.''' |
|---|
| 225 | |
|---|
| 226 | query = "select id from srmrequestcredentials where \ |
|---|
| 227 | credentialname like \'%s\'" % identity |
|---|
| 228 | |
|---|
| 229 | id = db.query( query).getresult()[0][0] |
|---|
| 230 | |
|---|
| 231 | return id |
|---|
| 232 | |
|---|
| 233 | |
|---|
| 234 | if __name__ == "__main__": |
|---|
| 235 | main() |
|---|
| 236 | |
|---|