source: contributed/trunk/billing-query/dcache-billing-query.py @ 7307

Revision 27, 7.5 KB checked in by gcowan, 13 years ago (diff)

Added some documentation.

  • Property svn:executable set to *
Line 
1#! /usr/bin/python
2#
3# $Id: $
4
5''' This script pulls information about user activities from the
6dCache billing database and logs. This will be useful if there is a
7security incident at your site and you want to pick out all relevant
8information about the activity of a particular user DN.
9
10Usage
11-----
12
13$ python dcache-billing-query.py -s "2008-01-23 12:00:00" -e "2008-01-24 23:00:00" -i "User DN"
14
15Once you have pinned down the start and end times to within a day of
16each other, run the script again with the -l option to query the
17billing log files (with out -l it just looks at the billing
18database).
19
20References
21----------
22
23Script: http://trac.dcache.org/trac.cgi/browser/contributed/trunk/billing-query
24PyGreSQL: http://www.pygresql.org/
25'''
26
27__author__ = 'Greig A Cowan'
28__date__ = 'March 2008'
29__version__ = 0.1
30
31from time import strptime
32import sys
33import commands
34import re
35import pg
36from optparse import OptionParser
37
38def main():
39    '''Parse command line options, call methods'''
40    parser = OptionParser(
41        usage = 'usage: %prog [options]')
42    parser.add_option('-d', '--debug', dest='debug', action='store_true',
43                      help='use debug flag only for testing.')
44    parser.add_option('-s', '--start', dest='start',
45                      default='2008-03-03 00:00:00',
46                      help='start timestamp')
47    parser.add_option('-e', '--end', dest='end',
48                      default='2008-03-05 00:00:00',
49                      help='end timestamp')
50    parser.add_option('-b', '--billinghost', dest='host',
51                      default='localhost',
52                      help='host where the database is running')
53    parser.add_option('-l', '--logs', dest='log',
54                      action='store_true',
55                      help='set this option if you want to parse the \
56                      billing logs on the day of the START and END times')
57    parser.add_option('-i', '--id', dest='identity',
58                      default='/C=UK/O=eScience/OU=Edinburgh/L=NeSC/CN=greig cowan',
59                      help='user DN')
60   
61    # Parse commane-line input
62    (options, args) = parser.parse_args()
63    try:
64        input_filename = args
65    except ValueError:
66        parser.print_help()
67       
68    if len(args) > 1:
69        parser.error('incorrect number of arguments')
70                   
71    db = pg.connect(dbname='billing', host=options.host, user='srmdcache')
72   
73    transactions = get_doorinfo( db, options.identity,
74                                 options.start, options.end)
75    initiators = {}
76    pnfsids = []
77    for keys, values in transactions.iteritems():
78        init = get_billinginfo( db, keys, options.start, options.end)
79        initiators.update( init)
80        pnfsids.append( values[8])
81       
82    display_details( transactions, initiators)
83
84    if options.log:
85        if len( pnfsids) != 0:
86            for id in uniq(pnfsids):
87                check_billing_logs( id, options.start, options.end)
88
89    db.close()
90
91def display_details( transactions, initiators):
92    '''Display the information from the billinginfo and doorinfo tables.'''
93
94    initiatorStrings = ['\t isnew: \t', '\t datestamp: \t',\
95                        '\t pool cell: \t',\
96                        '\t pnfsid: \t', '\t fullsize (B): \t', \
97                        '\t transfersize: \t',\
98                        '\t storageclass: \t', '\t client: \t',\
99                        '\t errorcode: \t', '\t errormessage: \t',\
100                        '\t protocol: \t']
101    transactionStrings = ['\t datestamp: \t', '\t door cell: \t',\
102                          '\t uid: \t\t',\
103                          '\t gid: \t\t', '\t client: \t', \
104                          '\t errorcode: \t', '\t errormessage: \t',\
105                          '\t path: \t']
106
107    for key, value in initiators.iteritems():
108        for i in range( len( initiatorStrings)):
109            if i == 0:
110                if value[0] == 't':
111                    print '#######'
112                    print '# PUT #'
113                    print '#######'
114                elif value[0] == 'f':
115                    print '#######'
116                    print '# GET #'
117                    print '#######'
118                print 'Pool information:'
119            else:
120                print initiatorStrings[ i], value[ i]
121        print 'Door information:'
122        for i in range( len( transactionStrings)):
123            print transactionStrings[ i], transactions[key][i]
124
125
126def get_doorinfo( db, owner, start, end):
127    '''Query for the transactions that the doors processed.'''
128
129    query = 'select datestamp, cellname, mappeduid, mappedgid, client, \
130    errorcode, errormessage, path, pnfsid, transaction \
131    from doorinfo where owner=\'%s\' \
132    and datestamp >= \'%s\'\
133    and datestamp <= \'%s\';' % (owner, start, end)
134
135    out = db.query( query).getresult()
136
137    trans = {}
138
139    for entry in out:
140        trans[ entry[9]] = entry
141
142    return trans
143
144def get_billinginfo( db, transaction, start, end):
145    '''Get more information about this pnfsid.'''
146   
147    query = 'select isnew, datestamp, cellname, pnfsid, fullsize,\
148    transfersize, storageclass, client, errorcode, errormessage, \
149    protocol, initiator \
150    from billinginfo where initiator=\'%s\' \
151    and datestamp >= \'%s\'\
152    and datestamp <= \'%s\';' % (transaction, start, end)
153
154    out = db.query( query).getresult()
155
156    trans = {}
157
158    for entry in out:
159        trans[ entry[11]] = entry
160
161    return trans
162
163
164def fix_time( time):
165    '''If the day or month number is < 10, we need to add a leading 0.'''
166    if int( time) < 10:
167        return "0%s" % time
168    else:
169        return time
170
171
172def check_billing_logs( pnfsID, start, end):
173    '''grep billing logs for information. Need both start and end times
174    in case the billing logs are split over two days.'''
175   
176    # need to deal with this
177    start_list = strptime(start, "%Y-%m-%d %H:%M:%S")
178    end_list   = strptime(end,   "%Y-%m-%d %H:%M:%S")
179   
180    s_year = start_list[0]
181    s_month = start_list[1]
182    s_day = start_list[2]
183   
184    e_year = end_list[0]
185    e_month = end_list[1]
186    e_day = end_list[2]
187
188    grep = "grep %s /opt/d-cache/billing/%s/%s/billing-%s.%s.%s" \
189           % ( pnfsID, s_year, fix_time( s_month), s_year, \
190               fix_time( s_month), fix_time( s_day))
191
192    command = []
193    command.append( grep)
194
195    if s_day != e_day:
196        # Check if the start and end times are on the same day
197        grep2 = "grep %s /opt/d-cache/billing/%s/%s/billing-%s.%s.%s" \
198                % ( pnfsID, e_year, fix_time( e_month), \
199                    e_year, fix_time( e_month), fix_time( e_day))
200        command.append( grep2)
201
202    reg = re.compile( "RemoveFiles")
203    print "\n################\n# BILLING LOGS #\n################"
204    for c in command:
205        ( stat, out) = commands.getstatusoutput( c)
206        print out
207        for line in out.split(' '):
208            if reg.search( line):
209                print "\n######################"
210                print "# SUMMARY OF BILLING #"
211                print "######################"
212                print "%s was deleted\n" % pnfsID
213                print "NOTE: dCache does not record an attempt to delete a file that is already deleted"
214       
215def uniq( seq):
216    # Not order preserving
217    keys = {}
218    for e in seq:
219        keys[e] = 1
220        return keys.keys()
221
222
223def getCredentialID( db, identity):
224    '''Map DN to credential ID.'''
225
226    query = "select id from srmrequestcredentials where \
227    credentialname like \'%s\'" % identity
228
229    id = db.query( query).getresult()[0][0]
230
231    return id
232
233
234if __name__ == "__main__":
235    main()
236
Note: See TracBrowser for help on using the repository browser.