====== A quick log analyzer ====== Here comes a quick log analyzer for web logs. It is an [[http://en.wikipedia.org/wiki/Awk|AWK]] script that parses a log file and generates insert commands. You can pipe this insert commands into a mysql database (the table is provided in the comments) and, using the power of SQL, generate queries about queries per day, queries per month, single visits, etc. A little of AWK is required, and knowledge of regular expressions. 10.255.255.255 - - [17/Apr/2012:18:49:44 +0200] GET /this/is/the/web/page.html HTTP/1.1 200 1767 Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.162 Safari/535.19 - 10.256.256.256 - - [17/Apr/2012:18:49:44 +0200] GET /this/is/another/web/page/default.css HTTP/1.1 304 - Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.162 Safari/535.19 - # # logs2mysql.awk - inserta los logs en una base de datos para hacer anĂ¡lisis # # To execute: # awk -f logs2mysql.awk logfile.log| mysql -h hostname -u username -p database # # # create table analisis # ( ip varchar( 1000 ), # value1 varchar( 1000 ), # value2 varchar( 1000 ), # time_stamp date, # command varchar( 1000 ), # url varchar( 1000 ), # protocol varchar( 1000 ), # response varchar( 1000 ), # bytes varchar( 1000 ), # user_agent varchar( 2000) # ); # BEGIN { print "-- awk -f logs2mysql.sql logfile.log| mysql -h hostname -u username -p database "; print "-- awk -f logs2mysql.sql logfile.log| mysql -h hostname -u username -p database "; print "-- awk -f logs2mysql.sql logfile.log| mysql -h hostname -u username -p database "; command = "insert into analisis " \ "(ip, " \ "value1, " \ "value2, " \ "time_stamp, " \ "command, " \ "url, " \ "protocol, " \ "response, " \ "bytes, " \ "user_agent)" \ "values " \ "('##ip', " \ "'##value1', " \ "'##value2', " \ "str_to_date( '##time_stamp', '%d/%b/%Y:%H:%i:%s' ), " \ "'##command', " \ "'##url', " \ "'##protocol', " \ "'##response', " \ "'##value3', " \ "'##user_agent'); " } /.*/ { #10.256.256.256 - - [26/Apr/2012:08:16:48 +0200] GET /example/web/page.html HTTP/1.1 200 4149 Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; PRM_IE; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022) - match( $0, "^([^ ]*) ([^ ]*) ([^ ]*) \\[(.*)\\] ([[:alpha:]]*) ([^ ]*) ([^ ]*) ([[:digit:]]*) ([[:digit:]-]*) (.*)", values ) # print "ip: " values[1]; # print "value1: " values[2]; # print "value2: " values[3]; # print "time_stamp: " values[4]; # print "command: " values[5]; # print "url: " values[6]; # print "protocol: " values[7]; # print "response: " values[8]; # print "bytes: " values[9]; # print "user agent: " values[10]; # print "-----------------------------"; url = values[6]; ignore = 0; ignore = ignore || match( url, "\.png$" ); ignore = ignore || match( url, "\.css$" ); ignore = ignore || match( url, "\.js$" ); if( !ignore ) { out = command; out = gensub( "##ip", escape( values[1] ), "g", out ); out = gensub( "##value1", escape( values[2] ), "g", out ); out = gensub( "##value2", escape( values[3] ), "g", out ); the_date = gensub( "\+0200", "", "g", values[4] ); out = gensub( "##time_stamp", escape( the_date ), "g", out ); out = gensub( "##command", escape( values[5] ), "g", out ); out = gensub( "##url", escape( values[6] ), "g", out ); out = gensub( "##protocol", escape( values[7] ), "g", out ); out = gensub( "##response", escape( values[8] ), "g", out ); out = gensub( "##value3", escape( values[9] ), "g", out ); out = gensub( "##user_agent", escape( values[10] ), "g", out ); print out; } # if ignore } END { } function escape( input ) { out = input; out = gensub( ";", "\;", "g", out ); out = gensub( "\\*", "\\\*", "g", out ); out = gensub( "%", "\%", "g", out ); out = gensub( "'", "\'", "g", out ); out = gensub( "\"", "\\\"", "g", out ); return out; } # escape