User Tools

Site Tools


mysql:quickloganalyzer

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Next revision
Previous revision
mysql:quickloganalyzer [2012/05/02 19:14] – creado rlunaromysql:quickloganalyzer [2022/12/02 22:02] (current) – external edit 127.0.0.1
Line 1: Line 1:
 +====== A quick log analyzer ======
 +
 +
 +Here comes a quick log analyzer for web logs. It is an [[http://en.wikipedia.org/wiki/Awk|AWK]] script that parses a log file and generates insert commands. You can pipe this insert commands into a mysql database (the table is provided in the comments) and, using the power of SQL, generate queries about queries per day, queries per month, single visits, etc. 
 +
 +A little of AWK is required, and knowledge of regular expressions. 
 +
 +<code>
 +10.255.255.255 - - [17/Apr/2012:18:49:44 +0200] GET /this/is/the/web/page.html HTTP/1.1 200 1767 Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.162 Safari/535.19 -
 +10.256.256.256 - - [17/Apr/2012:18:49:44 +0200] GET /this/is/another/web/page/default.css HTTP/1.1 304 - Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.162 Safari/535.19 -
 +</code>
 +
 +<code awk>
 +#
 +# logs2mysql.awk - inserta los logs en una base de datos para hacer análisis
 +#
 +# To execute: 
 +# awk -f logs2mysql.awk logfile.log| mysql -h hostname -u username -p database 
 +#
 +
 +# create table analisis 
 +# ( ip  varchar( 1000 ),
 +#   value1 varchar( 1000 ), 
 +#   value2 varchar( 1000 ), 
 +#   time_stamp date,
 +#   command varchar( 1000 ), 
 +#   url varchar( 1000 ), 
 +#   protocol varchar( 1000 ), 
 +#   response varchar( 1000 ), 
 +#   bytes varchar( 1000 ), 
 +#   user_agent varchar( 2000) 
 +# );
 +
 +
 +BEGIN {
 + print "-- awk -f logs2mysql.sql logfile.log| mysql -h hostname -u username -p database ";
 + print "-- awk -f logs2mysql.sql logfile.log| mysql -h hostname -u username -p database ";
 + print "-- awk -f logs2mysql.sql logfile.log| mysql -h hostname -u username -p database "; 
 +
 +
 +
 + command = "insert into analisis " \
 + "(ip, " \
 + "value1, " \
 + "value2, " \
 + "time_stamp, " \
 + "command, " \
 + "url, " \
 + "protocol, " \
 + "response, " \
 + "bytes, " \
 + "user_agent)" \
 + "values " \
 + "('##ip', " \
 + "'##value1', " \
 + "'##value2', " \
 + "str_to_date( '##time_stamp', '%d/%b/%Y:%H:%i:%s' ), " \
 + "'##command', " \
 + "'##url', " \
 + "'##protocol', " \
 + "'##response', " \
 + "'##value3', " \
 + "'##user_agent');
 + }
 +
 +/.*/ {
 + #10.256.256.256 - - [26/Apr/2012:08:16:48 +0200] GET /example/web/page.html HTTP/1.1 200 4149 Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; PRM_IE; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022) -
 + match( $0, "^([^ ]*) ([^ ]*) ([^ ]*) \\[(.*)\\] ([[:alpha:]]*) ([^ ]*) ([^ ]*) ([[:digit:]]*) ([[:digit:]-]*) (.*)", values )
 +# print "ip: " values[1];
 +# print "value1: " values[2];
 +# print "value2: " values[3];
 +# print "time_stamp: " values[4];
 +# print "command: " values[5];
 +# print "url: " values[6];
 +# print "protocol: " values[7];
 +# print "response: " values[8];
 +# print "bytes: " values[9];
 +# print "user agent: " values[10];
 +# print "-----------------------------"; 
 +
 + url = values[6];
 + ignore = 0;
 + ignore = ignore || match( url, "\.png$" );
 + ignore = ignore || match( url, "\.css$" );
 + ignore = ignore || match( url, "\.js$" );
 +
 + if( !ignore ) 
 + {
 + out = command;
 + out = gensub( "##ip", escape( values[1] ), "g", out ); 
 + out = gensub( "##value1", escape( values[2] ), "g", out ); 
 + out = gensub( "##value2", escape( values[3] ), "g", out ); 
 + the_date = gensub( "\+0200", "", "g", values[4] );
 + out = gensub( "##time_stamp", escape( the_date ), "g", out ); 
 + out = gensub( "##command", escape( values[5] ), "g", out ); 
 + out = gensub( "##url", escape( values[6] ), "g", out ); 
 + out = gensub( "##protocol", escape( values[7] ), "g", out ); 
 + out = gensub( "##response", escape( values[8] ), "g", out ); 
 + out = gensub( "##value3", escape( values[9] ), "g", out ); 
 + out = gensub( "##user_agent", escape( values[10] ), "g", out ); 
 + print out;
 + } # if ignore
 +
 + }
 +
 +END {
 + }
 +
 +function escape( input ) 
 +{
 + out = input;
 + out = gensub( ";", "\;", "g", out );
 + out = gensub( "\\*", "\\\*", "g", out );
 + out = gensub( "%", "\%", "g", out );
 + out = gensub( "'", "\'", "g", out );
 + out = gensub( "\"", "\\\"", "g", out );
 +
 + return out; 
 +
 +} # escape 
 +
 +
 +
 +</code>
 +