====== A quick log analyzer ======
Here comes a quick log analyzer for web logs. It is an [[http://en.wikipedia.org/wiki/Awk|AWK]] script that parses a log file and generates insert commands. You can pipe this insert commands into a mysql database (the table is provided in the comments) and, using the power of SQL, generate queries about queries per day, queries per month, single visits, etc.
A little of AWK is required, and knowledge of regular expressions.
10.255.255.255 - - [17/Apr/2012:18:49:44 +0200] GET /this/is/the/web/page.html HTTP/1.1 200 1767 Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.162 Safari/535.19 -
10.256.256.256 - - [17/Apr/2012:18:49:44 +0200] GET /this/is/another/web/page/default.css HTTP/1.1 304 - Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.162 Safari/535.19 -
#
# logs2mysql.awk - inserta los logs en una base de datos para hacer anĂ¡lisis
#
# To execute:
# awk -f logs2mysql.awk logfile.log| mysql -h hostname -u username -p database
#
#
# create table analisis
# ( ip varchar( 1000 ),
# value1 varchar( 1000 ),
# value2 varchar( 1000 ),
# time_stamp date,
# command varchar( 1000 ),
# url varchar( 1000 ),
# protocol varchar( 1000 ),
# response varchar( 1000 ),
# bytes varchar( 1000 ),
# user_agent varchar( 2000)
# );
#
BEGIN {
print "-- awk -f logs2mysql.sql logfile.log| mysql -h hostname -u username -p database ";
print "-- awk -f logs2mysql.sql logfile.log| mysql -h hostname -u username -p database ";
print "-- awk -f logs2mysql.sql logfile.log| mysql -h hostname -u username -p database ";
command = "insert into analisis " \
"(ip, " \
"value1, " \
"value2, " \
"time_stamp, " \
"command, " \
"url, " \
"protocol, " \
"response, " \
"bytes, " \
"user_agent)" \
"values " \
"('##ip', " \
"'##value1', " \
"'##value2', " \
"str_to_date( '##time_stamp', '%d/%b/%Y:%H:%i:%s' ), " \
"'##command', " \
"'##url', " \
"'##protocol', " \
"'##response', " \
"'##value3', " \
"'##user_agent'); "
}
/.*/ {
#10.256.256.256 - - [26/Apr/2012:08:16:48 +0200] GET /example/web/page.html HTTP/1.1 200 4149 Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; PRM_IE; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022) -
match( $0, "^([^ ]*) ([^ ]*) ([^ ]*) \\[(.*)\\] ([[:alpha:]]*) ([^ ]*) ([^ ]*) ([[:digit:]]*) ([[:digit:]-]*) (.*)", values )
# print "ip: " values[1];
# print "value1: " values[2];
# print "value2: " values[3];
# print "time_stamp: " values[4];
# print "command: " values[5];
# print "url: " values[6];
# print "protocol: " values[7];
# print "response: " values[8];
# print "bytes: " values[9];
# print "user agent: " values[10];
# print "-----------------------------";
url = values[6];
ignore = 0;
ignore = ignore || match( url, "\.png$" );
ignore = ignore || match( url, "\.css$" );
ignore = ignore || match( url, "\.js$" );
if( !ignore )
{
out = command;
out = gensub( "##ip", escape( values[1] ), "g", out );
out = gensub( "##value1", escape( values[2] ), "g", out );
out = gensub( "##value2", escape( values[3] ), "g", out );
the_date = gensub( "\+0200", "", "g", values[4] );
out = gensub( "##time_stamp", escape( the_date ), "g", out );
out = gensub( "##command", escape( values[5] ), "g", out );
out = gensub( "##url", escape( values[6] ), "g", out );
out = gensub( "##protocol", escape( values[7] ), "g", out );
out = gensub( "##response", escape( values[8] ), "g", out );
out = gensub( "##value3", escape( values[9] ), "g", out );
out = gensub( "##user_agent", escape( values[10] ), "g", out );
print out;
} # if ignore
}
END {
}
function escape( input )
{
out = input;
out = gensub( ";", "\;", "g", out );
out = gensub( "\\*", "\\\*", "g", out );
out = gensub( "%", "\%", "g", out );
out = gensub( "'", "\'", "g", out );
out = gensub( "\"", "\\\"", "g", out );
return out;
} # escape