#!/bin/sh
#
# author: alex trull
#
# background:
#   this is a quickly-written script to turn the bigblacklist from
#   urlblacklist.com into a poor man's bcwf (bluecoat web filter)
#   for use as the 'local' filter by a bluecoat and for use in policies
#
# instructions:
#   - get the bigblacklist from urlblacklist.com, decompress it and move/copy
#     this script into the directory which contains the category directories
#   - execute the script
#   - the bluecoat.db needs to be available on a webserver near the bluecoat -
#     it takes a few minutes to load on an sg200b or similar.
#   - once loaded, the categories are available through the policy manager
#
# output:
#   bluecoat.db in the current directory, rebuilt from scratch on every run.
#   Each category directory becomes one stanza:
#       define category <directory>
#        <entry>
#        ...
#       end

# File the generated database is written to (relative to the current directory).
readonly DB_FILE=bluecoat.db

# Directories whose path matches this extended regex (anywhere in the path) are
# not turned into categories: these are the super-pointless/super-lame/
# super-duplicate directories (bluecoat only wants a url to be in a maximum
# of 4 categories.. weirdos)
readonly SKIP_CATEGORIES='(chat|astrology|clothing|culinary|filehosting|financial|marketingware|mixed|online|searchblog|aggressive|news|french|ringtones|beer|mobile|whitelist|ecommerce|kids|jewelry|update|antispyware|childcare|cellphones|liquor|home|gardening|government|sect|religion)'

# Entries matching this extended regex are dropped from every category.
# NOTE: the dots are unescaped, so they match any character, not only '.'.
readonly SKIP_ENTRIES='(192.67.198.49|204.228.229.168|204.228.229.181|208.185.127.162|208.185.127.169|208.254.3.130|209.67.50.203|212.100.230.160|213.130.63.234|216.15.191.44|216.194.70.16|216.194.70.17|216.194.70.18|216.194.70.19|63.251.163.112|64.15.205.155|64.15.205.180|64.15.205.182|64.15.205.183|64.15.205.202|64.40.102.41|66.116.109.35|199.72.44.116|208.38.59.181|208.38.59.182|208.38.59.183|208.38.61.23|208.38.61.25|213.130.63.232|216.194.70.4|64.235.246.143|66.216.74.57|thisisarandomentrythatdoesnotexist.com|indiatimes.com|go.icq.com)'

# Print one candidate category per line: every directory below the current
# one, with the leading "./" removed, minus the ones matching SKIP_CATEGORIES.
# The current directory itself shows up as an empty line; the caller skips it.
list_categories() {
  find . -type d | cut -c 3- | grep -E -v "$SKIP_CATEGORIES"
}

# Print the complete stanza for one category on stdout.
# Arguments: $1 - category directory, relative to the current directory
emit_category() {
  printf 'define category %s\n' "$1"

  # The "./" prefix keeps a directory name starting with '-' from being
  # mistaken for an option by awk.
  if [ -f "./$1/domains" ]; then
    # indent each entry, collapse ".." into ".", drop entries containing a
    # literal ".321." and drop everything matching SKIP_ENTRIES.
    # The '\.321\.' pattern must stay quoted: unquoted, the shell removes the
    # backslashes and the dots turn into match-anything wildcards.
    awk '{print " "$0}' "./$1/domains" \
      | sed -e 's/\.\./\./g' \
      | grep -v '\.321\.' \
      | grep -E -v "$SKIP_ENTRIES"
  fi

  # uncomment the three lines below if you want to add urls to the category - untested!
  # if [ -f "./$1/urls" ]; then
  #   awk '{print " "$0}' "./$1/urls"
  # fi

  printf 'end\n'
}

# Print the whole database on stdout, one stanza per category directory.
build_db() {
  # read line by line so directory names containing whitespace or glob
  # characters are passed through intact
  list_categories | while IFS= read -r category; do
    [ -n "$category" ] || continue
    [ -d "./$category" ] || continue
    emit_category "$category"
  done
}

# Rebuild DB_FILE in the current directory.
main() {
  # remove bluecoat.db if it exists.. we want a clean slate each time
  # (-f: a missing file on the first run is not an error)
  rm -f -- "$DB_FILE"

  # the define/end lines go into the file together with the entries, so the
  # database contains complete stanzas
  build_db > "$DB_FILE"
}

main "$@"