git: 9front

ref: b6211d31d8c064a9c61bf81e3ab7c5d2dc3aa67d
dir: /sys/src/cmd/ip/httpd/classify.c/

View raw version
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ndb.h>
#include "whois.h"

/*
 * One row of a country table: the short code (also used for
 * pseudo-domains such as "gov") and the spelled-out name.
 * Tables of these end with a row whose code is 0.
 */
typedef struct Country {
	char *code;	/* short code, e.g. "us" */
	char *name;	/* long name, e.g. "united states" */
} Country;

/*
 * Countries that classify() treats as bad.  A "country" value, or
 * the last label of a "dom" value, that matches a code or a name
 * here (ignoring case) marks the address as coming from a bad
 * country.  The table ends with a { 0, 0 } row.
 */
Country badc[] =
{
	{"af", "afghanistan"},
	{"cu", "cuba"},
	{"ir", "iran"},
	{"iq", "iraq"},
	{"ly", "libya"},
	{"kp", "north korea"},
	{"sd", "sudan"},
	{"sy", "syria"},
	{ 0, 0 }
};

/*
 * Countries (and the gov/mil pseudo-domains) that classify() treats
 * as good.  A "country" value that matches neither badc nor a code
 * or name in this table (ignoring case) clears the good-country
 * state.  The table ends with a { 0, 0 } row.
 */
Country goodc[] =
{
	// the original, us and canada
	{"us", "united states of america"},
	{"ca", "canada"},
	{"gov", "gov"},
	{"mil", "mil"},

	// the european union
	{ "eu",	"european union" },
	{ "be",	"belgium" },
	{ "de",	"germany" },
	{ "fr",	"france" },
	{ "it",	"italy" },
	{ "lu",	"luxembourg" },
	{ "nl",	"netherlands" },
	{ "dk",	"denmark" },
	{ "ie",	"ireland" },
	{ "gb",	"great britain" },
	{ "uk",	"united kingdom" },
	{ "gr",	"greece" },
	{ "es",	"spain" },
	{ "pt",	"portugal" },
	/* austria is "at"; "au" is australia, listed under the rest */
	{ "at",	"austria" },
	{ "fi",	"finland" },
	{ "se",	"sweden" },

	// the rest
	{"au", "australia"},
	{"no", "norway"},
	{"cz", "czech republic"},
	{"hu", "hungary"},
	{"pl", "poland"},
	{"jp", "japan"},
	{"ch", "switzerland"},
	{"nz", "new zealand"},
	{ 0, 0 }
};

/*
 * Labels that mark a government domain.  classify() compares these,
 * ignoring case, with the next-to-last label of each "dom" value.
 * The list ends with a 0 entry.
 */
char *gov[] =
{
	"gov",
	"gouv",
	"mil",
	"government",
	0,
};

/*
 * Every country code and name known to this file.  classify()
 * consults it only when the tuples carry no "country" attribute:
 * if the last label of the looked-up "dom" value matches a code
 * or name here (ignoring case), a country is considered known.
 * The table ends with a { 0, 0 } row.
 */
Country allc[] =
{
	{ "ad",	"andorra" },
	{ "ae",	"united arab emirates" },
	{ "af",	"afghanistan" },
	{ "ag",	"antigua and barbuda" },
	{ "ai",	"anguilla" },
	{ "al",	"albania" },
	{ "am",	"armenia" },
	{ "an",	"netherlands antilles" },
	{ "ao",	"angola" },
	{ "aq",	"antarctica" },
	{ "ar",	"argentina" },
	{ "as",	"american samoa" },
	{ "at",	"austria" },
	{ "au",	"australia" },
	{ "aw",	"aruba" },
	{ "az",	"azerbaijan" },
	{ "ba",	"bosnia and herzegovina" },
	{ "bb",	"barbados" },
	{ "bd",	"bangladesh" },
	{ "be",	"belgium" },
	{ "bf",	"burkina faso" },
	{ "bg",	"bulgaria" },
	{ "bh",	"bahrain" },
	{ "bi",	"burundi" },
	{ "bj",	"benin" },
	{ "bm",	"bermuda" },
	{ "bn",	"brunei darussalam" },
	{ "bo",	"bolivia" },
	{ "br",	"brazil" },
	{ "bs",	"bahamas" },
	{ "bt",	"bhutan" },
	{ "bu",	"burma" },
	{ "bv",	"bouvet island" },
	{ "bw",	"botswana" },
	{ "by",	"belarus" },
	{ "bz",	"belize" },
	{ "ca",	"canada" },
	{ "cc",	"cocos (keeling) islands" },
	{ "cf",	"central african republic" },
	{ "cg",	"congo" },
	{ "ch",	"switzerland" },
	{ "ci",	"cote d'ivoire (ivory coast)" },
	{ "ck",	"cook islands" },
	{ "cl",	"chile" },
	{ "cm",	"cameroon" },
	{ "cn",	"china" },
	{ "co",	"colombia" },
	{ "cr",	"costa rica" },
	{ "cs",	"czechoslovakia (former)" },
	{ "ct",	"canton and enderbury island" },
	{ "cu",	"cuba" },
	{ "cv",	"cape verde" },
	{ "cx",	"christmas island" },
	{ "cy",	"cyprus" },
	{ "cz",	"czech republic" },
	{ "dd",	"german democratic republic" },
	{ "de",	"germany" },
	{ "dj",	"djibouti" },
	{ "dk",	"denmark" },
	{ "dm",	"dominica" },
	{ "do",	"dominican republic" },
	{ "dz",	"algeria" },
	{ "ec",	"ecuador" },
	{ "ee",	"estonia" },
	{ "eg",	"egypt" },
	{ "eh",	"western sahara" },
	{ "er",	"eritrea" },
	{ "es",	"spain" },
	{ "et",	"ethiopia" },
	{ "eu",	"european union" },
	{ "fi",	"finland" },
	{ "fj",	"fiji" },
	{ "fk",	"falkland islands (malvinas)" },
	{ "fm",	"micronesia" },
	{ "fo",	"faroe islands" },
	{ "fr",	"france" },
	{ "fx",	"france, metropolitan" },
	{ "ga",	"gabon" },
	{ "gb",	"great britain (uk)" },
	{ "gd",	"grenada" },
	{ "ge",	"georgia" },
	{ "gf",	"french guiana" },
	{ "gh",	"ghana" },
	{ "gi",	"gibraltar" },
	{ "gl",	"greenland" },
	{ "gm",	"gambia" },
	{ "gn",	"guinea" },
	{ "gp",	"guadeloupe" },
	{ "gq",	"equatorial guinea" },
	{ "gr",	"greece" },
	{ "gs",	"s. georgia and s. sandwich isls." },
	{ "gt",	"guatemala" },
	{ "gu",	"guam" },
	{ "gw",	"guinea-bissau" },
	{ "gy",	"guyana" },
	{ "hk",	"hong kong" },
	{ "hm",	"heard and mcdonald islands" },
	{ "hn",	"honduras" },
	{ "hr",	"croatia (hrvatska)" },
	{ "ht",	"haiti" },
	{ "hu",	"hungary" },
	{ "id",	"indonesia" },
	{ "ie",	"ireland" },
	{ "il",	"israel" },
	{ "in",	"india" },
	{ "io",	"british indian ocean territory" },
	{ "iq",	"iraq" },
	{ "ir",	"iran" },
	{ "is",	"iceland" },
	{ "it",	"italy" },
	{ "jm",	"jamaica" },
	{ "jo",	"jordan" },
	{ "jp",	"japan" },
	{ "jt",	"johnston island" },
	{ "ke",	"kenya" },
	{ "kg",	"kyrgyzstan" },
	{ "kh",	"cambodia (democratic kampuchea)" },
	{ "ki",	"kiribati" },
	{ "km",	"comoros" },
	{ "kn",	"saint kitts and nevis" },
	{ "kp",	"korea (north)" },
	{ "kr",	"korea (south)" },
	{ "kw",	"kuwait" },
	{ "ky",	"cayman islands" },
	{ "kz",	"kazakhstan" },
	{ "la",	"laos" },
	{ "lb",	"lebanon" },
	{ "lc",	"saint lucia" },
	{ "li",	"liechtenstein" },
	{ "lk",	"sri lanka" },
	{ "lr",	"liberia" },
	{ "ls",	"lesotho" },
	{ "lt",	"lithuania" },
	{ "lu",	"luxembourg" },
	{ "lv",	"latvia" },
	{ "ly",	"libya" },
	{ "ma",	"morocco" },
	{ "mc",	"monaco" },
	{ "md",	"moldova" },
	{ "mg",	"madagascar" },
	{ "mh",	"marshall islands" },
	{ "mi",	"midway islands" },
	{ "mk",	"macedonia" },
	{ "ml",	"mali" },
	{ "mm",	"myanmar" },
	{ "mn",	"mongolia" },
	{ "mo",	"macau" },
	{ "mp",	"northern mariana islands" },
	{ "mq",	"martinique" },
	{ "mr",	"mauritania" },
	{ "ms",	"montserrat" },
	{ "mt",	"malta" },
	{ "mu",	"mauritius" },
	{ "mv",	"maldives" },
	{ "mw",	"malawi" },
	{ "mx",	"mexico" },
	{ "my",	"malaysia" },
	{ "mz",	"mozambique" },
	{ "na",	"namibia" },
	{ "nc",	"new caledonia" },
	{ "ne",	"niger" },
	{ "nf",	"norfolk island" },
	{ "ng",	"nigeria" },
	{ "ni",	"nicaragua" },
	{ "nl",	"netherlands" },
	{ "no",	"norway" },
	{ "np",	"nepal" },
	{ "nq",	"dronning maud land" },
	{ "nr",	"nauru" },
	{ "nt",	"neutral zone" },
	{ "nu",	"niue" },
	{ "nz",	"new zealand (aotearoa)" },
	{ "om",	"oman" },
	{ "pa",	"panama" },
	{ "pc",	"pacific islands" },
	{ "pe",	"peru" },
	{ "pf",	"french polynesia" },
	{ "pg",	"papua new guinea" },
	{ "ph",	"philippines" },
	{ "pk",	"pakistan" },
	{ "pl",	"poland" },
	{ "pm",	"st. pierre and miquelon" },
	{ "pn",	"pitcairn" },
	{ "pr",	"puerto rico" },
	{ "pu",	"united states misc. pacific islands" },
	{ "pt",	"portugal" },
	{ "pw",	"palau" },
	{ "py",	"paraguay" },
	{ "qa",	"qatar" },
	{ "re",	"reunion" },
	{ "ro",	"romania" },
	{ "ru",	"russian federation" },
	{ "rw",	"rwanda" },
	{ "sa",	"saudi arabia" },
	{ "sb",	"solomon islands" },
	{ "sc",	"seychelles" },
	{ "sd",	"sudan" },
	{ "se",	"sweden" },
	{ "sg",	"singapore" },
	{ "sh",	"st. helena" },
	{ "si",	"slovenia" },
	{ "sj",	"svalbard and jan mayen islands" },
	{ "sk",	"slovak republic" },
	{ "sl",	"sierra leone" },
	{ "sm",	"san marino" },
	{ "sn",	"senegal" },
	{ "so",	"somalia" },
	{ "sr",	"suriname" },
	{ "st",	"sao tome and principe" },
	{ "su",	"ussr (former)" },
	{ "sv",	"el salvador" },
	{ "sy",	"syria" },
	{ "sz",	"swaziland" },
	{ "tc",	"turks and caicos islands" },
	{ "td",	"chad" },
	{ "tf",	"french southern territories" },
	{ "tg",	"togo" },
	{ "th",	"thailand" },
	{ "tj",	"tajikistan" },
	{ "tk",	"tokelau" },
	{ "tm",	"turkmenistan" },
	{ "tn",	"tunisia" },
	{ "to",	"tonga" },
	{ "tp",	"east timor" },
	{ "tr",	"turkey" },
	{ "tt",	"trinidad and tobago" },
	{ "tv",	"tuvalu" },
	{ "tw",	"taiwan" },
	{ "tz",	"tanzania" },
	{ "ua",	"ukraine" },
	{ "ug",	"uganda" },
	{ "uk",	"united kingdom" },
	{ "um",	"us minor outlying islands" },
	{ "us",	"united states" },
	{ "uy",	"uruguay" },
	{ "uz",	"uzbekistan" },
	{ "va",	"vatican city state (holy see)" },
	{ "vc",	"saint vincent and the grenadines" },
	{ "ve",	"venezuela" },
	{ "vg",	"virgin islands (british)" },
	{ "vi",	"virgin islands (u.s.)" },
	{ "vn",	"viet nam" },
	{ "vu",	"vanuatu" },
	{ "wf",	"wallis and futuna islands" },
	{ "wk",	"wake island" },
	{ "ws",	"samoa" },
	{ "yd",	"democratic yemen" },
	{ "ye",	"yemen" },
	{ "yt",	"mayotte" },
	{ "yu",	"yugoslavia" },
	{ "za",	"south africa" },
	{ "zm",	"zambia" },
	{ "zr",	"zaire" },
	{ "zw",	"zimbabwe" },

	/* gov and mil are accepted here too, as they are in goodc */
	{"gov", "gov"},
	{"mil", "mil"},

	{ 0, 0 }
};

/* when nonzero, classify() reports its country decisions on fd 2 */
int classdebug;

/*
 * Report whether s names a country in the table cp.
 * s is compared, ignoring case, first with each row's code and
 * then with its name; the scan stops at the row whose code is 0.
 * Returns 1 on a match, 0 otherwise.
 */
static int
incountries(char *s, Country *cp)
{
	while(cp->code != 0){
		if(cistrcmp(s, cp->code) == 0)
			return 1;
		if(cistrcmp(s, cp->name) == 0)
			return 1;
		cp++;
	}
	return 0;
}

/*
 * Report whether s equals, ignoring case, one of the strings in
 * the nil-terminated list dp.  Returns 1 on a match, 0 otherwise.
 */
static int
indomains(char *s, char **dp)
{
	char *d;

	while((d = *dp++) != nil)
		if(cistrcmp(s, d) == 0)
			return 1;
	return 0;
}

/*
 * Copy the domain name val into buf, truncating it to nbuf-1 bytes,
 * and pass the copy to getfields with "." as the separator set.
 * The field pointers land in f (at most nf of them); the value
 * returned is whatever getfields returns, i.e. the field count.
 */
static int
splitdom(char *val, char *buf, int nbuf, char **f, int nf)
{
	strncpy(buf, val, nbuf);
	buf[nbuf-1] = 0;
	return getfields(buf, f, nf, 0, ".");
}

/*
 * Classify the address ip using the tuples t collected for it.
 *
 * Every "country" tuple is checked against badc and goodc, and the
 * last two labels of every "dom" tuple against badc and gov.  When
 * there is no "country" tuple at all, a country is still considered
 * known if some "ip" tuple equals ip and the "dom" that ndbfindattr
 * returns for it ends in a code or name from allc.
 *
 * Returns
 *	Cunknown	no country could be established
 *	Cbadc		a bad country was seen
 *	Cbadgov	a government domain was seen and some
 *			country tuple was not a good country
 *	Cok		anything else
 */
int
classify(char *ip, Ndbtuple *t)
{
	char namebuf[256], *label[128];
	int sawcountry, bad, good, govdom, nlabel;
	Ndbtuple *tp, *d;

	sawcountry = 0;
	bad = 0;
	govdom = 0;
	good = 1;

	for(tp = t; tp != nil; tp = tp->entry){
		if(strcmp(tp->attr, "country") == 0){
			sawcountry = 1;
			if(incountries(tp->val, badc)){
				if(classdebug)
					fprint(2, "isbadc\n");
				bad = 1;
				good = 0;
			}else if(!incountries(tp->val, goodc)){
				if(classdebug)
					fprint(2, "!isgoodc\n");
				good = 0;
			}
		}

		/*
		 * domain names are only ever used against the address
		 * here, so they are consulted without forward verification
		 */
		if(strcmp(tp->attr, "dom") != 0)
			continue;
		nlabel = splitdom(tp->val, namebuf, sizeof namebuf, label, nelem(label));

		/* a bad country as the last label is always believed */
		if(incountries(label[nlabel-1], badc)){
			if(classdebug)
				fprint(2, "isbadc dom\n");
			bad = 1;
			good = 0;
		}

		/* a government label just before it is always believed */
		if(nlabel > 1 && indomains(label[nlabel-2], gov))
			govdom = 1;
	}

	if(!sawcountry){
		/* look for an ip tuple confirming the forward lookup */
		for(tp = t; tp != nil; tp = tp->entry)
			if(strcmp(tp->attr, "ip") == 0 && strcmp(tp->val, ip) == 0)
				break;

		/* if confirmed, accept a country code ending the domain name */
		if(tp != nil){
			d = ndbfindattr(t, tp, "dom");
			if(d != nil){
				nlabel = splitdom(d->val, namebuf, sizeof namebuf, label, nelem(label));
				if(incountries(label[nlabel-1], allc))
					sawcountry = 1;
			}
		}
	}

	if(!sawcountry)
		return Cunknown;
	if(bad)
		return Cbadc;
	if(govdom && !good)
		return Cbadgov;
	return Cok;
}