基于机器学习的WAF模型探究6
验证猜想:想做一个报文攻击类别分类器是一个比较棘手的问题,今天想用SVM跑一下3分类,分别是正常报文,XSS报文,SQL报文。在尝试使用rbf核函数的时候,发现正常样本被分类到XSS中,换成linear就好了,大雾。由于后面的攻击检测引擎是SVM2分类,所以多分类之后再2分类验证的意义不太大,毕竟都是SVM也没啥好比较的,直接单引擎写一个小小的demo验证一下思路。首先python文件跑通了,然后
·
验证猜想:
想做一个报文攻击类别分类器是一个比较棘手的问题,今天想用SVM跑一下3分类,分别是正常报文,XSS报文,SQL报文。在尝试使用rbf核函数的时候,发现正常样本被分类到XSS中,换成linear就好了,大雾。
由于后面的攻击检测引擎是SVM2分类,所以多分类之后再2分类验证的意义不太大,毕竟都是SVM也没啥好比较的,直接单引擎写一个小小的demo验证一下思路。
首先python文件跑通了,然后ctest跑通了,最后Nginx也跑通了,看上去一个小小的demo就出来了,算是迈出了基于机器学习的WAF模型探究的第一步,后续还需要研究Nginx如何输送报文给我们的引擎。
AB压力测试结果:
以下是 nginx 只开 1 个 worker 进程时的结果;如果 worker 进程多一些,数据会更好看。需要注意的是,这个 demo 会对每一个 HTTP 请求都做检查,所以吞吐量偏低;这样做是好是坏还不确定——隐约记得 ModSecurity 只检查部分 HTTP 请求(好像是用户发起的请求)。
Document Path: /
Document Length: 7093 bytes
Concurrency Level: 100
Time taken for tests: 26.555 seconds
Complete requests: 10000
Failed requests: 0
Total transferred: 72450000 bytes
HTML transferred: 70930000 bytes
Requests per second: 376.57 [#/sec] (mean)
Time per request: 265.554 [ms] (mean)
Time per request: 2.656 [ms] (mean, across all concurrent requests)
Transfer rate: 2664.31 [Kbytes/sec] received
模型结构:
├── a.out
├── config
├── data
│ ├── good-10000.txt
│ ├── good.txt
│ ├── sql-10000.txt
│ └── xss-200000.txt
├── model
│ └── waf.pkl
├── ngx_http_aisecurity_module.c
├── run.sh
├── test.c
├── waf.c
├── waf.h
├── waf.h.gch
├── waf.py
└── waf.pyc
部分代码:
// ngx_http_aisecurity_module.c by ailx10
#include "waf.h"
#include <ngx_config.h>
#include <ngx_core.h>
#include <ngx_http.h>
static ngx_int_t ngx_http_aisecurity_handler(ngx_http_request_t *r);
static ngx_int_t ngx_http_aisecurity_init(ngx_conf_t *cf);
static void *ngx_http_aisecurity_create_main_conf(ngx_conf_t *cf);
char *ngx_str_to_char(ngx_str_t a, ngx_pool_t *p);
const char** get_data(const char* uri,const char* method);
void free_data(const char** pkt_addr);
/*
 * Main-level configuration of the aisecurity module.
 * One instance is allocated by ngx_http_aisecurity_create_main_conf().
 */
typedef struct {
    PyObject    *pEngine;  /* model object returned by aisec_waf_load() */
    PyObject    *pModule;  /* imported Python module returned by aisec_waf_init() */
    ngx_flag_t   enable;   /* value of the "aisecurity" directive */
    void        *pool;     /* configuration pool (cf->pool) the struct was allocated from */
} ngx_http_aisecurity_conf_t;
ngx_int_t aisecurity_process_uri(ngx_http_aisecurity_conf_t* cf,const char** pkt_loads);
static ngx_command_t ngx_http_aisecurity_commands[] = {
{
ngx_string("aisecurity"),
NGX_HTTP_MAIN_CONF|NGX_CONF_TAKE1,
ngx_conf_set_flag_slot,
NGX_HTTP_MAIN_CONF_OFFSET,
offsetof(ngx_http_aisecurity_conf_t, enable),
NULL,
},
ngx_null_command
};
/*
 * HTTP module context: only the post-configuration hook and the
 * main-configuration constructor are provided; every other slot is unused.
 * Slot names follow nginx's ngx_http_module_t layout.
 */
static ngx_http_module_t ngx_http_aisecurity_module_ctx = {
    NULL,                                   /* preconfiguration */
    ngx_http_aisecurity_init,               /* postconfiguration */

    ngx_http_aisecurity_create_main_conf,   /* create main configuration */
    NULL,                                   /* init main configuration */

    NULL,                                   /* create server configuration */
    NULL,                                   /* merge server configuration */

    NULL,                                   /* create location configuration */
    NULL                                    /* merge location configuration */
};
/*
 * Module descriptor picked up by the nginx build. No process/thread
 * lifecycle hooks are installed. Slot names follow nginx's ngx_module_t
 * layout.
 */
ngx_module_t ngx_http_aisecurity_module = {
    NGX_MODULE_V1,
    &ngx_http_aisecurity_module_ctx,    /* module context */
    ngx_http_aisecurity_commands,       /* module directives */
    NGX_HTTP_MODULE,                    /* module type */
    NULL,                               /* init master */
    NULL,                               /* init module */
    NULL,                               /* init process */
    NULL,                               /* init thread */
    NULL,                               /* exit thread */
    NULL,                               /* exit process */
    NULL,                               /* exit master */
    NGX_MODULE_V1_PADDING
};
/*
 * Post-configuration hook: appends ngx_http_aisecurity_handler to the
 * handler list of the rewrite phase.
 *
 * Returns NGX_OK on success, NGX_ERROR when the core main configuration is
 * unavailable or the handler array cannot grow.
 */
static ngx_int_t ngx_http_aisecurity_init(ngx_conf_t *cf)
{
    ngx_http_core_main_conf_t  *core_conf;
    ngx_http_handler_pt        *slot;

    core_conf = ngx_http_conf_get_module_main_conf(cf, ngx_http_core_module);

    if (core_conf != NULL) {
        slot = ngx_array_push(&core_conf->phases[NGX_HTTP_REWRITE_PHASE].handlers);

        if (slot != NULL) {
            *slot = ngx_http_aisecurity_handler;
            return NGX_OK;
        }
    }

    return NGX_ERROR;
}
/*
 * Allocate the module's main configuration and load the Python model.
 *
 * The Python interpreter is started and the model is loaded here, i.e. once
 * while the configuration is being parsed, and the resulting objects are
 * kept in the configuration for ngx_http_aisecurity_handler().
 *
 * Returns the new configuration, or NULL on failure. nginx checks the result
 * of create_main_conf against NULL only, so returning NGX_CONF_ERROR
 * ((void *) -1) would be stored as if it were a valid configuration pointer.
 */
static void *ngx_http_aisecurity_create_main_conf(ngx_conf_t *cf)
{
    ngx_http_aisecurity_conf_t  *conf;

    conf = ngx_pcalloc(cf->pool, sizeof(ngx_http_aisecurity_conf_t));
    if (conf == NULL) {
        return NULL;
    }

    conf->pModule = aisec_waf_init();
    if (conf->pModule == NULL) {
        ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
                           "aisecurity: failed to import the waf module");
        return NULL;
    }

    /* aisec_waf_load() returns NULL when the Python call raises. */
    conf->pEngine = aisec_waf_load(conf->pModule);
    if (conf->pEngine == NULL) {
        ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
                           "aisecurity: failed to load the waf model");
        return NULL;
    }

    conf->enable = NGX_CONF_UNSET;
    conf->pool = cf->pool;

    printf("(nginx)%p\t%p\n", (void *) conf->pModule, (void *) conf->pEngine);

    return conf;
}
/*
 * Rewrite-phase handler: classifies the request's raw URI and method with
 * the loaded model.
 *
 * Returns
 *   NGX_DECLINED                    module disabled, or request looks benign;
 *   NGX_HTTP_FORBIDDEN              the engine returned a non-zero verdict
 *                                   (this includes its -1 error value, so an
 *                                   engine failure blocks the request);
 *   NGX_HTTP_INTERNAL_SERVER_ERROR  a buffer could not be allocated.
 */
static ngx_int_t ngx_http_aisecurity_handler(ngx_http_request_t *r)
{
    ngx_http_aisecurity_conf_t  *cf;
    const char                  *uri;
    const char                  *method;
    const char                 **pkt_addr;
    ngx_int_t                    atk;

    cf = ngx_http_get_module_main_conf(r, ngx_http_aisecurity_module);
    if (cf == NULL || cf->enable != 1) {
        return NGX_DECLINED;
    }

    uri = ngx_str_to_char(r->unparsed_uri, r->pool);
    method = ngx_str_to_char(r->method_name, r->pool);

    /* ngx_str_to_char() signals allocation failure with (char *) -1 ... */
    if (uri == (char *) -1 || method == (char *) -1) {
        return NGX_HTTP_INTERNAL_SERVER_ERROR;
    }

    /* ... and returns NULL for an empty string; classify it as "". */
    if (uri == NULL) {
        uri = "";
    }
    if (method == NULL) {
        method = "";
    }

    pkt_addr = get_data(uri, method);
    if (pkt_addr == NULL) {
        return NGX_HTTP_INTERNAL_SERVER_ERROR;
    }

    atk = aisecurity_process_uri(cf, pkt_addr);
    free_data(pkt_addr);

    return atk ? NGX_HTTP_FORBIDDEN : NGX_DECLINED;
}
/*
 * Bundle the request fields into a two-element array {uri, method}.
 *
 * The strings are NOT copied: the array only borrows the pointers, so they
 * must outlive it. The array itself is heap-allocated and must be released
 * with free_data().
 *
 * Returns the array, or NULL if the allocation fails.
 */
const char** get_data(const char* uri,const char* method)
{
    const char **pkt_addr = malloc(2 * sizeof *pkt_addr);

    if (pkt_addr == NULL) {
        return NULL;
    }

    pkt_addr[0] = uri;
    pkt_addr[1] = method;

    /* Passing NULL to %s is undefined behaviour; print a placeholder. */
    printf("pkt:\n%s\n%s\n",
           uri != NULL ? uri : "(null)",
           method != NULL ? method : "(null)");

    return pkt_addr;
}
/*
 * Release an array obtained from get_data(). Only the array is freed; the
 * strings it points to are left untouched.
 */
void free_data(const char** pkt_addr)
{
    free((void *) pkt_addr);
}
/*
 * Run the model stored in `cf` over the request fields in `pkt_loads` and
 * return the verdict produced by aisec_waf_predict().
 */
ngx_int_t aisecurity_process_uri(ngx_http_aisecurity_conf_t* cf,const char** pkt_loads)
{
    return aisec_waf_predict(cf->pModule, cf->pEngine, pkt_loads);
}
/*
 * Copy an ngx_str_t into a NUL-terminated C string allocated from pool `p`.
 *
 * Returns
 *   NULL         when the input is empty (a.len == 0);
 *   (char *) -1  when the pool allocation fails;
 *   the copy     otherwise.
 * Callers must test for both sentinels before using the result.
 */
ngx_inline char *ngx_str_to_char(ngx_str_t a, ngx_pool_t *p)
{
    char  *copy = NULL;

    if (a.len != 0) {
        copy = ngx_pnalloc(p, a.len + 1);
        if (copy == NULL) {
            return (char *) -1;
        }

        ngx_memcpy(copy, a.data, a.len);
        copy[a.len] = '\0';
    }

    return copy;
}
// waf.c by ailx10
#include "waf.h"
/*
 * Start the embedded Python interpreter, add the project directory to
 * sys.path and import the "waf" module.
 *
 * Returns a new reference to the module. If the import fails, the Python
 * traceback (if any) and a message are printed and the process exits with a
 * failure status.
 */
PyObject* aisec_waf_init()
{
    PyObject *moduleName;
    PyObject *pModule;

    Py_Initialize();
    PyRun_SimpleString("import sys");
    PyRun_SimpleString("sys.path.append('/home/nginx/aisecurity-0.2')");

    moduleName = PyString_FromString("waf");
    pModule = (moduleName != NULL) ? PyImport_Import(moduleName) : NULL;
    /* The name object is only needed for the import call. */
    Py_XDECREF(moduleName);

    if (!pModule)
    {
        /* PyErr_Print() must only be called while an error is set. */
        if (PyErr_Occurred())
        {
            PyErr_Print();
        }
        printf("[C++] Python get module failed.\n");
        exit(EXIT_FAILURE);
    }

    printf("[C++]waf load pModule ok :%p\n", (void *)pModule);
    return pModule;
}
/*
 * Call waf_load() in the given Python module and return its result (the
 * model object) as a new reference.
 *
 * If the module has no callable "waf_load", a message is printed and the
 * process exits with a failure status. If the call itself raises, the
 * traceback is printed and NULL is returned; callers must check for NULL.
 */
PyObject* aisec_waf_load(PyObject* pModule)
{
    PyObject *waf_load = PyObject_GetAttrString(pModule,"waf_load");
    PyObject *waf_engine;

    if (!waf_load || !PyCallable_Check(waf_load))
    {
        /* PyErr_Print() must only be called while an error is set. */
        if (PyErr_Occurred())
        {
            PyErr_Print();
        }
        printf("[C++] Can't find funftion (waf_load).\n");
        exit(EXIT_FAILURE);
    }

    waf_engine = PyObject_CallObject(waf_load,NULL);
    /* The callable is no longer needed once it has been invoked. */
    Py_DECREF(waf_load);

    if (waf_engine == NULL)
    {
        if (PyErr_Occurred())
        {
            PyErr_Print();
        }
        printf("[C++] Can't run function (waf_load).\n");
        return NULL;
    }

    printf("[C++]waf load pEngine ok :%p\n", (void *)waf_engine);
    return waf_engine;
}
/*
 * Classify one request with the Python function waf_predict(engine, fields).
 *
 * pModule    imported "waf" module (borrowed reference).
 * pEngine    model object from aisec_waf_load() (borrowed reference).
 * pkt_loads  array of two C strings; a NULL entry is classified as "".
 *
 * Returns the integer flag produced by waf_predict(), or -1 when the call
 * cannot be made or its result cannot be decoded. The function never
 * terminates the process: it runs once per request, so a Python-side failure
 * must not bring the caller down. All temporary Python objects are released
 * before returning.
 */
int aisec_waf_predict(PyObject* pModule,PyObject* pEngine,const char** pkt_loads)
{
    enum { FIELDS_NUM = 2 };

    int isAtk = -1;
    int i;
    PyObject *waf_predict = NULL;
    PyObject *args = NULL;
    PyObject *fields = NULL;
    PyObject *waf_predict_info = NULL;

    printf("[C++]waf predicting ....\n");
    printf("[C++]pModule=%p\tpEngine=%p\n", (void *)pModule, (void *)pEngine);

    if (pModule == NULL || pEngine == NULL || pkt_loads == NULL)
    {
        return isAtk;
    }

    waf_predict = PyObject_GetAttrString(pModule, "waf_predict");
    if (!waf_predict || !PyCallable_Check(waf_predict))
    {
        printf("[C++] Can't find funftion (waf_predict).\n");
        goto done;
    }

    args = PyTuple_New(2);
    fields = PyList_New(FIELDS_NUM);
    if (args == NULL || fields == NULL)
    {
        goto done;
    }

    for (i = 0; i < FIELDS_NUM; i++)
    {
        const char *load = (pkt_loads[i] != NULL) ? pkt_loads[i] : "";
        PyObject *item = Py_BuildValue("s", load);

        if (item == NULL)
        {
            goto done;
        }
        /* PyList_SetItem takes ownership of item. */
        PyList_SetItem(fields, i, item);
        printf("pkt_load = %s\n", load);
    }

    /*
     * PyTuple_SetItem steals a reference. pEngine is only borrowed from the
     * caller, so hand the tuple its own reference; otherwise releasing the
     * tuple would drop the caller's model object.
     */
    Py_INCREF(pEngine);
    PyTuple_SetItem(args, 0, pEngine);
    PyTuple_SetItem(args, 1, fields);
    fields = NULL;  /* now owned by args */

    waf_predict_info = PyObject_CallObject(waf_predict, args);
    if (waf_predict_info == NULL)
    {
        printf("[C++] Can't run funftion (waf_predict).\n");
        goto done;
    }

    if (!PyArg_ParseTuple(waf_predict_info, "i", &isAtk))
    {
        isAtk = -1;
        goto done;
    }

    printf("isAtk: %d\n", isAtk);

done:
    /* Report and clear any pending Python error so it cannot leak into the
     * next request. PyErr_Print() must only be called while one is set. */
    if (PyErr_Occurred())
    {
        PyErr_Print();
    }
    Py_XDECREF(waf_predict_info);
    Py_XDECREF(fields);
    Py_XDECREF(args);
    Py_XDECREF(waf_predict);
    return isAtk;
}
# waf.py by ailx10
import re
import urllib
from sklearn import svm
from sklearn.externals import joblib
from sklearn import metrics
from sklearn.model_selection import train_test_split
def get_len(url):
    """Return the length of ``url``."""
    size = len(url)
    return size
def get_url_count(url):
    """Return 1 if ``url`` contains ``http://`` or ``https://`` (any case), else 0."""
    found = re.search('(http://)|(https://)', url, re.IGNORECASE)
    return 1 if found else 0
def get_xss_evil_char(url):
    """Count the occurrences of the characters ``< > , ' " /`` in ``url``."""
    hits = re.finditer("[<>,\'\"/]", url, re.IGNORECASE)
    return sum(1 for _ in hits)
def get_xss_evil_word(url):
    """Count XSS-related keywords in ``url`` (case-insensitive).

    Keywords: alert, script=, %3c, %3e, %20, onerror, onload, eval, src=,
    prompt.

    The original pattern was missing the ``|`` between ``(script=)`` and
    ``(%3c)``, so the two only matched as the single token ``script=%3c``;
    they are separate alternatives now. This changes feature values, so a
    model trained with the old pattern must be retrained.
    """
    # NOTE(review): get_feature() and get_pkt_feature() URL-decode the input
    # before calling this function, so the percent-encoded alternatives can
    # only match double-encoded input -- confirm whether that is intended.
    pattern = "(alert)|(script=)|(%3c)|(%3e)|(%20)|(onerror)|(onload)|(eval)|(src=)|(prompt)"
    return len(re.findall(pattern, url, re.IGNORECASE))
def get_sql_evil_char(url):
    """Count the occurrences of the characters ``- , ' " * /`` in ``url``."""
    hits = re.finditer("[-,\'\"*/]", url, re.IGNORECASE)
    return sum(1 for _ in hits)
def get_sql_evil_word(url):
    """Count SQL-related keywords in ``url`` (case-insensitive).

    Keywords: SELECT, CASE, WHEN, ORDER, GROUP, count, %2C%20, char, NULL,
    AND. They are matched as substrings, not as whole words.
    """
    keywords = re.compile("(SELECT)|(CASE)|(WHEN)|(ORDER)|(GROUP)|(count)|(%2C%20)|(char)|(NULL)|(AND)", re.IGNORECASE)
    matches = keywords.findall(url)
    return len(matches)
def get_feature(filename,x,y,atk_index):
    """Append one feature row and one label per line of ``filename``.

    Each line has its trailing newline stripped and is URL-decoded; the six
    features (length, URL flag, XSS chars, XSS words, SQL chars, SQL words)
    are appended to ``x`` as a list and ``atk_index`` is appended to ``y``.
    Both lists are modified in place; nothing is returned.
    """
    extractors = (
        get_len,
        get_url_count,
        get_xss_evil_char,
        get_xss_evil_word,
        get_sql_evil_char,
        get_sql_evil_word,
    )
    with open(filename) as handle:
        for raw in handle:
            sample = urllib.unquote(raw.strip('\n'))
            x.append([extract(sample) for extract in extractors])
            y.append(atk_index)
def do_metrics(y_test,y_pred):
    """Print accuracy, confusion matrix and per-class recall for a prediction.

    ``y_test`` holds the true labels and ``y_pred`` the predicted ones. The
    final "saved!" line is printed unconditionally; this function itself does
    not save anything.
    """
    # A parenthesised single argument prints identically with the Python 2
    # print statement and the print() function.
    print("metrics.accuracy_score:")
    print(metrics.accuracy_score(y_test, y_pred))
    print("metrics.confusion_matrix:")
    print(metrics.confusion_matrix(y_test, y_pred))
    print("metrics.recall_score:")
    print(metrics.recall_score(y_test, y_pred,average=None))
    print("saved!")
def train():
    """Train a linear-kernel SVM on the three sample files and save it.

    Labels: 2 = SQL injection, 1 = XSS, 0 = benign. 20% of the samples are
    held out (random_state=0) and used for the printed metrics. The model is
    written to ./model/waf.pkl, relative to the current working directory.
    """
    x, y = [], []
    datasets = (
        ('./data/sql-10000.txt', 2),
        ('./data/xss-200000.txt', 1),
        ('./data/good-10000.txt', 0),
    )
    for path, label in datasets:
        get_feature(path, x, y, label)

    x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=0)

    clf = svm.SVC(kernel='linear')
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    joblib.dump(clf,"./model/waf.pkl")
    do_metrics(y_test, y_pred)
def get_pkt_feature(pkts):
    """Return one six-element feature row per string in ``pkts``.

    Every string is URL-decoded first; the features are, in order: length,
    URL flag, XSS chars, XSS words, SQL chars, SQL words.
    """
    extractors = (
        get_len,
        get_url_count,
        get_xss_evil_char,
        get_xss_evil_word,
        get_sql_evil_char,
        get_sql_evil_word,
    )
    rows = []
    for pkt in pkts:
        decoded = urllib.unquote(pkt)
        rows.append([extract(decoded) for extract in extractors])
    return rows
def waf_load():
    """Load and return the trained classifier from its fixed absolute path."""
    # NOTE(review): joblib.load unpickles the file, so the model file must
    # come from a trusted source.
    model_path = "/home/nginx/aisecurity-0.2/model/waf.pkl"
    return joblib.load(model_path)
def waf_predict(clf,pkts):
    """Classify the strings in ``pkts`` with ``clf``.

    Returns the one-element tuple ``(1,)`` if any string is predicted as a
    class greater than 0, otherwise ``(0,)``.
    """
    features = get_pkt_feature(pkts)
    labels = clf.predict(features)
    atk_flag = 1 if any(label > 0 for label in labels) else 0
    return (atk_flag,)
def test(clf):
    """Print the predictions of ``clf`` for three hand-written samples
    (plain text, an XSS payload and an SQL-injection payload)."""
    samples = [
        "i love web security!",
        "<script>alert(1)</script>",
        "-2' union select group_concat(Username),2,3 from Person",
    ]
    print(clf.predict(get_pkt_feature(samples)))
if __name__ == "__main__":
    # Script entry point: retrain and save the model, then reload it from
    # disk and print predictions for the built-in samples.
    train()
    clf = waf_load()
    test(clf)
更多推荐




所有评论(0)