逻辑回归一般指logistic回归。 logistic回归又称logistic回归分析,是一种广义的线性回归分析模型,常用于数据挖掘,疾病自动诊断,经济预测等领域。
本小节基于ADFA-LD数据集,使用逻辑回归算法检测Java溢出攻击。ADFA-LD数据集中记录了系统调用序列,并使用系统调用号标识每一个系统调用,这样就将一连串的系统调用转换成了一连串的系统调用号。
1.白样本
正常样本共有833个,代码如下
def load_adfa_training_files(rootdir):
    """Load every regular file directly under *rootdir* as a normal sample.

    Sub-directories are skipped; only entries for which ``os.path.isfile``
    is true are read.

    Args:
        rootdir: Directory that holds the normal (white) trace files.

    Returns:
        A tuple ``(x, y)``. ``x`` is the list of values returned by
        ``load_one_flle`` for each file, and ``y`` is a list of ``0`` labels
        with the same length as ``x``.
    """
    x = []
    y = []
    # Iterate over the entries directly; the previous version stored them in
    # a variable named ``list`` (shadowing the builtin) and indexed by range.
    for name in os.listdir(rootdir):
        path = os.path.join(rootdir, name)
        if os.path.isfile(path):
            x.append(load_one_flle(path))
            print("Load file(%s)" % path)
            y.append(0)
    return x, y
# Load the normal samples; every label in y1 is 0.
x1,y1=load_adfa_training_files("../data/ADFA-LD/Training_Data_Master/")
2.黑样本
黑样本共124个样本,具体代码如下所示:
def dirlist(path, allfile):
    """Recursively collect the paths of all non-directory entries under *path*.

    Args:
        path: Directory to walk.
        allfile: List that the discovered file paths are appended to. It is
            modified in place.

    Returns:
        The same ``allfile`` list object, for convenient chaining.
    """
    for filename in os.listdir(path):
        filepath = os.path.join(path, filename)
        if os.path.isdir(filepath):
            # Descend into the sub-directory, accumulating into the same list.
            dirlist(filepath, allfile)
        else:
            allfile.append(filepath)
    return allfile
file_prefix = r"../data/ADFA-LD/Attack_Data_Master/Java_Meterpreter_"


def load_adfa_java_files(rootdir):
    """Load the attack traces under *rootdir* whose path starts with ``file_prefix``.

    Args:
        rootdir: Directory that is searched recursively (via ``dirlist``).

    Returns:
        A tuple ``(x, y)``. ``x`` is the list of values returned by
        ``load_one_flle`` for each matching file, and ``y`` is a list of ``1``
        labels with the same length as ``x``.
    """
    x = []
    y = []
    for filepath in dirlist(rootdir, []):
        # ``file_prefix`` is a literal path, not a regular expression. The
        # previous ``re.match`` call treated every "." in it as a wildcard,
        # so compare it as a plain string prefix instead.
        if filepath.startswith(file_prefix):
            print("Load file(%s)" % filepath)
            x.append(load_one_flle(filepath))
            y.append(1)
    return x, y
# Load the Java Meterpreter attack samples; every label in y2 is 1.
x2,y2=load_adfa_java_files("../data/ADFA-LD/Attack_Data_Master/")
3.数据集处理
if __name__ == '__main__':
    # Normal samples (label 0) and Java Meterpreter attack samples (label 1).
    x1, y1 = load_adfa_training_files("../data/ADFA-LD/Training_Data_Master/")
    x2, y2 = load_adfa_java_files("../data/ADFA-LD/Attack_Data_Master/")
    # Concatenate samples and labels in the same order so they stay aligned.
    x = x1 + x2
    y = y1 + y2
    print(len(x1), len(x2), len(x))
数据集中黑样本124个,白样本833个,样本总数为957个,输出如下:
833 124 957
4. 特征化处理
# Turn each trace string into a row of token counts (bag-of-words).
# NOTE(review): CountVectorizer's default token_pattern keeps only tokens of
# two or more characters, so any single-digit system call number would be
# ignored here - confirm whether that is intended.
vectorizer = CountVectorizer(min_df=1)
# Fit the vocabulary and convert the sparse result to a dense array in one step.
x = vectorizer.fit_transform(x).toarray()
5.逻辑回归训练
# The MLPClassifier instance that used to be built here was never used by the
# logistic-regression experiment, so it has been dropped.
# C is the inverse of the regularization strength; 1e5 means very weak regularization.
logreg = linear_model.LogisticRegression(C=1e5)
# 10-fold cross-validation in a single process; `score` holds one value per fold.
score = model_selection.cross_val_score(logreg, x, y, n_jobs=1, cv=10)
print(np.mean(score))
(1)报警
FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
(2)报警
ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
修改
# Name the solver explicitly and raise the iteration cap to address the two
# warnings shown above.
logreg = linear_model.LogisticRegression(
    C=1e5,
    solver='liblinear',
    max_iter=10000,
)
6.完整代码
# -*- coding:utf-8 -*-
import re
import os
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import model_selection
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn import linear_model, datasets
def load_one_flle(filename):
    """Return the first line of *filename* without its line terminator.

    Only the first line is read; any further lines in the file are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        The first line with leading/trailing ``"\\n"`` characters removed, or
        an empty string when the file is empty.
    """
    # The unused ``x = []`` local from the previous version has been removed.
    with open(filename) as f:
        line = f.readline()
    return line.strip('\n')
def load_adfa_training_files(rootdir):
    """Load every regular file directly under *rootdir* as a normal sample.

    Sub-directories are skipped; only entries for which ``os.path.isfile``
    is true are read.

    Args:
        rootdir: Directory that holds the normal (white) trace files.

    Returns:
        A tuple ``(x, y)``. ``x`` is the list of values returned by
        ``load_one_flle`` for each file, and ``y`` is a list of ``0`` labels
        with the same length as ``x``.
    """
    x = []
    y = []
    # Iterate over the entries directly; the previous version stored them in
    # a variable named ``list`` (shadowing the builtin) and indexed by range.
    for name in os.listdir(rootdir):
        path = os.path.join(rootdir, name)
        if os.path.isfile(path):
            x.append(load_one_flle(path))
            y.append(0)
    return x, y
def dirlist(path, allfile):
    """Recursively collect the paths of all non-directory entries under *path*.

    Args:
        path: Directory to walk.
        allfile: List that the discovered file paths are appended to. It is
            modified in place.

    Returns:
        The same ``allfile`` list object, for convenient chaining.
    """
    for filename in os.listdir(path):
        filepath = os.path.join(path, filename)
        if os.path.isdir(filepath):
            # Descend into the sub-directory, accumulating into the same list.
            dirlist(filepath, allfile)
        else:
            allfile.append(filepath)
    return allfile
file_prefix = r"../data/ADFA-LD/Attack_Data_Master/Java_Meterpreter_"


def load_adfa_java_files(rootdir):
    """Load the attack traces under *rootdir* whose path starts with ``file_prefix``.

    Args:
        rootdir: Directory that is searched recursively (via ``dirlist``).

    Returns:
        A tuple ``(x, y)``. ``x`` is the list of values returned by
        ``load_one_flle`` for each matching file, and ``y`` is a list of ``1``
        labels with the same length as ``x``.
    """
    x = []
    y = []
    for filepath in dirlist(rootdir, []):
        # ``file_prefix`` is a literal path, not a regular expression. The
        # previous ``re.match`` call treated every "." in it as a wildcard,
        # so compare it as a plain string prefix instead.
        if filepath.startswith(file_prefix):
            x.append(load_one_flle(filepath))
            y.append(1)
    return x, y
if __name__ == '__main__':
    # Normal samples (label 0) and Java Meterpreter attack samples (label 1).
    x1, y1 = load_adfa_training_files("../data/ADFA-LD/Training_Data_Master/")
    x2, y2 = load_adfa_java_files("../data/ADFA-LD/Attack_Data_Master/")
    # Concatenate samples and labels in the same order so they stay aligned.
    x = x1 + x2
    y = y1 + y2
    print(len(x1), len(x2), len(x))

    # Turn each trace string into a row of token counts (bag-of-words).
    # NOTE(review): CountVectorizer's default token_pattern keeps only tokens
    # of two or more characters, so any single-digit system call number would
    # be ignored here - confirm whether that is intended.
    vectorizer = CountVectorizer(min_df=1)
    x = vectorizer.fit_transform(x)
    x = x.toarray()

    # The MLPClassifier instance that used to be built here was never used,
    # so it has been dropped; only logistic regression is evaluated.
    logreg = linear_model.LogisticRegression(C=1e5, solver='liblinear', max_iter=10000)
    # 10-fold cross-validation in a single process; `score` holds one value per fold.
    score = model_selection.cross_val_score(logreg, x, y, n_jobs=1, cv=10)
    print(np.mean(score))
7.运行结果
[0.93814433 0.98969072 0.93814433 0.95833333 0.93684211 0.77894737
0.91578947 0.96842105 0.97894737 0.97894737]
0.938220745161874
如上,前两行为10折交叉验证中各折的准确率(即score),最后一行为其平均值,平均准确率约为93.8%
本文为互联网自动采集或经作者授权后发布,本文观点不代表立场,若侵权下架请联系我们删帖处理!文章出自:https://blog.csdn.net/mooyuan/article/details/122759134