本小节通过RNN识别异常操作。
1、数据集
def load_user_cmd_new(filename):
    """Read a user's command history and group it into fixed blocks of 100.

    Returns:
        (cmd_list, fdist): the list of 100-command blocks (a trailing
        partial block is silently dropped) and the vocabulary of distinct
        commands in FreqDist key order.
    """
    cmd_list = []
    all_cmds = []
    with open(filename) as f:
        block = []
        for raw in f:
            cmd = raw.strip('\n')
            block.append(cmd)
            all_cmds.append(cmd)
            if len(block) == 100:
                cmd_list.append(block)
                block = []
    fdist = list(FreqDist(all_cmds).keys())
    return cmd_list, fdist
if __name__ == '__main__':
    # Load User7's history as 100-command blocks plus the command vocabulary.
    user_cmd_list, dist = load_user_cmd_new("../data/MasqueradeDat/User7")
    print("Dist:(%s)" % dist)
    # Vocabulary size — becomes the one-hot feature dimension of the RNN input.
    n_words = len(dist)
2、特征处理
def get_user_cmd_feature_new(user_cmd_list, dist):
    """One-hot encode every command of every block.

    Args:
        user_cmd_list: list of command blocks, each a list of command strings.
        dist: vocabulary list of unique commands; its order fixes the
            one-hot index of each command.

    Returns:
        A list parallel to user_cmd_list; each element is a list of
        len(dist)-dimensional one-hot vectors. A command absent from
        dist yields an all-zero vector (same as the original scan).
    """
    # Precompute command -> position once: O(1) lookup per command instead
    # of the original O(len(dist)) linear scan for every command.
    index = {cmd: i for i, cmd in enumerate(dist)}
    dim = len(dist)
    user_cmd_feature = []
    for cmd_block in user_cmd_list:
        vectors = []
        for cmd in cmd_block:
            v = [0] * dim
            pos = index.get(cmd)
            if pos is not None:
                v[pos] = 1
            vectors.append(v)
        user_cmd_feature.append(vectors)
    return user_cmd_feature
# One-hot features for every 100-command block.
user_cmd_feature = get_user_cmd_feature_new(user_cmd_list, dist)
# Column 6 (0-based) of label.txt — presumably the label column for User7;
# TODO confirm against the MasqueradeDat documentation.
labels = get_label("../data/MasqueradeDat/label.txt", 6)
# The first 50 blocks get label 0 — NOTE(review): assumes the dataset's
# leading 50 blocks per user are known-normal; verify.
y = [0] * 50 + labels
关于特征化和数据集处理,可以参考之前5.4节的博客信息 《Web安全之机器学习入门》笔记:第五章 5.4 K近邻检测异常操作(二)_mooyuan的博客-CSDN博客
3、 RNN模型构建
# Network topology: two stacked 10-unit LSTMs over 100-step one-hot command
# sequences, with a 2-way softmax head for the normal/anomalous decision.
network = tflearn.input_data(shape=[None, 100, n_words])
network = tflearn.lstm(network, 10, return_seq=True)
network = tflearn.lstm(network, 10)
network = tflearn.fully_connected(network, 2, activation='softmax')
network = tflearn.regression(
    network,
    optimizer='adam',
    learning_rate=0.1,
    name="output",
    loss='categorical_crossentropy',
)

# Fit, reporting metrics on the held-out split each epoch.
model = tflearn.DNN(network, tensorboard_verbose=3)
model.fit(x_train, y_train, validation_set=(x_test, y_test),
          show_metric=True, batch_size=32, run_id="maidou")
4、完整代码
# -*- coding:utf-8 -*-
import numpy as np
from nltk.probability import FreqDist
from tflearn.data_utils import to_categorical, pad_sequences
import tflearn
# Train/test split point. NOTE(review): the original comment said "number of
# test samples", but blocks [0:N] (80) are the TRAINING set and [N:150]
# (70) the validation set — see the __main__ slicing below.
N = 80
def load_user_cmd_new(filename):
    """Split a user's command history into consecutive 100-command blocks.

    Returns:
        (cmd_list, fdist) where cmd_list holds the 100-command blocks
        (any trailing partial block is discarded, exactly as before) and
        fdist is the list of distinct commands in FreqDist key order.
    """
    cmd_list = []
    seen = []
    with open(filename) as f:
        current = []
        for raw in f:
            cmd = raw.strip('\n')
            current.append(cmd)
            seen.append(cmd)
            if len(current) == 100:
                cmd_list.append(current)
                current = []
    fdist = list(FreqDist(seen).keys())
    return cmd_list, fdist
def load_user_cmd(filename):
    """Load 100-command blocks plus the extreme-frequency command sets.

    Returns:
        (cmd_list, dist_max, dist_min): the blocks, the set of the first
        50 FreqDist keys, and the set of the last 50 FreqDist keys.
        NOTE(review): the [0:50] / [-50:] slices assume FreqDist.keys()
        is frequency-ordered — verify against the installed nltk version.
    """
    cmd_list = []
    seen = []
    with open(filename) as f:
        current = []
        for raw in f:
            cmd = raw.strip('\n')
            current.append(cmd)
            seen.append(cmd)
            if len(current) == 100:
                cmd_list.append(current)
                current = []
    fdist = list(FreqDist(seen).keys())
    dist_max = set(fdist[:50])
    dist_min = set(fdist[-50:])
    return cmd_list, dist_max, dist_min
def get_user_cmd_feature(user_cmd_list, dist_max, dist_min):
    """Build the 3-dim statistical feature [f1, f2, f3] for each block:
    f1 = number of distinct commands in the block,
    f2 = overlap of the block's first 10 FreqDist keys with dist_max,
    f3 = overlap of the block's last 10 FreqDist keys with dist_min.
    """
    features = []
    for block in user_cmd_list:
        keys = list(FreqDist(block).keys())
        f1 = len(set(block))
        f2 = len(set(keys[0:10]) & set(dist_max))
        f3 = len(set(keys[-10:]) & set(dist_min))
        features.append([f1, f2, f3])
    return features
def get_user_cmd_feature_new(user_cmd_list, dist):
    """One-hot encode every command of every block.

    Args:
        user_cmd_list: list of command blocks, each a list of command strings.
        dist: vocabulary list of unique commands; its order fixes the
            one-hot index of each command.

    Returns:
        A list parallel to user_cmd_list; each element is a list of
        len(dist)-dimensional one-hot vectors. A command absent from
        dist yields an all-zero vector (same as the original scan).
    """
    # Precompute command -> position once: O(1) lookup per command instead
    # of the original O(len(dist)) linear scan for every command.
    index = {cmd: i for i, cmd in enumerate(dist)}
    dim = len(dist)
    user_cmd_feature = []
    for cmd_block in user_cmd_list:
        vectors = []
        for cmd in cmd_block:
            v = [0] * dim
            pos = index.get(cmd)
            if pos is not None:
                v[pos] = 1
            vectors.append(v)
        user_cmd_feature.append(vectors)
    return user_cmd_feature
def get_label(filename, index=0):
    """Read whitespace-separated label rows; return column `index` as ints."""
    with open(filename) as f:
        return [int(line.strip('\n').split()[index]) for line in f]
def do_knn(x_train, y_train, x_test, y_test):
    """Train a 3-nearest-neighbour classifier and print its test accuracy (%).

    NOTE(review): KNeighborsClassifier is never imported anywhere in this
    file (`from sklearn.neighbors import KNeighborsClassifier` is missing),
    so calling this function raises NameError as-is. The function is also
    never called from __main__ — it appears to be leftover from the KNN
    chapter this post builds on.
    """
    neigh = KNeighborsClassifier(n_neighbors=3)
    neigh.fit(x_train, y_train)
    y_predict = neigh.predict(x_test)
    # Elementwise comparison (y_predict is a numpy array, so the list
    # comparison broadcasts); mean of the booleans is the accuracy fraction.
    score = np.mean(y_test == y_predict) * 100
    print(score)
def do_rnn(x_train, x_test, y_train, y_test):
    """Train the stacked-LSTM masquerade detector on one-hot command blocks.

    Reads the module-level `n_words` (vocabulary size) set in __main__.
    """
    global n_words
    print("GET n_words embedding %d" % n_words)
    # Inputs already arrive as fixed 100-step one-hot sequences, so the
    # padding step stays disabled:
    #x_train = pad_sequences(x_train, maxlen=100, value=0.)
    #x_test = pad_sequences(x_test, maxlen=100, value=0.)

    # One-hot the binary labels for the softmax / cross-entropy head.
    y_train = to_categorical(y_train, nb_classes=2)
    y_test = to_categorical(y_test, nb_classes=2)

    # Two stacked 10-unit LSTMs over (100, n_words) sequences, 2-way softmax.
    network = tflearn.input_data(shape=[None, 100, n_words])
    network = tflearn.lstm(network, 10, return_seq=True)
    network = tflearn.lstm(network, 10)
    network = tflearn.fully_connected(network, 2, activation='softmax')
    network = tflearn.regression(network, optimizer='adam', learning_rate=0.1,
                                 name="output", loss='categorical_crossentropy')

    model = tflearn.DNN(network, tensorboard_verbose=3)
    model.fit(x_train, y_train, validation_set=(x_test, y_test),
              show_metric=True, batch_size=32, run_id="maidou")
if __name__ == '__main__':
    # Vocabulary and 100-command blocks for User7.
    user_cmd_list, dist = load_user_cmd_new("../data/MasqueradeDat/User7")
    print("Dist:(%s)" % dist)
    n_words = len(dist)  # read by do_rnn via `global n_words`

    user_cmd_feature = get_user_cmd_feature_new(user_cmd_list, dist)

    # Column 6 of label.txt supplies this user's labels; the [0]*50 prefix
    # presumably labels the dataset's leading known-normal blocks — verify.
    labels = get_label("../data/MasqueradeDat/label.txt", 6)
    y = [0] * 50 + labels

    # First N blocks train; the rest (up to block 150) validate.
    x_train, x_test = user_cmd_feature[:N], user_cmd_feature[N:150]
    y_train, y_test = y[:N], y[N:150]
    do_rnn(x_train, x_test, y_train, y_test)
5、运行结果
Log directory: /tmp/tflearn_logs/
---------------------------------
Training samples: 80
Validation samples: 70
--
Training Step: 1 | time: 34.305s
| Adam | epoch: 001 | loss: 0.00000 - acc: 0.0000 -- iter: 32/80
Training Step: 2 | total loss: 0.62365 | time: 34.435s
| Adam | epoch: 001 | loss: 0.62365 - acc: 0.5344 -- iter: 64/80
Training Step: 3 | total loss: 0.48411 | time: 40.440s
| Adam | epoch: 001 | loss: 0.48411 - acc: 0.8898 | val_loss: 0.29831 - val_acc: 0.9143 -- iter: 80/80
--
Training Step: 4 | total loss: 0.40294 | time: 0.098s
| Adam | epoch: 002 | loss: 0.40294 - acc: 0.8787 -- iter: 32/80
Training Step: 5 | total loss: 0.28533 | time: 0.205s
| Adam | epoch: 002 | loss: 0.28533 - acc: 0.9194 -- iter: 64/80
Training Step: 6 | total loss: 0.33141 | time: 1.358s
| Adam | epoch: 002 | loss: 0.33141 - acc: 0.9109 | val_loss: 0.34880 - val_acc: 0.9143 -- iter: 80/80
--
Training Step: 7 | total loss: 0.35884 | time: 0.083s
| Adam | epoch: 003 | loss: 0.35884 - acc: 0.9081 -- iter: 32/80
Training Step: 8 | total loss: 0.30322 | time: 0.203s
| Adam | epoch: 003 | loss: 0.30322 - acc: 0.9247 -- iter: 64/80
Training Step: 9 | total loss: 0.27859 | time: 1.314s
| Adam | epoch: 003 | loss: 0.27859 - acc: 0.9315 | val_loss: 0.32583 - val_acc: 0.9143 -- iter: 80/80
--
Training Step: 10 | total loss: 0.15370 | time: 0.113s
| Adam | epoch: 004 | loss: 0.15370 - acc: 0.9657 -- iter: 32/80
Training Step: 11 | total loss: 0.40049 | time: 0.236s
| Adam | epoch: 004 | loss: 0.40049 - acc: 0.8931 -- iter: 64/80
Training Step: 12 | total loss: 0.32694 | time: 1.360s
| Adam | epoch: 004 | loss: 0.32694 - acc: 0.9131 | val_loss: 0.29173 - val_acc: 0.9143 -- iter: 80/80
--
Training Step: 13 | total loss: 0.28654 | time: 0.121s
| Adam | epoch: 005 | loss: 0.28654 - acc: 0.9236 -- iter: 32/80
Training Step: 14 | total loss: 0.29622 | time: 0.269s
| Adam | epoch: 005 | loss: 0.29622 - acc: 0.9165 -- iter: 64/80
Training Step: 15 | total loss: 0.30216 | time: 1.412s
| Adam | epoch: 005 | loss: 0.30216 - acc: 0.9125 | val_loss: 0.29969 - val_acc: 0.9143 -- iter: 80/80
--
Training Step: 16 | total loss: 0.24097 | time: 0.114s
| Adam | epoch: 006 | loss: 0.24097 - acc: 0.9453 -- iter: 32/80
Training Step: 17 | total loss: 0.20250 | time: 0.232s
| Adam | epoch: 006 | loss: 0.20250 - acc: 0.9650 -- iter: 64/80
Training Step: 18 | total loss: 0.24004 | time: 1.371s
| Adam | epoch: 006 | loss: 0.24004 - acc: 0.9447 | val_loss: 0.29157 - val_acc: 0.9143 -- iter: 80/80
--
Training Step: 19 | total loss: 0.28716 | time: 0.098s
| Adam | epoch: 007 | loss: 0.28716 - acc: 0.9214 -- iter: 32/80
Training Step: 20 | total loss: 0.22043 | time: 0.243s
| Adam | epoch: 007 | loss: 0.22043 - acc: 0.9467 -- iter: 64/80
Training Step: 21 | total loss: 0.17199 | time: 1.392s
| Adam | epoch: 007 | loss: 0.17199 - acc: 0.9632 | val_loss: 0.30657 - val_acc: 0.9143 -- iter: 80/80
--
Training Step: 22 | total loss: 0.27338 | time: 0.118s
| Adam | epoch: 008 | loss: 0.27338 - acc: 0.9274 -- iter: 32/80
Training Step: 23 | total loss: 0.26280 | time: 0.238s
| Adam | epoch: 008 | loss: 0.26280 - acc: 0.9303 -- iter: 64/80
Training Step: 24 | total loss: 0.20052 | time: 1.373s
| Adam | epoch: 008 | loss: 0.20052 - acc: 0.9499 | val_loss: 0.31832 - val_acc: 0.9143 -- iter: 80/80
--
Training Step: 25 | total loss: 0.15587 | time: 0.142s
| Adam | epoch: 009 | loss: 0.15587 - acc: 0.9636 -- iter: 32/80
Training Step: 26 | total loss: 0.20714 | time: 0.270s
| Adam | epoch: 009 | loss: 0.20714 - acc: 0.9484 -- iter: 64/80
Training Step: 27 | total loss: 0.26675 | time: 1.387s
| Adam | epoch: 009 | loss: 0.26675 - acc: 0.9295 | val_loss: 0.29838 - val_acc: 0.9143 -- iter: 80/80
--
Training Step: 28 | total loss: 0.20848 | time: 0.152s
| Adam | epoch: 010 | loss: 0.20848 - acc: 0.9471 -- iter: 32/80
Training Step: 29 | total loss: 0.16654 | time: 0.289s
| Adam | epoch: 010 | loss: 0.16654 - acc: 0.9600 -- iter: 64/80
Training Step: 30 | total loss: 0.20458 | time: 1.404s
| Adam | epoch: 010 | loss: 0.20458 - acc: 0.9473 | val_loss: 0.26383 - val_acc: 0.9143 -- iter: 80/80
--
本文为互联网自动采集或经作者授权后发布,本文观点不代表立场,若侵权下架请联系我们删帖处理!文章出自:https://blog.csdn.net/mooyuan/article/details/122769262