引言:复现Pyramid Feature Attention Network for Saliency detection。该文发表于CVPR2019,有两种开源实现,基于pytorch和基于keras,本文都进行了尝试,其中主要使用Keras版本。
条件所限,我使用的机器为Jetson Xavier。根据项目安装的包的版本如下:
xavier安装的opencv3.4.3
不过该环境会有很多的warning,可能和作者使用的还不一样。
如下。
from keras import callbacks, optimizers
import tensorflow as tf
import os
from keras.layers import Input
from model import VGG16
from data import getTrainGenerator
from utils import *
from edge_hold_loss import *
import math
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def lr_scheduler(epoch):
    """Return the step-decayed learning rate for the given epoch index.

    The rate starts at the module-level ``base_lr`` and is halved once for
    every ``epochs / 8`` epochs that have elapsed (counting ``epoch + 1``).
    Both ``base_lr`` and ``epochs`` are read as module globals, so they must
    be defined before this function is called. The computed rate is also
    printed.
    """
    halving_factor = 0.5
    epochs_per_step = epochs / 8.
    completed_steps = math.floor((1 + epoch) / epochs_per_step)
    lr = base_lr * math.pow(halving_factor, completed_steps)
    print('lr: %f' % lr)
    return lr
if __name__ == '__main__':
    # Training entry point: parse CLI options, build the model, load the
    # pretrained weights by layer name and train with a step-decayed SGD.
    import argparse

    parser = argparse.ArgumentParser(description='Train model your dataset')
    parser.add_argument('--train_file', default='train_pair.txt', help='your train file', type=str)
    parser.add_argument('--model_weights', default='model/vgg16_no_top.h5', help='your model weights', type=str)
    args = parser.parse_args()
    model_name = args.model_weights
    # Expected format of the train file, one sample per line:
    #   img_path1 gt_path1
    #   img_path2 gt_path2
    train_path = args.train_file
    print("train_file", train_path)
    print("model_weights", model_name)

    target_size = (256, 256)
    batch_size = 15
    # base_lr and epochs are read as module globals by lr_scheduler().
    base_lr = 1e-2
    epochs = 50

    # Only the number of lines is needed here; the generator re-reads the file.
    with open(train_path, 'rb') as f:  # encoding='unicode_escape'
        trainlist = f.readlines()
    if not trainlist:
        raise ValueError('Train file %s contains no samples' % train_path)
    # Keras expects an integer step count; round up so the final partial
    # batch is still visited (true division would yield a float here).
    steps_per_epoch = math.ceil(len(trainlist) / batch_size)

    optimizer = optimizers.SGD(lr=base_lr, momentum=0.9, decay=0)
    # optimizer = optimizers.Adam(lr=base_lr)
    loss = EdgeHoldLoss
    metrics = [acc, pre, rec]
    dropout = True
    with_CPFE = True
    with_CA = True
    with_SA = True
    log = './PFA.csv'  # kept from the original; not consumed by any callback below
    tb_log = './tensorboard-logs/PFA'
    model_save = 'model/PFA_'
    model_save_period = 5

    if target_size[0] % 32 != 0 or target_size[1] % 32 != 0:
        raise ValueError('Image height and width must be a multiple of 32')
    print(type(target_size[0]), ' ', type(target_size[1]))
    print(target_size[0], ' ', target_size[1])

    traingen = getTrainGenerator(train_path, target_size, batch_size, israndom=True)
    model_input = Input(shape=(target_size[0], target_size[1], 3))
    model = VGG16(model_input, dropout=dropout, with_CPFE=with_CPFE, with_CA=with_CA, with_SA=with_SA)
    for i, layer in enumerate(model.layers):
        print(i, layer.name)
    model.load_weights(model_name, by_name=True)

    tb = callbacks.TensorBoard(log_dir=tb_log)
    lr_decay = callbacks.LearningRateScheduler(schedule=lr_scheduler)
    # NOTE: early stopping is constructed but, as in the original script,
    # not added to the callback list, so training always runs all epochs.
    es = callbacks.EarlyStopping(monitor='loss', patience=3, verbose=0, mode='auto')
    modelcheck = callbacks.ModelCheckpoint(model_save + '{epoch:05d}.h5', monitor='loss', verbose=1,
                                           save_best_only=False, save_weights_only=True, mode='auto',
                                           period=model_save_period)
    # Use a distinct name so the imported `callbacks` module is not shadowed.
    callback_list = [lr_decay, modelcheck, tb]

    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    model.fit_generator(traingen, steps_per_epoch=steps_per_epoch,
                        epochs=epochs, verbose=1, callbacks=callback_list)
用于预测测试的test.py脚本如下:
import cv2
import numpy as np
import os
from keras.layers import Input
from model import VGG16
import matplotlib.pyplot as plt
def padding(x):
    """Centre a (height, width, channels) array on a zero-filled square canvas.

    The canvas side equals the longer of the two spatial dimensions. The
    shorter dimension is padded on both sides, with any odd leftover row or
    column going to the bottom/right. The result is a new array created by
    ``np.zeros`` with its default dtype.
    """
    height, width, channels = x.shape
    side = max(height, width)
    top = (side - height) // 2
    left = (side - width) // 2
    canvas = np.zeros((side, side, channels))
    canvas[top:top + height, left:left + width, :] = x
    return canvas
def load_image(path):
    """Read an image from disk and turn it into a single-item network batch.

    The image is converted to float32, its channel axis is reversed, fixed
    per-channel offsets are subtracted, and the result is zero-padded to a
    square, resized to the module-level ``target_size`` and given a leading
    batch axis.

    Args:
        path: Path of the image file to read.

    Returns:
        A tuple ``(batch, original_shape)`` where ``batch`` has a leading
        axis of length 1 and ``original_shape`` is the shape of the image as
        read from disk (needed later to undo the padding/resizing).

    Raises:
        IOError: If OpenCV cannot read or decode the file.
    """
    x = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising.
    if x is None:
        raise IOError('Failed to read image: %s' % path)
    sh = x.shape
    x = np.array(x, dtype=np.float32)
    # Reverse the channel axis (OpenCV loads images as BGR).
    x = x[..., ::-1]
    # Zero-center by mean pixel
    # NOTE(review): these offsets are applied after the channel reversal;
    # presumably this mirrors the training-time preprocessing -- confirm
    # against the data loader before changing the order.
    x[..., 0] -= 103.939
    x[..., 1] -= 116.779
    x[..., 2] -= 123.68
    x = padding(x)
    x = cv2.resize(x, target_size, interpolation=cv2.INTER_LINEAR)
    x = np.expand_dims(x, 0)
    return x, sh
def cut(pridict, shape):
    """Undo the square padding: resize a prediction and crop the image area.

    The prediction is resized to a square whose side is the longer of the
    original height and width, then the centred ``height x width`` window is
    returned.

    Args:
        pridict: 2-D prediction map to resize and crop.
        shape: ``(height, width, channels)`` of the original image.
    """
    height, width, _ = shape
    side = max(height, width)
    square = cv2.resize(pridict, (side, side))
    top = (side - height) // 2
    left = (side - width) // 2
    return square[top:top + height, left:left + width]
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def getres(pridict, shape):
    """Convert a raw network output into an 8-bit map of the original size.

    The output is passed through ``sigmoid``, scaled to 0-255, cast to
    uint8, stripped of singleton axes and finally cropped back to the
    original image area with ``cut``.

    Args:
        pridict: Raw model output for one image.
        shape: Shape of the original image, as returned by ``load_image``.
    """
    scaled = sigmoid(pridict) * 255
    as_bytes = scaled.astype(np.uint8)
    return cut(np.squeeze(as_bytes), shape)
def laplace_edge(x):
    """Return an 8-bit edge map of ``x`` from a 3x3 Laplacian-style filter.

    The input is divided by 255 and filtered with an 8-neighbour kernel.
    The response is squashed with ``tanh``, negative values are clipped to
    zero, and the result is rescaled to 0-255 as uint8.
    """
    kernel = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]])
    response = cv2.filter2D(x / 255., -1, kernel)
    positive = np.maximum(np.tanh(response), 0)
    return (positive * 255).astype(np.uint8)
if __name__ == '__main__':
    # Prediction entry point: load trained weights, run the network on one
    # image and plot the input, the saliency map and its edge map.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    model_name = 'model/PFA_00050.h5'
    # target_size is read as a module global by load_image().
    target_size = (256, 256)
    dropout = False
    with_CPFE = True
    with_CA = True
    with_SA = True

    if target_size[0] % 32 != 0 or target_size[1] % 32 != 0:
        raise ValueError('Image height and width must be a multiple of 32')

    model_input = Input(shape=(target_size[0], target_size[1], 3))
    model = VGG16(model_input, dropout=dropout, with_CPFE=with_CPFE, with_CA=with_CA, with_SA=with_SA)
    model.load_weights(model_name, by_name=True)
    for layer in model.layers:
        layer.trainable = False

    # image_path = 'image/3.jpg'
    image_path = '/home/bafs/SODDatasets/CSSD/images/69015.jpg'
    img, shape = load_image(image_path)
    img = np.array(img, dtype=np.float32)
    sa = model.predict(img)
    sa = getres(sa, shape)
    edge = laplace_edge(sa)

    # Figure-level title: an axes title set before plt.subplot() would sit on
    # a separate full-figure axes rather than above the three panels.
    plt.suptitle('saliency')
    plt.subplot(131)
    # OpenCV loads BGR while matplotlib expects RGB; convert for display.
    plt.imshow(cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB))
    plt.subplot(132)
    plt.imshow(sa, cmap='gray')
    plt.subplot(133)
    plt.imshow(edge, cmap='gray')

    # Save before show(): once the window is closed the current figure is
    # gone and savefig() would write an empty image.
    os.makedirs('./res', exist_ok=True)
    plt.savefig('./res/res.jpg')
    plt.show()
初步测试,使用CSSD作为训练集合,训练50个epochs,使用上述test.py文件进行预测。结果还是很不错的,如下:
后续:再仔细阅读一下该文献,做分享。
本文地址:https://blog.csdn.net/wbzhang233/article/details/107423752
如对本文有疑问, 点击进行留言回复!!
最近想下m3u8格式视频流但是网址太卡好慢看不了所以搞了个python脚本下载 ,给有需要的也用用 ,可以有点小问题大家可以改改,搬或者移到其他视频流下载上,不要嫌弃
PAT 甲级真题 1006 Sign In and Sign Out (25分) python实现
OpenCV计算机视觉实战(Python)| 03、阈值与平滑处理
网友评论