图像处理

Pillow

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Basic operations
from PIL import Image  # import the Image module
im = Image.new('RGB', (110, 80))  # create a new 110x80 RGB image
im = Image.open('x.jpg')  # load an image from disk (replaces the blank image above)
print(im.format, im.size, im.mode)  # format: file format, size: (width, height), mode: pixel mode
im.show()  # display the image
im.getcolors()  # colour statistics: a list of (count, colour) tuples, where count is how many pixels have that colour; returns None when the image has more than maxcolors (default 256) colours
im.convert('RGB').getpixel((0, 0))  # RGB value of the pixel at (0, 0)
im.save('y.png')  # save() is a method of the image instance; the module itself has no save()

# Cropping
# box is a 4-tuple of coordinates in a Cartesian system whose origin (0,0) is the top-left corner. For box = (x1,y1,x2,y2), the cropped region is the rectangle bounded by those coordinates.
im = Image.open('x.jpg')
box = (150,150,245,280)
region = im.crop(box)
region.show()

# Pasting
# Overlays one image on top of another. Usage: paste(image_to_paste, 4-tuple region where it is pasted).
im = Image.open('x.jpg')
box = (50,50,200,200)
region = im.crop(box)
# Rotate the cropped region by 180 degrees, then paste it back at the position it was copied from
region = region.transpose(Image.ROTATE_180)
im.paste(region,box)
im.show()

# Image sequences
# A file that holds several frames (e.g. a GIF) is called a sequence file. PIL opens its first frame automatically; seek() and tell() move between frames: tell() returns the current frame number and seek(n) switches to frame n.
from PIL import Image
im = Image.open("laopo.gif")
im.seek(1)
im.show()
try:
    while True:
        # Advance one frame at a time and display it.
        im.seek(im.tell() + 1)
        im.show()
except EOFError:
    # seek() past the last frame raises EOFError, which is how the loop ends.
    pass

# Read / modify pixels
from PIL import Image
# Convert to RGB so getpixel() always yields an (r, g, b) tuple; on a grayscale
# or palette image it returns a single int and tmp[2] would fail.
img = Image.open('x.jpg').convert('RGB')
width, height = img.size
for i in range(width):
    for j in range(height):
        tmp = img.getpixel((i, j))
        # Zero the red and green components, keeping only blue.
        img.putpixel((i, j), (0, 0, tmp[2]))
img.show()

OpenCV2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Read an image
import cv2
img = cv2.imread(r'x.jpg', 0)  # flag 0 loads the image as single-channel grayscale
# Display the image
cv2.imshow('tupian', img)
cv2.waitKey()  # block until a key is pressed so the window stays open
# Save the image
# The extension selects the encoder, so the name must be 'tupian.jpg'
# (the original 'tupian,jpg' had a comma and no usable extension).
cv2.imwrite('tupian.jpg', img)

# Image properties
print(img.shape) #shape: rows, columns and channels (NOTE(review): an image loaded as grayscale has only rows and columns — confirm against how img was read)
print(img.size) #size: total number of elements in the image array
print(img.dtype) #dtype: data type of the image array

# Channel split / merge
import cv2
# Load in colour (default flag): the split below needs a 3-channel BGR array,
# whereas an image read with flag 0 is 2-D and img[:, :, 0] would fail.
img = cv2.imread(r'x.jpg')
# Split into the B, G and R channels
# Split by indexing
B = img[:, :, 0]
G = img[:, :, 1]
R = img[:, :, 2]
# Split with the library function
B, G, R = cv2.split(img)
# Merge (uses the B, G, R names defined above; the original referenced
# undefined lowercase b, g, r)
bgr = cv2.merge([B, G, R])

# Colour-space conversion
# Convert a BGR image to grayscale, then convert the grayscale image to RGB
import cv2
import numpy as np
bgr=np.random.randint(0,256,size=[2,4,3],dtype=np.uint8) #generate a random 2x4x3 BGR image
gray=cv2.cvtColor(bgr,cv2.COLOR_BGR2GRAY) #BGR image -> grayscale image
rgb=cv2.cvtColor(gray,cv2.COLOR_GRAY2RGB) #grayscale image -> RGB image
print('bgr=\n',bgr)
print('gray=\n',gray)
print('rgb=\n',rgb)
# On top of the RGB colour space an extra A (alpha) channel can be added, which turns the image into RGBA mode; PNG images, for example, commonly use the RGBA colour space.
# Inspect the values of the alpha channel
import cv2
import numpy as np
img=np.random.randint(0,256,size=[2,3,3],dtype=np.uint8) #generate a random 2x3x3 BGR image
bgra=cv2.cvtColor(img,cv2.COLOR_BGR2BGRA)
print('img=\n',img)
print('bgra=\n',bgra)
b,g,r,a=cv2.split(bgra)
print('a=\n',a)
# Overwrite every alpha value with 125, then rebuild the 4-channel image
a[:,:]=125
bgra=cv2.merge([b,g,r,a])
print('bgra=\n',bgra)

# Fourier transform
# Imports are repeated here so the snippet runs on its own (plt was otherwise
# only imported further down the page).
import cv2
import numpy as np
import matplotlib.pyplot as plt

img = cv2.imread(r"x.jpg", 0)
# cv2.dft needs floating-point input
img_float32 = np.float32(img)
# Forward transform; the output has two channels (real, imaginary)
dft = cv2.dft(img_float32, flags=cv2.DFT_COMPLEX_OUTPUT)
# Move the low-frequency content to the centre of the image. Only the two
# spatial axes are shifted; shifting the last axis too would swap the real and
# imaginary channels.
dft_shift = np.fft.fftshift(dft, axes=(0, 1))
# The result is complex, so convert it to a magnitude spectrum for display.
# Adding 1 before the log avoids log(0) = -inf for zero-magnitude bins.
magnitude = 20 * np.log(cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1]) + 1)
plt.subplot(121), plt.imshow(img, cmap='gray')
plt.subplot(122), plt.imshow(magnitude, cmap='gray')
plt.show()

Matplotlib

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Display an image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np

picture = mpimg.imread('x.jpg') # read an image located in the same directory as this script
plt.imshow(picture) # draw the image
plt.axis('on') # show the axes
plt.show() # explicit show() call, needed because the IDE used here is PyCharm

# Isolate a single RGB channel
# Multiplying by a 0/1 mask zeroes the channels whose mask entry is 0.
lena1 = picture*[0,0,1] # keep only the third channel
lena2 = picture*[1,0,0] # keep only the first channel
lena3 = picture*[0,1,0] # keep only the second channel

# RGB to grayscale
def rgb2gray2(rgb):
    """Convert an RGB image array to grayscale with a weighted channel sum.

    Args:
        rgb: array indexed as [row, column, channel] whose first three
            channels are red, green and blue.

    Returns:
        A 2-D array equal to 0.2989 * R + 0.5870 * G + 0.1140 * B.
    """
    r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]  # slice out each channel
    gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
    return gray
gray_pro = rgb2gray2(picture)
plt.imshow(gray_pro, cmap=plt.get_cmap('gray'))
plt.axis('on')  # show the axes

# Save the figure
# savefig() has to run before show(): once the window opened by show() is
# closed the current figure is gone and savefig() would write a blank image.
plt.savefig('xx.jpg')
plt.show()

OCR

验证码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import os
import time
import re
from PIL import Image
import ddddocr
import onnxruntime
import requests
from base64 import b64decode
url = "http://"  # target address (placeholder value; fill in the real URL)
sess = requests.Session()  # a single session shared by the GET and POST requests in this script
ocr = ddddocr.DdddOcr(use_gpu=True)  # OCR engine instance used by pic_to_text
onnxruntime.set_default_logger_severity(3)  # NOTE(review): presumably raises the log threshold to hide onnxruntime warnings — confirm

def pic_to_text(text):
    """Extract the base64 captcha image embedded in *text* and OCR it.

    The image is written to ``v_code_LA.png``, converted to 8-bit grayscale,
    saved as ``v_code.png``, read back as bytes and passed to the module-level
    ``ocr`` object. ``v_code.png`` is deleted afterwards.

    Args:
        text: page source containing ``base64,<data>>``.

    Returns:
        The recognised captcha text, lower-cased.

    Raises:
        IndexError: if no base64 payload is found in *text*.
    """
    img = re.findall(r"base64,(.*?)>", text)[0]
    with open("v_code_LA.png", "wb") as raw_file:
        raw_file.write(b64decode(img.encode()))
    with Image.open("v_code_LA.png") as v_code:
        # convert() returns a new image rather than modifying in place; the
        # original discarded the result and saved the unconverted image.
        v_code.convert("L").save("v_code.png")
    with open('v_code.png', 'rb') as gray_file:
        v_code_bytes = gray_file.read()
    code = ocr.classification(v_code_bytes)
    os.remove("v_code.png")
    return code.lower()

def validate(text):
    """Solve the captcha found in *text* and submit it.

    Reads the "done N times" counter from the page. When it reaches 1000 the
    page is printed and nothing is submitted. Otherwise the captcha is
    recognised and, if the result is exactly 4 characters long, posted to
    ``url`` as the ``v_code`` form field.

    Args:
        text: page source returned by the server.

    Raises:
        IndexError: if the counter is not present in *text*.
    """
    cnt = int(re.findall(r"done (.*?) times", text)[0])
    if cnt == 1000:
        print(text)
        return
    code = pic_to_text(text)
    if len(code) != 4:
        # A result of any other length is treated as a failed recognition and skipped.
        return
    data = {
        "v_code": code
    }
    sess.post(url, data)

if __name__ == "__main__":
    # Fetch the page, answer its captcha, wait two seconds, repeat forever.
    while True:
        res = sess.get(url)
        validate(res.text)
        time.sleep(2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# 百度API
from urllib import response
import requests
import base64

# Base address of the challenge server. No trailing slash: every endpoint is
# built as url + "/name", which would otherwise produce "//name".
url = "http://47.97.127.1:28583"

def getToken():
    """Request a session token from the server's /getToken endpoint.

    Returns:
        The value at ``['data']['token']`` of the JSON response.
    """
    token_url = url + "/getToken"
    response = requests.get(token_url)
    return response.json()['data']['token']

def ocr(img_base64):
    """Recognise text in a base64-encoded image with the Baidu OCR API.

    An access token is requested on every call, then the image is posted to
    the high-accuracy general text recognition endpoint.

    Args:
        img_base64: the image, base64-encoded.

    Returns:
        The ``words_result`` entry of the JSON response.
    """
    # client_id is the API Key (AK) and client_secret is the Secret Key (SK)
    # obtained from the official console.
    host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=【你的】&client_secret=【你的】'
    response = requests.get(host)
    token = response.json()['access_token']

    # General text recognition (high-accuracy edition)
    request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"

    params = {"image": img_base64}
    request_url = request_url + "?access_token=" + token
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    response = requests.post(request_url, data=params, headers=headers)
    return response.json()['words_result']

def getViolWords():
    """Fetch the list of violation words from the server's /getViolWords endpoint.

    Returns:
        The value at ``['data']['violWords']`` of the JSON response.
    """
    words_url = url + "/getViolWords"
    response = requests.get(words_url)
    return response.json()['data']['violWords']

def getPic(token):
    """Fetch the next picture for *token* from the server's /getPic endpoint.

    Args:
        token: session token returned by getToken().

    Returns:
        The value at ``['data']['words']['w1']`` of the JSON response.
    """
    pic_url = url + "/getPic"
    data = {"token": token}
    response = requests.post(pic_url, json=data)
    return response.json()['data']['words']['w1']

def checkWords(violWords, picWords):
    """Report whether the recognised text is free of violation words.

    Args:
        violWords: iterable of forbidden words; spaces inside each word are
            ignored when matching.
        picWords: OCR result, either a list of ``{'words': str}`` entries or
            an already-extracted string.

    Returns:
        False if any violation word occurs in the recognised text, True
        otherwise (including when nothing was recognised).
    """
    if isinstance(picWords, str):
        text = picWords
    else:
        try:
            # Check every recognised line, not just the first one. Joining
            # with a newline keeps a word from matching across two lines.
            text = "\n".join(entry['words'] for entry in picWords)
        except (TypeError, KeyError):
            # Unexpected shape: fall back to matching against the raw value.
            text = picWords
    print(text)
    for word in violWords:
        needle = word.replace(" ", '').strip()
        # An empty needle is "in" every string, so it must not count as a hit.
        if needle and needle in text:
            return False
    return True

def submit(token, answer):
    """Post *answer* for the current picture to the server's /submits endpoint.

    Args:
        token: session token returned by getToken().
        answer: the verdict to submit.

    Returns:
        The decoded JSON response.
    """
    submit_url = url + "/submits"
    data = {"token": token, "answer": answer}
    response = requests.post(submit_url, json=data)
    return response.json()

def getResult(token):
    """Fetch the result for *token* from the server's /getResult endpoint.

    Args:
        token: session token returned by getToken().

    Returns:
        The ``data`` entry of the JSON response.
    """
    result_url = url + "/getResult"
    data = {"token": token}
    response = requests.post(result_url, json=data)
    return response.json()['data']

def getFlag(token):
    """Request the flag for *token* from the server's /getFlag endpoint.

    Args:
        token: session token returned by getToken().

    Returns:
        The decoded JSON response.
    """
    flag_url = url + "/getFlag"
    data = {"token": token}
    response = requests.post(flag_url, json=data)
    return response.json()


# Driver: get a token and the word list, judge 51 pictures in a row, then
# print the overall result and the flag.
token = getToken()
violWords = getViolWords()
for _ in range(51):
    pic = getPic(token)
    picWords = ocr(pic)
    result = checkWords(violWords, picWords)
    print(result)
    print(submit(token, result))
print(getResult(token))
print(getFlag(token))

拼图

1
2
3
4
5
6
7
8
9
10
11
12
13
montage *.jpg -tile 10x12 -geometry +0+0 out.jpg
montage *.jpg -tile 10x12 -geometry 200x100+0+0 out.jpg #把图片碎片合成一个图片
# 将目录中的jpg文件按顺序拼成x轴10块,y轴12块,每个图块大小为200x100像素,输出文件为out.jpg

gaps run out.jpg out2.jpg
gaps --image=out.jpg --size=100 --save
gaps --image=out.jpg --generations=50 --population=120 --size=100 --save  # 还原原图片
# --image 指向拼图的路径
# --size 拼图块的像素尺寸(每张小图,即拼图小块的大小)
# --generations 遗传算法的代的数量(迭代次数)
# --population 个体数量(遗传算法每一代的种群规模,不是拼图块总数)
# --verbose 每一代训练结束后展示最佳结果(实时显示)
# --save 将拼图还原为图像

Arnold变换 / 猫脸变换

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import cv2
import numpy as np

def arnold_decode(image, shuffle_times, a, b):
    """Decode an image that was scrambled with the Arnold (cat map) transform.

    Each round moves the pixel at (x, y) to
    ``(((a*b + 1)*x - b*y) % N, (-a*x + y) % N)`` with ``N`` the image height.
    The output of one round is the input of the next, so ``shuffle_times``
    rounds really apply the mapping that many times.

    Args:
        image: array indexed as [row, column] or [row, column, channel]. The
            mapping uses N = height for both axes, so the image is expected
            to be square.
        shuffle_times: number of decoding rounds; 0 returns an unchanged copy.
        a: Arnold transform parameter.
        b: Arnold transform parameter.

    Returns:
        A new array with the same shape and dtype as *image*.
    """
    # 1: work on a copy so the caller's array is never modified
    decode_image = np.array(image)

    # 2: compute N
    h, w = image.shape[0], image.shape[1]
    N = h  # or N = w; both are equal for a square image
    if h == 0 or w == 0:
        return decode_image

    # 3: the coordinate mapping depends only on the image size, so build it
    # once. Coefficients are reduced modulo N first to keep the index
    # arithmetic small even for large a and b.
    ori_x, ori_y = np.meshgrid(np.arange(h), np.arange(w), indexing="ij")
    new_x = (((a * b + 1) % N) * ori_x + ((-b) % N) * ori_y) % N
    new_y = (((-a) % N) * ori_x + ori_y) % N

    for _ in range(shuffle_times):
        shuffled = np.zeros_like(decode_image)
        shuffled[new_x, new_y] = decode_image[ori_x, ori_y]
        # Feed the result into the next round.
        decode_image = shuffled
    return decode_image

img = cv2.imread("a.png") #the scrambled image to decode
# Arnold transform parameters and the number of decoding rounds
a = 121
b = 144
st = 1
pic = arnold_decode(img,st,a,b)
cv2.imwrite('b.png',pic) #save the decoded image

工具:

Catmap

Zhanxw Cat

马赛克

unredacter