forked from LJSthu/Python-Remove-Watermark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwatermark.py
68 lines (63 loc) · 2.69 KB
/
watermark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Import necessary libraries
import os
import fitz
import numba
import argparse
import numpy as np
from PIL import Image
from skimage import io
# Command-line interface: two optional flags, both with defaults so the script
# can be run with no arguments at all.
parser = argparse.ArgumentParser(
    description='Remove Watermark',
)
# Path of the file to clean (.jpg, .png or .pdf).
parser.add_argument(
    '--source',
    type=str,
    default='source.pdf',
    help='source file',
)
# Directory that receives the cleaned image(s).
parser.add_argument(
    '--target',
    type=str,
    default='out',
    help='target directory',
)
# Vectorized with NumPy: one boolean mask replaces the per-pixel loop, so no
# JIT compilation is needed on the first call.
def handle(img, low=175, high=250):
    """Whiten every pixel whose colour lies strictly between *low* and *high*.

    A pixel is treated as watermark when all of its colour channels satisfy
    ``low < value < high`` (both bounds exclusive); such pixels are set to
    255 in every colour channel.

    Supported layouts:

    * last axis of length 3 -- RGB pixels (any number of leading axes);
    * 2-D array             -- grayscale, each sample is its own pixel;
    * 3-D array, last axis of length 4 -- RGBA; only the first three
      channels are tested and whitened, the fourth is left untouched.

    Args:
        img: NumPy array holding the image. It is modified in place.
        low: Exclusive lower bound of the watermark range.
        high: Exclusive upper bound of the watermark range.

    Returns:
        The same array, after modification.

    Raises:
        ValueError: If the array layout is none of the supported ones.
    """
    if img.ndim >= 1 and img.shape[-1] == 3:
        colour = img
    elif img.ndim == 2:
        # Grayscale: there is no channel axis to reduce over.
        img[(img > low) & (img < high)] = 255
        return img
    elif img.ndim == 3 and img.shape[-1] == 4:
        # Basic slicing yields a view, so writes below land in ``img``.
        colour = img[..., :3]
    else:
        raise ValueError(
            'unsupported image shape {}'.format(img.shape)
        )
    # A pixel matches only when every colour channel is inside the range.
    watermark = np.all((colour > low) & (colour < high), axis=-1)
    colour[watermark] = 255
    return img
def main():
    """Remove the watermark from ``--source`` and write the result to ``--target``.

    * ``.jpg`` / ``.jpeg`` / ``.png`` (any letter case): the cleaned image is
      saved as ``result<ext>`` inside the target directory.
    * ``.pdf``: every page is rendered to an image, cleaned, and saved as
      ``1.jpg``, ``2.jpg``, ... inside the target directory.
    * anything else: a message is printed and nothing is written.
    """
    # Parse command line arguments
    args = parser.parse_args()
    # makedirs(exist_ok=True) also creates missing parent directories and does
    # not fail if the directory already exists.
    os.makedirs(args.target, exist_ok=True)
    # Lower-case the extension so 'SCAN.JPG' or 'doc.PDF' are recognised too.
    fileext = os.path.splitext(os.path.basename(args.source))[1].lower()
    if fileext in ('.jpg', '.jpeg'):
        img = io.imread(args.source)
        io.imsave(os.path.join(args.target, 'result{}'.format(fileext)), handle(img))
    elif fileext == '.png':
        # Read the png and convert it to RGB mode
        img = np.array(Image.open(args.source).convert('RGB'))
        io.imsave(os.path.join(args.target, 'result{}'.format(fileext)), handle(img))
    elif fileext == '.pdf':
        # PDF pages render at 72 dpi by default (a 612x792 pt page becomes
        # 612x792 px). A zoom of 1.33333333 gives 816x1056 px; 2 would give
        # 1224x1584 px.
        zoom_x = zoom_y = 1.33333333
        mat = fitz.Matrix(zoom_x, zoom_y)
        # The context manager closes the document even if a page fails.
        with fitz.open(args.source) as pdf:
            # Process and save one page at a time instead of buffering every
            # rendered page in memory first.
            for page_number, page in enumerate(pdf, start=1):
                pix = page.get_pixmap(matrix=mat, alpha=False)
                # frombuffer yields a read-only view of the pixmap bytes;
                # copy() makes it writable for handle().
                img = np.frombuffer(buffer=pix.samples, dtype=np.uint8).reshape((pix.h, pix.w, -1)).copy()
                # Format only the file name, so braces in the target
                # directory path are never interpreted as placeholders.
                io.imsave(os.path.join(args.target, '{}.jpg'.format(page_number)), handle(img))
    else:
        # If the file format is not supported, print an error message
        print('Unsupported file format')


if __name__ == '__main__':
    main()