用Python五步实现网页截图

用Python五步实现网页截图[Python常见问题]

方案说明

功能要求:实现网页加载后将页面截取成长图片
涉及模块:PyQt5、PIL(Pillow)

逻辑说明:

1:完成窗口设置,利用PyQt5的QWebEngineView加载网页地址,待网页加载完成后,调用check_page;

class MainWindow(QMainWindow):
    """Frameless main window that hosts the web view used for screenshots."""

    def __init__(self, parent=None):
        """Set the title and window flags; no page is loaded here."""
        super().__init__(parent)
        self.setWindowTitle("易哈佛")
        self.temp_height = 0
        # Remove the minimize/maximize buttons.
        self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False)
        # Optional: self.setWindowFlag(Qt.WindowStaysOnTopHint, True)
        # would keep the window above all others.
        # Draw the window without a frame.
        self.setWindowFlag(Qt.FramelessWindowHint, True)

    def urlScreenShot(self, url):
        """Create the web view, start loading ``url`` and install the view.

        ``check_page`` is connected to the view's ``loadFinished`` signal.
        """
        view = QWebEngineView()
        self.browser = view
        view.load(QUrl(url))
        self.setGeometry(self.chose_screen())
        view.loadFinished.connect(self.check_page)
        self.setCentralWidget(view)

    def get_page_size(self):
        """Return ``(width, height)`` of the loaded page's contents.

        The values are also stored on ``self.set_width`` and
        ``self.set_height``.
        """
        content = self.browser.page().contentsSize()
        self.set_width, self.set_height = content.width(), content.height()
        return self.set_width, self.set_height

    def chose_screen(self):
        """Pick the window geometry (always 750 x 1370).

        The rectangle is anchored at the top-left of the first screen whose
        available area is strictly larger than 750 x 1370 in both
        dimensions, or at (0, 0) when no screen qualifies.
        """
        target_w, target_h = 750, 1370
        desktop = QApplication.desktop()
        for index in range(desktop.screenCount()):
            area = desktop.availableGeometry(index)
            if area.width() > target_w and area.height() > target_h:
                return QRect(area.left(), area.top(), target_w, target_h)
        return QRect(0, 0, target_w, target_h)

if __name__ == "__main__":
    # NOTE: MainWindow only creates its web view inside urlScreenShot(url);
    # call it before show() to actually load and capture a page.
    app = QApplication(sys.argv)
    win = MainWindow()
    win.show()
    # Use sys.exit so the event loop's return code becomes the process exit
    # status; app.exit() called after exec_() has returned does nothing.
    sys.exit(app.exec_())

2:收集页面高度,并计算分次截屏的次数和余量高度;实例化图片合并工具,设置定时器,超时信号发出后,执行exe_command;

def check_page(self):
        """Plan the capture once the page has loaded and start the timer.

        Splits the page height into ``self.page`` full-window screenshots
        plus a remainder of ``self.over_flow_size`` pixels, creates the
        ``ScreenShotMerge`` helper and starts a 400 ms timer that drives
        ``exe_command``.
        """
        _, p_height = self.get_page_size()
        # contentsSize() is documented as returning a QSizeF (float
        # dimensions); work in whole pixels so the counters stay integers.
        full_pages, remainder = divmod(int(round(p_height)), self.height())
        if full_pages == 0:
            # The page is shorter than the window: a single full-window
            # screenshot already shows all of it, so there is no remainder
            # (keeping one would add a duplicated strip to the result).
            full_pages, remainder = 1, 0
        self.page, self.over_flow_size = full_pages, remainder
        self.ssm = ScreenShotMerge(self.page, self.over_flow_size)
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.exe_command)
        self.timer.setInterval(400)
        self.timer.start()

3:exe_command用来控制截图次数,并在每次截图完成后控制网页向下滚动一个窗口的高度;所有页面都截取完毕后,完成图片合并。

def exe_command(self):
        """Run one timer tick of the capture state machine.

        ``self.page`` counts down by one on every call:

        * negative -- capture is over: stop the timer, merge the images
          and close the window;
        * positive -- take a full screenshot, then scroll down;
        * zero -- take the remainder screenshot if ``over_flow_size`` is
          positive.
        """
        pages_left = self.page
        if pages_left < 0:
            self.timer.stop()
            self.ssm.image_merge()
            self.close()
        elif pages_left > 0:
            self.screen_shot()
            self.run_js()
        elif self.over_flow_size > 0:
            self.screen_shot()
        self.page -= 1
        
    def run_js(self):
        """Scroll the page down by one window height via JavaScript.

        Defines a ``scroll(dHeight)`` helper in the page and calls it with
        ``self.height()``.
        """
        # NOTE(review): when the target offset exceeds scrollHeight the
        # helper scrolls to clientHeight rather than to the bottom --
        # confirm this is intended.
        script = """
            var scroll = function (dHeight) {
            var t = document.documentElement.scrollTop
            var h = document.documentElement.scrollHeight
            dHeight = dHeight || 0
            var current = t + dHeight
            if (current > h) {
                window.scrollTo(0, document.documentElement.clientHeight)
              } else {
                window.scrollTo(0, current)
              }
            }
        """
        # The newline must be written as an escape: a literal line break
        # inside a single-quoted string is a SyntaxError.  format() is
        # applied to the call suffix only, because the helper source
        # contains braces of its own.
        command = script + "\n scroll({})".format(self.height())
        self.browser.page().runJavaScript(command)

4:screen_shot在每次截图完成后将图片保存,并将图片对象交由图片合并工具保存到列表中。

def screen_shot(self):
        """Grab the web view, save it as ``temp.png`` and register it.

        The saved file is handed to ``self.ssm.add_im``.
        """
        shot_path = "{}/temp.png".format(self.ssm.root_path)
        window_id = int(self.browser.winId())
        QApplication.primaryScreen().grabWindow(window_id).save(shot_path)
        self.ssm.add_im(shot_path)

5:截图合并工具,在每次截图完成后将图片对象保存,完成余量截图的重绘和截图的合并。

class ScreenShotMerge():
    """Collect per-window screenshots and stitch them into one tall image.

    Screenshots are registered one at a time through ``add_im``.  The one
    registered after ``page`` full screenshots is treated as the remainder
    and cropped to its bottom ``over_flow_size`` rows.  ``image_merge``
    stacks all registered images vertically and writes ``merge.png``.
    """

    # Columns trimmed from the right edge of the stitched image.
    # NOTE(review): presumably the vertical scrollbar width -- confirm.
    RIGHT_TRIM = 15
    # Explicit white.  Pillow expects one value per band for multi-band
    # modes; a bare integer 255 is not white for an "RGB" image.
    BACKGROUND = (255, 255, 255)

    def __init__(self, page, over_flow_size):
        """Store the capture plan and prepare the output directory.

        Args:
            page: number of full-window screenshots expected.
            over_flow_size: height in pixels of the final partial
                screenshot (0 when the page divides evenly).
        """
        self.im_list = []
        self.page = page
        self.over_flow_size = over_flow_size
        self.get_path()

    def get_path(self):
        """Create the ``temp`` directory next to this file if needed.

        Sets ``self.root_path`` (the directory) and ``self.save_path``
        (``merge.png`` inside it).
        """
        self.root_path = Path(__file__).parent.joinpath("temp")
        if not self.root_path.exists():
            self.root_path.mkdir(parents=True)
        self.save_path = self.root_path.joinpath("merge.png")

    def add_im(self, path):
        """Load the screenshot at ``path`` and append it to ``im_list``.

        Once ``page`` images are already registered, the incoming one is
        the remainder and is cropped by ``reedit_image``.  Each image is
        also saved as ``<n>.png`` in ``root_path``.
        """
        if len(self.im_list) == self.page:
            im = self.reedit_image(path)
        else:
            im = Image.open(path)
        index = len(self.im_list) + 1
        im.save(self.root_path.joinpath("{}.png".format(index)))
        self.im_list.append(im)

    def get_new_size(self):
        """Return ``(max_width, total_height)`` over the registered images.

        Returns ``(0, 0)`` when no image has been registered.
        """
        sizes = [img.size for img in self.im_list]
        max_width = max((width for width, _ in sizes), default=0)
        total_height = sum(height for _, height in sizes)
        return max_width, total_height

    def image_merge(self, ):
        """Stack the registered images vertically and save ``merge.png``.

        With several images the result is ``RIGHT_TRIM`` pixels narrower
        than the widest one; a single image is saved at its own size.
        Nothing is written when no image was registered.
        """
        if not self.im_list:
            # Nothing was captured; report it instead of failing with an
            # IndexError on im_list[0].
            print("截图失败: 没有可合并的截图")
            return
        if len(self.im_list) > 1:
            max_width, total_height = self.get_new_size()
            new_img = Image.new(
                "RGB",
                (max_width - self.RIGHT_TRIM, total_height),
                self.BACKGROUND,
            )
            y = 0
            for img in self.im_list:
                new_img.paste(img, (0, y))
                y += img.size[1]
        else:
            only = self.im_list[0]
            new_img = Image.new("RGB", only.size, self.BACKGROUND)
            new_img.paste(only, (0, 0))
        new_img.save(self.save_path)
        print("截图成功:", self.save_path)

    def reedit_image(self, path):
        """Return the bottom ``over_flow_size`` rows of the image at ``path``."""
        obj = Image.open(path)
        width, height = obj.size
        # Clamp at 0 so a remainder taller than the screenshot cannot
        # produce a negative crop origin.
        top = max(height - int(round(self.over_flow_size)), 0)
        return obj.crop((0, top, width, height))

截图功能完整代码

#!/usr/bin/env python
# -*- coding:UTF-8 -*-
#Python学习交流群:778463939

import sys
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import *
from PIL import Image
from pathlib import Path


class ScreenShotMerge():
    """Collect per-window screenshots and stitch them into one tall image.

    Screenshots are registered one at a time through ``add_im``.  The one
    registered after ``page`` full screenshots is treated as the remainder
    and cropped to its bottom ``over_flow_size`` rows.  ``image_merge``
    stacks all registered images vertically and writes ``merge.png``.
    """

    # Columns trimmed from the right edge of the stitched image.
    # NOTE(review): presumably the vertical scrollbar width -- confirm.
    RIGHT_TRIM = 15
    # Explicit white.  Pillow expects one value per band for multi-band
    # modes; a bare integer 255 is not white for an "RGB" image.
    BACKGROUND = (255, 255, 255)

    def __init__(self, page, over_flow_size):
        """Store the capture plan and prepare the output directory.

        Args:
            page: number of full-window screenshots expected.
            over_flow_size: height in pixels of the final partial
                screenshot (0 when the page divides evenly).
        """
        self.im_list = []
        self.page = page
        self.over_flow_size = over_flow_size
        self.get_path()

    def get_path(self):
        """Create the ``temp`` directory next to this file if needed.

        Sets ``self.root_path`` (the directory) and ``self.save_path``
        (``merge.png`` inside it).
        """
        self.root_path = Path(__file__).parent.joinpath("temp")
        if not self.root_path.exists():
            self.root_path.mkdir(parents=True)
        self.save_path = self.root_path.joinpath("merge.png")

    def add_im(self, path):
        """Load the screenshot at ``path`` and append it to ``im_list``.

        Once ``page`` images are already registered, the incoming one is
        the remainder and is cropped by ``reedit_image``.  Each image is
        also saved as ``<n>.png`` in ``root_path``.
        """
        if len(self.im_list) == self.page:
            im = self.reedit_image(path)
        else:
            im = Image.open(path)
        index = len(self.im_list) + 1
        im.save(self.root_path.joinpath("{}.png".format(index)))
        self.im_list.append(im)

    def get_new_size(self):
        """Return ``(max_width, total_height)`` over the registered images.

        Returns ``(0, 0)`` when no image has been registered.
        """
        sizes = [img.size for img in self.im_list]
        max_width = max((width for width, _ in sizes), default=0)
        total_height = sum(height for _, height in sizes)
        return max_width, total_height

    def image_merge(self, ):
        """Stack the registered images vertically and save ``merge.png``.

        With several images the result is ``RIGHT_TRIM`` pixels narrower
        than the widest one; a single image is saved at its own size.
        Nothing is written when no image was registered.
        """
        if not self.im_list:
            # Nothing was captured; report it instead of failing with an
            # IndexError on im_list[0].
            print("截图失败: 没有可合并的截图")
            return
        if len(self.im_list) > 1:
            max_width, total_height = self.get_new_size()
            new_img = Image.new(
                "RGB",
                (max_width - self.RIGHT_TRIM, total_height),
                self.BACKGROUND,
            )
            y = 0
            for img in self.im_list:
                new_img.paste(img, (0, y))
                y += img.size[1]
        else:
            only = self.im_list[0]
            new_img = Image.new("RGB", only.size, self.BACKGROUND)
            new_img.paste(only, (0, 0))
        new_img.save(self.save_path)
        print("截图成功:", self.save_path)

    def reedit_image(self, path):
        """Return the bottom ``over_flow_size`` rows of the image at ``path``."""
        obj = Image.open(path)
        width, height = obj.size
        # Clamp at 0 so a remainder taller than the screenshot cannot
        # produce a negative crop origin.
        top = max(height - int(round(self.over_flow_size)), 0)
        return obj.crop((0, top, width, height))


class MainWindow(QMainWindow):
    """Frameless window that loads a URL and captures it as one long image.

    Flow: ``urlScreenShot`` loads the page; when loading finishes,
    ``check_page`` works out how many window-sized screenshots are needed
    and starts a timer; every timer tick runs ``exe_command``, which takes
    a screenshot and scrolls, and finally merges the pieces and closes the
    window.
    """

    def __init__(self, parent=None):
        """Set the title and window flags; no page is loaded here."""
        super(MainWindow, self).__init__(parent)
        self.setWindowTitle("易哈佛")
        self.temp_height = 0
        # Remove the minimize/maximize buttons.
        self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False)
        # Optional: self.setWindowFlag(Qt.WindowStaysOnTopHint, True)
        # would keep the window above all others.
        # Draw the window without a frame.
        self.setWindowFlag(Qt.FramelessWindowHint, True)

    def urlScreenShot(self, url):
        """Create the web view, start loading ``url`` and install the view.

        ``check_page`` is connected to the view's ``loadFinished`` signal.
        """
        self.browser = QWebEngineView()
        self.browser.load(QUrl(url))
        geometry = self.chose_screen()
        self.setGeometry(geometry)
        self.browser.loadFinished.connect(self.check_page)
        self.setCentralWidget(self.browser)

    def get_page_size(self):
        """Return ``(width, height)`` of the loaded page's contents.

        The values are also stored on ``self.set_width`` and
        ``self.set_height``.
        """
        size = self.browser.page().contentsSize()
        self.set_height = size.height()
        self.set_width = size.width()
        return size.width(), size.height()

    def chose_screen(self):
        """Pick the window geometry (always 750 x 1370).

        The rectangle is anchored at the top-left of the first screen whose
        available area is strictly larger than 750 x 1370 in both
        dimensions, or at (0, 0) when no screen qualifies.
        """
        width, height = 750, 1370
        desktop = QApplication.desktop()
        screen_count = desktop.screenCount()
        for i in range(0, screen_count):
            rect = desktop.availableGeometry(i)
            s_width, s_height = rect.width(), rect.height()
            if s_width > width and s_height > height:
                return QRect(rect.left(), rect.top(), width, height)
        return QRect(0, 0, width, height)

    def check_page(self):
        """Plan the capture once the page has loaded and start the timer.

        Splits the page height into ``self.page`` full-window screenshots
        plus a remainder of ``self.over_flow_size`` pixels, creates the
        ``ScreenShotMerge`` helper and starts a 400 ms timer that drives
        ``exe_command``.
        """
        _, p_height = self.get_page_size()
        # contentsSize() is documented as returning a QSizeF (float
        # dimensions); work in whole pixels so the counters stay integers.
        full_pages, remainder = divmod(int(round(p_height)), self.height())
        if full_pages == 0:
            # The page is shorter than the window: a single full-window
            # screenshot already shows all of it, so there is no remainder
            # (keeping one would add a duplicated strip to the result).
            full_pages, remainder = 1, 0
        self.page, self.over_flow_size = full_pages, remainder
        self.ssm = ScreenShotMerge(self.page, self.over_flow_size)
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.exe_command)
        self.timer.setInterval(400)
        self.timer.start()

    def exe_command(self):
        """Run one timer tick of the capture state machine.

        ``self.page`` counts down by one on every call:

        * positive -- take a full screenshot, then scroll down;
        * negative -- capture is over: stop the timer, merge the images
          and close the window;
        * zero -- take the remainder screenshot if ``over_flow_size`` is
          positive.
        """
        if self.page > 0:
            self.screen_shot()
            self.run_js()

        elif self.page < 0:
            self.timer.stop()
            self.ssm.image_merge()
            self.close()

        elif self.over_flow_size > 0:
            self.screen_shot()
        self.page -= 1

    def run_js(self):
        """Scroll the page down by one window height via JavaScript.

        Defines a ``scroll(dHeight)`` helper in the page and calls it with
        ``self.height()``.
        """
        # NOTE(review): when the target offset exceeds scrollHeight the
        # helper scrolls to clientHeight rather than to the bottom --
        # confirm this is intended.
        script = """
            var scroll = function (dHeight) {
            var t = document.documentElement.scrollTop
            var h = document.documentElement.scrollHeight
            dHeight = dHeight || 0
            var current = t + dHeight
            if (current > h) {
                window.scrollTo(0, document.documentElement.clientHeight)
              } else {
                window.scrollTo(0, current)
              }
            }
        """
        # The newline must be written as an escape: a literal line break
        # inside a single-quoted string is a SyntaxError.  format() is
        # applied to the call suffix only, because the helper source
        # contains braces of its own.
        command = script + "\n scroll({})".format(self.height())
        self.browser.page().runJavaScript(command)

    def screen_shot(self):
        """Grab the web view, save it as ``temp.png`` and register it.

        The saved file is handed to ``self.ssm.add_im``.
        """
        screen = QApplication.primaryScreen()
        winid = self.browser.winId()
        pix = screen.grabWindow(int(winid))
        name = "{}/temp.png".format(self.ssm.root_path)
        pix.save(name)
        self.ssm.add_im(name)


if __name__ == "__main__":
    url = "http://blog.sina.com.cn/lm/rank/focusbang//"
    app = QApplication(sys.argv)
    win = MainWindow()
    win.urlScreenShot(url)
    win.show()
    # Use sys.exit so the event loop's return code becomes the process exit
    # status; app.exit() called after exec_() has returned does nothing.
    sys.exit(app.exec_())
hmoban主题是根据ripro二开的主题,极致后台体验,无插件,集成会员系统
自学咖网 » 用Python五步实现网页截图