使用 Selenium Python 和 chromedriver 截取整页截图

问题描述

在尝试了各种方法之后......我偶然发现了这个页面,用 chromedriver、selenium 和 python 截取了整页截图.

原始代码在这里。(我把代码也复制在了下面)

test.py

"""
This script uses a simplified version of the one here:
https://snipt.net/restrada/python-selenium-workaround-for-full-page-screenshot-using-chromedriver-2x/

It contains the *crucial* correction added in the comments by Jason Coutu.
"""

import sys

from selenium import webdriver
import unittest

import util

class Test(unittest.TestCase):
    """ Demonstration: Get Chrome to generate fullscreen screenshot """

    def setUp(self):
        self.driver = webdriver.Chrome()

    def tearDown(self):
        self.driver.quit()

    def test_fullpage_screenshot(self):
        ''' Generate document-height screenshot '''
        #url = "http://effbot.org/imagingbook/introduction.htm"
        url = "http://www.w3schools.com/js/default.asp"
        self.driver.get(url)
        util.fullpage_screenshot(self.driver, "test.png")


if __name__ == "__main__":
    unittest.main(argv=[sys.argv[0]])

util.py

import os
import time

from PIL import Image

def fullpage_screenshot(driver, file):

        print("Starting chrome full page screenshot workaround ...")

        total_width = driver.execute_script("return document.body.offsetWidth")
        total_height = driver.execute_script("return document.body.parentNode.scrollHeight")
        viewport_width = driver.execute_script("return document.body.clientWidth")
        viewport_height = driver.execute_script("return window.innerHeight")
        print("Total: ({0}, {1}), Viewport: ({2},{3})".format(total_width, total_height,viewport_width,viewport_height))
        rectangles = []

        i = 0
        while i < total_height:
            ii = 0
            top_height = i + viewport_height

            if top_height > total_height:
                top_height = total_height

            while ii < total_width:
                top_width = ii + viewport_width

                if top_width > total_width:
                    top_width = total_width

                print("Appending rectangle ({0},{1},{2},{3})".format(ii, i, top_width, top_height))
                rectangles.append((ii, i, top_width,top_height))

                ii = ii + viewport_width

            i = i + viewport_height

        stitched_image = Image.new('RGB', (total_width, total_height))
        previous = None
        part = 0

        for rectangle in rectangles:
            if not previous is None:
                driver.execute_script("window.scrollTo({0}, {1})".format(rectangle[0], rectangle[1]))
                print("Scrolled To ({0},{1})".format(rectangle[0], rectangle[1]))
                time.sleep(0.2)

            file_name = "part_{0}.png".format(part)
            print("Capturing {0} ...".format(file_name))

            driver.get_screenshot_as_file(file_name)
            screenshot = Image.open(file_name)

            if rectangle[1] + viewport_height > total_height:
                offset = (rectangle[0], total_height - viewport_height)
            else:
                offset = (rectangle[0], rectangle[1])

            print("Adding to stitched image with offset ({0}, {1})".format(offset[0],offset[1]))
            stitched_image.paste(screenshot, offset)

            del screenshot
            os.remove(file_name)
            part = part + 1
            previous = rectangle

        stitched_image.save(file)
        print("Finishing chrome full page screenshot workaround...")
        return True

解决方案

工作原理:尽可能设置浏览器高度...

#coding=utf-8
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def test_fullpage_screenshot(self):
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--start-maximized')
    driver = webdriver.Chrome(chrome_options=chrome_options)
    driver.get("yoururlxxx")
    time.sleep(2)
    
    #the element with longest height on page
    ele=driver.find_element("xpath", '//div[@class="react-grid-layout layout"]')
    total_height = ele.size["height"]+1000
    
    driver.set_window_size(1920, total_height)      #the trick
    time.sleep(2)
    driver.save_screenshot("screenshot1.png")
    driver.quit()

if __name__ == "__main__":
    test_fullpage_screenshot()

After trying out various approaches... I have stumbled upon this page to take full-page screenshot with chromedriver, selenium and python.

The original code is here. (and I copy the code in this posting below)

It uses PIL and it works great! However, there is one issue: it captures fixed headers and repeats them throughout the stitched image, and it also misses some parts of the page at the boundaries between one captured viewport and the next. Sample URL to take a screenshot of:

http://www.w3schools.com/js/default.asp

How can I avoid the repeated headers with this code? Or is there a better option that uses Python only? (I don't know Java and do not want to use Java.)

Please see the screenshot of the current result and sample code below.

test.py

"""
This script uses a simplified version of the one here:
https://snipt.net/restrada/python-selenium-workaround-for-full-page-screenshot-using-chromedriver-2x/

It contains the *crucial* correction added in the comments by Jason Coutu.
"""

import sys

from selenium import webdriver
import unittest

import util

class Test(unittest.TestCase):
    """Demo test case: have Chrome produce a screenshot of an entire page."""

    def setUp(self):
        """Start a fresh Chrome session before each test."""
        browser = webdriver.Chrome()
        self.driver = browser

    def tearDown(self):
        """Shut down the Chrome session started in ``setUp``."""
        self.driver.quit()

    def test_fullpage_screenshot(self):
        """Load the sample page and stitch a document-height screenshot."""
        # Alternative sample page: "http://effbot.org/imagingbook/introduction.htm"
        target = "http://www.w3schools.com/js/default.asp"
        browser = self.driver
        browser.get(target)
        util.fullpage_screenshot(browser, "test.png")


# Run the test case when this file is executed directly. Only the program name
# is passed as argv, so unittest does not parse any other command-line arguments.
if __name__ == "__main__":
    unittest.main(argv=[sys.argv[0]])

util.py

import os
import time

from PIL import Image

def _tile_rectangles(total_width, total_height, viewport_width, viewport_height):
    """Return (left, top, right, bottom) viewport-sized tiles covering the page, row by row."""
    rectangles = []
    for top in range(0, total_height, viewport_height):
        # The last row/column is clipped to the page bounds.
        bottom = min(top + viewport_height, total_height)
        for left in range(0, total_width, viewport_width):
            right = min(left + viewport_width, total_width)
            print("Appending rectangle ({0},{1},{2},{3})".format(left, top, right, bottom))
            rectangles.append((left, top, right, bottom))
    return rectangles


def fullpage_screenshot(driver, file):
    """Stitch viewport-sized screenshots into one full-page image.

    The page and viewport dimensions are read from the browser via
    JavaScript. The page is then scrolled tile by tile; each tile is captured
    to a temporary ``part_<n>.png`` file in the current working directory,
    pasted into the combined image and removed again.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        file: Destination path (or file object) handed to ``Image.save``.

    Returns:
        True once the stitched image has been saved.

    Raises:
        ValueError: If the browser reports a non-positive viewport size
            (tiling could never make progress).
    """
    print("Starting chrome full page screenshot workaround ...")

    total_width = int(driver.execute_script("return document.body.offsetWidth"))
    total_height = int(driver.execute_script("return document.body.parentNode.scrollHeight"))
    viewport_width = int(driver.execute_script("return document.body.clientWidth"))
    viewport_height = int(driver.execute_script("return window.innerHeight"))
    print("Total: ({0}, {1}), Viewport: ({2},{3})".format(total_width, total_height,viewport_width,viewport_height))

    # A zero-sized viewport would otherwise make the tiling loop spin forever.
    if viewport_width <= 0 or viewport_height <= 0:
        raise ValueError(
            "viewport size must be positive, got ({0}, {1})".format(viewport_width, viewport_height)
        )

    rectangles = _tile_rectangles(total_width, total_height, viewport_width, viewport_height)
    stitched_image = Image.new('RGB', (total_width, total_height))

    for part, rectangle in enumerate(rectangles):
        left, top = rectangle[0], rectangle[1]

        # The first tile is captured at the page's current scroll position.
        if part > 0:
            driver.execute_script("window.scrollTo({0}, {1})".format(left, top))
            print("Scrolled To ({0},{1})".format(left, top))
            time.sleep(0.2)  # give the page a moment to repaint after scrolling

        file_name = "part_{0}.png".format(part)
        print("Capturing {0} ...".format(file_name))

        if top + viewport_height > total_height:
            # The browser cannot scroll past the end of the page, so the last
            # row's capture actually starts one viewport above the bottom.
            # Clamp at 0 so a viewport taller than the page does not shift the
            # image upwards and cut off its top.
            offset = (left, max(total_height - viewport_height, 0))
        else:
            offset = (left, top)

        try:
            driver.get_screenshot_as_file(file_name)
            with Image.open(file_name) as screenshot:
                print("Adding to stitched image with offset ({0}, {1})".format(offset[0],offset[1]))
                stitched_image.paste(screenshot, offset)
        finally:
            # Never leave the temporary tile behind, even when capture/paste fails.
            if os.path.exists(file_name):
                os.remove(file_name)

    stitched_image.save(file)
    print("Finishing chrome full page screenshot workaround...")
    return True

解决方案

How it works: make the browser window as tall as the page (or taller), so that the whole page fits into a single screenshot.

#coding=utf-8
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def test_fullpage_screenshot(self=None):
    """Save a full-page screenshot by making the headless window as tall as the page.

    Headless Chrome is started, the page is loaded, and the window is resized
    to the height of the tallest element plus some slack before a single
    screenshot is written to ``screenshot1.png``.

    Args:
        self: Unused. It defaults to None so the function can be called
            without arguments, as the script entry point does.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--start-maximized')
    # ``options=`` is the supported keyword; newer Selenium releases no longer
    # accept the old ``chrome_options=`` spelling.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get("yoururlxxx")
        time.sleep(2)  # crude wait for the page to finish rendering

        # The element with the greatest height on the page (page-specific XPath).
        ele = driver.find_element("xpath", '//div[@class="react-grid-layout layout"]')
        # Extra 1000 px of slack, presumably for content outside that element.
        total_height = ele.size["height"] + 1000

        # The trick: a window this tall shows the whole page at once.
        driver.set_window_size(1920, total_height)
        time.sleep(2)  # let the layout settle after the resize
        driver.save_screenshot("screenshot1.png")
    finally:
        # Always shut the browser down, even when a step above fails.
        driver.quit()

# Script entry point: call the screenshot routine only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    test_fullpage_screenshot()

相关文章