This is an update to "Full page screenshots with Python and Selenium", revised to work with Python 3 and to fix an issue with single pages.
Script was tested with:
- Python 3.6
- Pillow 5.3.0
- selenium 3.141.0
# stdlib imports
import datetime
import math
import os
import tempfile
# third-party imports
from PIL import Image
from selenium import webdriver
def get_chrome_drive(driver_path=None):
    """
    Start a headless Chrome WebDriver.

    driver_path -- path to the chromedriver executable. When None, the
                   'bin/chromedriver' file located next to this script is used.

    Returns the started webdriver.Chrome instance. chromedriver is asked to
    write a verbose log to 'chromedriver.log' next to this script.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    log_path = os.path.join(base_dir, 'chromedriver.log')
    if driver_path is None:
        driver_path = os.path.join(base_dir, 'bin', 'chromedriver')

    options = webdriver.ChromeOptions()
    options.headless = True
    options.add_argument('--hide-scrollbars')
    options.add_argument('--no-sandbox')

    # `options=` is the supported keyword; `chrome_options=` is deprecated
    # and only emits a DeprecationWarning before being mapped to `options`.
    driver = webdriver.Chrome(
        executable_path=driver_path,
        options=options,
        service_args=[
            '--log-path={}'.format(log_path),
            '--verbose',
        ],
    )
    return driver
def get_firefox_drive(driver_path=None):
    """
    Start a headless Firefox WebDriver.

    driver_path -- path to the geckodriver executable. When None, the
                   'bin/geckodriver' file located next to this script is used.

    Returns the started webdriver.Firefox instance. geckodriver's log is
    directed to 'geckodriver.log' next to this script.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    log_path = os.path.join(base_dir, 'geckodriver.log')
    if driver_path is None:
        driver_path = os.path.join(base_dir, 'bin', 'geckodriver')

    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')

    # `options=` replaces the deprecated `firefox_options=` keyword.
    # The log path was previously computed but never handed to the driver, so
    # the log ended up wherever geckodriver's default put it; pass it on so it
    # lands next to the script, as the Chrome variant does.
    driver = webdriver.Firefox(
        executable_path=driver_path,
        options=options,
        service_log_path=log_path,
    )
    return driver
def save_fullpage_screenshot(driver, url, output_path, tmp_prefix='selenium_screenshot', tmp_suffix='.png'):
    """
    Creates a full page screenshot using a selenium driver by scrolling and taking multiple screenshots,
    and stitching them into a single image.

    driver      -- a started selenium WebDriver; it is navigated to `url`.
    url         -- address of the page to capture.
    output_path -- where the stitched image is saved.
    tmp_prefix  -- prefix for the temporary per-viewport screenshot files.
    tmp_suffix  -- suffix (file extension) for those temporary files.

    Returns `output_path`. The temporary files are removed even when
    capturing or stitching fails.
    """
    # get the page
    driver.get(url)

    # get dimensions
    window_height = driver.execute_script('return window.innerHeight')
    scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
    num = int(math.ceil(float(scroll_height) / float(window_height)))

    tempfiles = []
    try:
        # get temp files (inside the try so a failure part-way through
        # still removes the ones already created)
        for i in range(num):
            fd, path = tempfile.mkstemp(
                prefix='{0}-{1:02}-'.format(tmp_prefix, i + 1),
                suffix=tmp_suffix,
            )
            os.close(fd)
            tempfiles.append(path)

        # take screenshots, scrolling one viewport down before each one
        # except the first
        for i, path in enumerate(tempfiles):
            if i > 0:
                driver.execute_script('window.scrollBy(%d,%d)' % (0, window_height))
            driver.save_screenshot(path)

        # stitch images together
        # NOTE(review): tile offsets use window.innerHeight while the crop
        # uses the screenshot's pixel height; these presumably only agree at
        # a device pixel ratio of 1 -- confirm before using on HiDPI setups.
        stiched = None
        last_index = len(tempfiles) - 1
        for i, path in enumerate(tempfiles):
            # Close each image file as soon as it has been pasted so no file
            # handles are still open when the temp files are removed.
            with Image.open(path) as img:
                w, h = img.size
                y = i * window_height
                tile = img

                if i == last_index and num > 1:
                    # The final scroll stops at the bottom of the page, so
                    # only the lower `remainder` rows of the last screenshot
                    # are new content. When the page height is an exact
                    # multiple of the tile height the whole tile is new;
                    # cropping then would yield an empty image and drop the
                    # bottom segment.
                    remainder = scroll_height % h
                    if remainder:
                        tile = img.crop((0, h - remainder, w, h))
                        w, h = tile.size

                if stiched is None:
                    stiched = Image.new('RGB', (w, scroll_height))

                stiched.paste(tile, (
                    0,      # x0
                    y,      # y0
                    w,      # x1
                    y + h,  # y1
                ))

        stiched.save(output_path)
    finally:
        # cleanup
        for path in tempfiles:
            if os.path.isfile(path):
                os.remove(path)

    return output_path
def main():
    """
    Capture a full page screenshot of a fixed URL into a timestamped PNG
    (relative path, so it lands in the current working directory) and print
    the file name.
    """
    now = datetime.datetime.now()
    filename = 'screenshot-{}-{}.png'.format(
        now.strftime('%Y%m%d'),
        now.strftime('%H%M%S'),
    )

    # flip to False to capture with Firefox instead of Chrome
    use_chrome = True
    driver = get_chrome_drive() if use_chrome else get_firefox_drive()
    try:
        driver.set_window_size(1280, 800)
        url = 'https://arthurpemberton.com/'
        save_fullpage_screenshot(
            driver,
            url,
            filename,
        )
    finally:
        # always shut the browser down, even if the capture fails, so no
        # driver/browser process is left running
        driver.quit()

    print(filename)


if __name__ == '__main__':
    main()
