Arthur

Pemberton

Full-stack web applications developer


Full page screenshots with Python 3 and Selenium

December 26, 2018 · Arthur Pemberton · 1 Comment

This is an update to "Full page screenshots with Python and Selenium", revised to work with Python 3 and to fix an issue with single pages.

Script was tested with:

  • Python 3.6
  • Pillow 5.3.0
  • selenium 3.141.0
# stdlib imports
import datetime
import math
import os
import tempfile
# third-party imports
from PIL import Image
from selenium import webdriver


def get_chrome_drive(driver_path=None):
	"""
	Creates a headless Chrome selenium driver.

	driver_path -- path to the chromedriver executable; when None, it defaults
	               to bin/chromedriver next to this script.

	Returns the webdriver.Chrome instance. chromedriver is started with verbose
	logging to chromedriver.log next to this script.
	"""
	base_dir = os.path.dirname( os.path.abspath(__file__) )
	log_path = os.path.join( base_dir, 'chromedriver.log' )

	if driver_path is None:
		driver_path = os.path.join( base_dir, 'bin', 'chromedriver' )

	options = webdriver.ChromeOptions()
	options.headless = True
	options.add_argument('--hide-scrollbars')
	options.add_argument('--no-sandbox')

	driver = webdriver.Chrome(
		executable_path=driver_path,
		# `options` replaces the deprecated `chrome_options` keyword
		options=options,
		service_args=[
			'--log-path={}'.format(log_path),
			'--verbose',
		]
	)

	return driver

def get_firefox_drive(driver_path=None):
	"""
	Creates a headless Firefox selenium driver.

	driver_path -- path to the geckodriver executable; when None, it defaults
	               to bin/geckodriver next to this script.

	Returns the webdriver.Firefox instance. geckodriver logs to geckodriver.log
	next to this script.
	"""
	base_dir = os.path.dirname( os.path.abspath(__file__) )
	log_path = os.path.join( base_dir, 'geckodriver.log' )

	if driver_path is None:
		driver_path = os.path.join( base_dir, 'bin', 'geckodriver' )

	options = webdriver.FirefoxOptions()
	options.add_argument('-headless')

	driver = webdriver.Firefox(
		executable_path=driver_path,
		# `options` replaces the deprecated `firefox_options` keyword
		options=options,
		# log_path was previously computed but never handed to the driver,
		# so the log did not end up next to the script like the Chrome one
		service_log_path=log_path
	)

	return driver

def save_fullpage_screenshot(driver, url, output_path, tmp_prefix='selenium_screenshot', tmp_suffix='.png'):
	"""
	Creates a full page screenshot using a selenium driver by scrolling and taking multiple screenshots,
	and stitching them into a single image.

	driver      -- selenium driver used to load the page, scroll and take screenshots
	url         -- address of the page to capture
	output_path -- where the stitched image is saved
	tmp_prefix  -- file name prefix for the temporary per-viewport screenshots
	tmp_suffix  -- file name suffix for the temporary per-viewport screenshots

	Returns output_path. The temporary screenshots are removed even when an error occurs.
	"""

	# get the page
	driver.get(url)

	# get dimensions
	window_height = driver.execute_script('return window.innerHeight')
	scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
	num = int( math.ceil( float(scroll_height) / float(window_height) ) )

	tempfiles = []

	try:
		# get temp files (inside the try so a failure part-way through still cleans up)
		for i in range( num ):
			fd,path = tempfile.mkstemp(prefix='{0}-{1:02}-'.format(tmp_prefix, i+1), suffix=tmp_suffix)
			os.close(fd)
			tempfiles.append(path)

		# take screenshots, scrolling down one viewport before every one but the first
		for i,path in enumerate(tempfiles):
			if i > 0:
				driver.execute_script( 'window.scrollBy(%d,%d)' % (0, window_height) )

			driver.save_screenshot(path)

		# stitch images together
		stitched = None
		last_index = len(tempfiles) - 1
		for i,path in enumerate(tempfiles):
			# the context manager releases the file handle before cleanup deletes the file
			with Image.open(path) as img:
				w, h = img.size
				# paste positions use window_height, i.e. screenshots are
				# assumed to be exactly one viewport tall
				y = i * window_height

				tile = img
				# the last scroll stops at the bottom of the page, so the last screenshot
				# overlaps the previous one; keep only its bottom, not yet covered part
				overlap_free = scroll_height % h
				# when the page height is an exact multiple of the screenshot height there
				# is no overlap; cropping then would produce an empty image and lose the tile
				if i == last_index and num > 1 and overlap_free:
					tile = img.crop((
						0,
						h - overlap_free,
						w,
						h
					))

				if stitched is None:
					stitched = Image.new('RGB', (w, scroll_height))

				stitched.paste(tile, (0, y))

		stitched.save(output_path)
	finally:
		# cleanup
		for path in tempfiles:
			if os.path.isfile(path):
				os.remove(path)

	return output_path


def main():
	"""
	Takes a full page screenshot of https://arthurpemberton.com/ in a 1280x800 window,
	saves it under a timestamped relative file name (screenshot-YYYYMMDD-HHMMSS.png)
	and prints that file name.
	"""
	now = datetime.datetime.now()

	filename = 'screenshot-{}-{}.png'.format(
		now.strftime('%Y%m%d'),
		now.strftime('%H%M%S')
	)

	url = 'https://arthurpemberton.com/'

	# set to False to take the screenshot with Firefox instead
	use_chrome = True
	driver = get_chrome_drive() if use_chrome else get_firefox_drive()

	try:
		driver.set_window_size(1280,800)

		save_fullpage_screenshot(
			driver,
			url,
			filename
		)
	finally:
		# always shut the browser down, even when taking the screenshot fails
		driver.quit()

	print( filename )

	return


# run main() only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
	main()


Leave a Reply