Extract email addresses from a given URL.
Project description
Extract emails from a given website
Requirements
Python >= 3.6
requests
selenium
Installation
pip install extract_emails
Usage
With default browsers
from extract_emails import EmailExtractor
from extract_emails.browsers import ChromeBrowser

# Open the browser as a context manager and collect the emails found when
# starting from the given URL.
# NOTE(review): depth=2 presumably limits how many link levels are followed
# from the start page -- confirm against the extract_emails documentation.
with ChromeBrowser() as browser:
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

    # Each result is printed twice: as an Email object and as a plain dict.
    for email in emails:
        print(email)
        print(email.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
from extract_emails import EmailExtractor
from extract_emails.browsers import RequestsBrowser

# Same flow as any other browser: open it as a context manager and collect
# the emails found when starting from the given URL.
# NOTE(review): depth=2 presumably limits how many link levels are followed
# from the start page -- confirm against the extract_emails documentation.
with RequestsBrowser() as browser:
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

    # Each result is printed twice: as an Email object and as a plain dict.
    for email in emails:
        print(email)
        print(email.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
With custom browser
from extract_emails import EmailExtractor
from extract_emails.browsers import BrowserInterface
from selenium import webdriver
from selenium.webdriver.firefox.options import Options


class FirefoxBrowser(BrowserInterface):
    """Custom browser that fetches pages through Selenium's Firefox driver.

    Implements the two methods this example overrides from
    ``BrowserInterface``: ``close`` and ``get_page_source``.
    """

    def __init__(self) -> None:
        """Start a Firefox WebDriver session with default options."""
        ff_options = Options()
        # The geckodriver path below is machine-specific; change it to the
        # location of the binary on your system.
        # NOTE(review): recent Selenium releases replace ``executable_path``
        # with a ``Service`` object -- confirm against the installed version.
        self._driver = webdriver.Firefox(
            options=ff_options,
            executable_path="/home/di/geckodriver",
        )

    def close(self) -> None:
        """Quit the underlying WebDriver session."""
        self._driver.quit()

    def get_page_source(self, url: str) -> str:
        """Load ``url`` in the driver and return the resulting page source."""
        self._driver.get(url)
        return self._driver.page_source


# The custom browser is used exactly like the bundled ones.
# NOTE(review): context-manager support is presumably inherited from
# BrowserInterface, since this class defines no __enter__/__exit__ -- confirm.
with FirefoxBrowser() as browser:
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

    # Each result is printed twice: as an Email object and as a plain dict.
    for email in emails:
        print(email)
        print(email.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
extract_emails-4.0.2.tar.gz
(14.5 kB
view hashes)
Built Distribution
Close
Hashes for extract_emails-4.0.2-py2.py3-none-any.whl
| Algorithm | Hash digest |
|---|---|
| SHA256 | 35c82ffc0ded2e6c6e377d57ed0c31dbf536df7380ad2624d07f68e5f8ef7259 |
| MD5 | 7249430e34db16af50fdee5f56812f2e |
| BLAKE2b-256 | eee3b44e809d55b9147b6bd32f96c9a4e346b34e02ef8c7681bd691ae28868f9 |