Extract email addresses from given URL.
Project description
Extract emails from a given website
Requirements
Python 3.6 or newer
requests
lxml
Installation
pip install extract_emails
Usage
With default browsers
# Example: extract e-mail addresses using the ChromeBrowser shipped in
# extract_emails.browsers.
from extract_emails import EmailExtractor
from extract_emails.browsers import ChromeBrowser

# The browser is used as a context manager so it is released when the block
# exits (presumably via the browser's close() -- confirm in the package docs).
with ChromeBrowser() as browser:
    # NOTE(review): depth=2 presumably limits how many link levels are
    # followed from the start URL -- confirm against the package documentation.
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

    # Iterate while the browser is still open so this also works if
    # get_emails() returns a lazy iterable.
    for email in emails:
        print(email)
        print(email.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Example: extract e-mail addresses using the RequestsBrowser shipped in
# extract_emails.browsers.
from extract_emails import EmailExtractor
from extract_emails.browsers import RequestsBrowser

# The browser is used as a context manager so it is released when the block
# exits (presumably via the browser's close() -- confirm in the package docs).
with RequestsBrowser() as browser:
    # NOTE(review): depth=2 presumably limits how many link levels are
    # followed from the start URL -- confirm against the package documentation.
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

    # Iterate while the browser is still open so this also works if
    # get_emails() returns a lazy iterable.
    for email in emails:
        print(email)
        print(email.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
With custom browser
# Example: plug a custom Selenium-driven Firefox browser into EmailExtractor.
from extract_emails import EmailExtractor
from extract_emails.browsers import BrowserInterface
from selenium import webdriver
from selenium.webdriver.firefox.options import Options


class FirefoxBrowser(BrowserInterface):
    """Custom browser that fetches pages through a Selenium Firefox driver.

    Implements the two methods this example overrides on BrowserInterface:
    ``get_page_source`` and ``close``.
    """

    def __init__(self) -> None:
        """Start a Firefox WebDriver session with default options."""
        ff_options = Options()
        # NOTE(review): the geckodriver path is machine-specific; adjust it
        # for your environment. `executable_path` is deprecated in Selenium 4
        # in favour of a Service object -- confirm against the installed
        # Selenium version.
        self._driver = webdriver.Firefox(
            options=ff_options,
            executable_path="/home/di/geckodriver",
        )

    def close(self) -> None:
        """Quit the underlying WebDriver session."""
        self._driver.quit()

    def get_page_source(self, url: str) -> str:
        """Load *url* in the driver and return the resulting page source."""
        self._driver.get(url)
        return self._driver.page_source


# The context-manager support presumably comes from BrowserInterface --
# confirm in the package docs.
with FirefoxBrowser() as browser:
    # NOTE(review): depth=2 presumably limits how many link levels are
    # followed from the start URL -- confirm against the package documentation.
    email_extractor = EmailExtractor("http://www.tomatinos.com/", browser, depth=2)
    emails = email_extractor.get_emails()

    # Iterate while the browser is still open so this also works if
    # get_emails() returns a lazy iterable.
    for email in emails:
        print(email)
        print(email.as_dict())

# Example output:
# Email(email="bakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'bakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
# Email(email="freshlybakedincloverdale@gmail.com", source_page="http://www.tomatinos.com/")
# {'email': 'freshlybakedincloverdale@gmail.com', 'source_page': 'http://www.tomatinos.com/'}
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
extract_emails-4.0.0.tar.gz (13.7 kB, view hashes)
Built Distribution
Close
Hashes for extract_emails-4.0.0-py2.py3-none-any.whl
Algorithm | Hash digest
---|---
SHA256 | d9850bdb7b2a9c23ee1331391330939a717e73a04bd07f4d5adb704c0b362035
MD5 | bde75b647c899d3176049e491c1afe98
BLAKE2b-256 | 80242caf77867a0698414ede28ec3cc381eb073954b624cc7b3213c75786d498