Popular topics: Multilogin X, Multilogin 6,
Setting up automatic cookie collection
Table of contents
In this article, we're sharing a script that automates cookie collection from target websites, ensuring compliance with cookie consent requirements. This script is compatible with both quick and existing regular profiles.
Before you start
Preparing the environment
First, make sure you have Python 3 or newer installed on your machine.
The Python libraries below must be installed before running the script. Find instructions to install Python libraries here.
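- python-dotenv (imported in the script as dotenv)
- requests
- selenium
- os, hashlib, time and random are also used, but they are part of the Python standard library and need no installation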
Preparing the files
- Save the scripts below as .py files in a dedicated folder on your device
cookie_robot.py
import os
import dotenv
import time
import random
from multilogin import Mlx
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains as Ac
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
class CookieRobot:
    """Collect cookies by browsing a list of websites in a Multilogin profile.

    The robot signs in through ``Mlx``, starts an existing profile, attaches a
    Selenium driver to it, configures the cookie-consent helper extension and
    then clicks through same-site links on every configured website.
    """

    # Title of the tab whose cookie preferences are configured in allow_cookies().
    EXTENSION_TAB_TITLE = "Welcome to superagent!"
    # Number of successful link clicks to perform on each website.
    CLICKS_PER_WEBSITE = 15
    # Upper bound on click attempts per website, so a page whose links can
    # never be clicked does not keep the loop running forever.
    MAX_CLICK_ATTEMPTS = 60

    def __init__(
        self,
        email_address: str,
        password: str,
        websites: list,
        profile_id=None,
        folder_id=None,
        token=None,
        browser_type=None,
    ):
        """Store the run configuration and create the Multilogin API client.

        Args:
            email_address: Account e-mail passed to ``Mlx``.
            password: Account password passed to ``Mlx``.
            websites: URLs to visit for cookie collection.
            profile_id: ID of the profile to start.
            folder_id: ID of the folder that contains the profile.
            token: Existing API token; when ``None``, ``run()`` signs in.
            browser_type: Browser type forwarded to ``Mlx.instantiate_driver``.
        """
        self.folder_id = folder_id
        self.websites = websites
        self.profile_id = profile_id
        self.token = token
        self.mlx = Mlx(email_address, password, token)
        self.browser_type = browser_type
        # Set by start_profile() / run(); defined here so they always exist.
        self.profile_port = None
        self.driver = None

    @staticmethod
    def _site_keyword(website: str) -> str:
        """Return the first host label of *website*, ignoring a leading ``www.``.

        ``"https://stackoverflow.com/"`` gives ``"stackoverflow"``. URLs without
        a scheme are accepted too, and ``www.`` is dropped so the keyword does
        not degrade to ``"www"``, which would match almost any link.
        """
        _, separator, remainder = website.partition("//")
        host = (remainder if separator else website).split("/")[0]
        if host.startswith("www."):
            host = host[len("www."):]
        return host.split(".")[0]

    def allow_cookies(self):
        """Configure cookie preferences in the helper extension's tab.

        Switches to the window titled ``EXTENSION_TAB_TITLE`` (if none matches,
        the last window stays selected), opens "Cookie Preferences", clicks the
        "advertising" and "other" inputs, verifies that eight "Accept" headings
        are present and closes that window.

        Raises:
            AssertionError: If the number of "Accept" headings is not 8.
        """
        driver = self.driver
        wait = WebDriverWait(driver, 20)
        # Fixed pause before looking for the tab; presumably gives the
        # extension time to open its welcome page - TODO confirm.
        time.sleep(10)
        for handle in driver.window_handles:
            driver.switch_to.window(handle)
            if driver.title == self.EXTENSION_TAB_TITLE:
                break
        wait.until(
            EC.presence_of_element_located(
                (By.XPATH, "//h6[contains(., 'Cookie Preferences')]")
            )
        ).click()
        wait.until(EC.presence_of_element_located((By.NAME, "advertising"))).click()
        wait.until(EC.presence_of_element_located((By.NAME, "other"))).click()
        cookie_options_number = len(
            driver.find_elements(By.XPATH, "//h6[contains(., 'Accept')]")
        )
        # Raised explicitly (instead of an ``assert`` statement) so the check
        # is not stripped when Python runs with -O.
        if cookie_options_number != 8:
            raise AssertionError(
                f"Expected 8 'Accept' cookie options, found {cookie_options_number}"
            )
        driver.close()

    def automation(self):
        """Run the whole browsing session and always clean up afterwards.

        The extension setup runs inside the ``try`` block so that a failure
        there still quits the driver and stops the profile.
        """
        try:
            self.allow_cookies()
            main_handle = self.driver.window_handles[0]
            self.driver.switch_to.window(main_handle)
            for website in self.websites:
                self._browse_website(website)
        except Exception as e:
            print(f"Something happened: {e}")
        finally:
            # Close browser profile and quit driver; the profile is stopped
            # even when quitting the driver fails.
            try:
                if self.driver is not None:
                    self.driver.quit()
            finally:
                self.mlx.stop_profile(self.profile_id)

    def _browse_website(self, website):
        """Open *website* and click random same-site links to collect cookies."""
        keyword = self._site_keyword(website)
        self.driver.get(website)
        clicks = 0
        attempts = 0
        while clicks < self.CLICKS_PER_WEBSITE and attempts < self.MAX_CLICK_ATTEMPTS:
            attempts += 1
            current_page = self.driver.current_url
            # Watch Youtube videos
            if "watch?" in current_page:
                time.sleep(random.randrange(120, 240))
            # Watch "Shorts" videos on Youtube
            elif "shorts" in current_page:
                time.sleep(random.randrange(60, 90))
            self.scroll_randomly(random.randint(1, 5))
            candidates = self._same_site_links(keyword)
            if not candidates:
                # random.choice() would raise on an empty list and abort the
                # remaining websites; move on to the next one instead.
                print(f"No links containing '{keyword}' found on {current_page}")
                break
            if self._click(random.choice(candidates)):
                clicks += 1
            time.sleep(random.randint(3, 5))

    def _same_site_links(self, keyword):
        """Return the ``<a>`` elements on the page whose href contains *keyword*."""
        matching = []
        for element in self.driver.find_elements(By.TAG_NAME, "a"):
            href = element.get_attribute("href")
            if href is not None and keyword in href:
                matching.append(element)
        return matching

    def _click(self, element):
        """Click *element*, falling back to a JavaScript click; return success."""
        try:
            Ac(self.driver).move_to_element(element).pause(5).click().perform()
            return True
        except Exception:
            # Fall through to the JavaScript click below.
            pass
        try:
            self.driver.execute_script(
                "arguments[0].scrollIntoView(true); arguments[0].click();",
                element,
            )
            return True
        except Exception as e:
            print(f"Could not click link: {e}")
            return False

    def scroll_randomly(self, times):
        """Scroll to a random vertical position *times* times, pausing 1-5 s each."""
        for _ in range(times):
            total_height = self.driver.execute_script(
                "return document.body.scrollHeight"
            )
            # Guard against a missing height so randint() always gets an int.
            random_position = random.randint(0, int(total_height or 0))
            self.driver.execute_script(f"window.scrollTo(0, {random_position});")
            time.sleep(random.randint(1, 5))

    def start_profile(self):
        """Start the profile, retrying every 60 seconds until it is running.

        Returns:
            ``True`` once the profile has started, ``False`` if an exception
            interrupted the attempt.
        """
        try:
            while True:
                (
                    self.profile_id,
                    self.profile_port,
                    profile_started,
                    message,
                ) = self.mlx.start_normal_profile(self.profile_id, self.folder_id)
                if profile_started:
                    return True
                print(
                    f"Profile couldn't be started. Probably downloading core. Will wait for 60 seconds and try again. Here is the message: {message}"
                )
                time.sleep(60)
        except Exception as e:
            print(f"Problem with starting profile: {e}")
            return False

    def run(self):
        """Sign in if needed, start the profile, attach a driver and browse."""
        if self.token is None:
            self.token = self.mlx.signin()
            if self.token is None:
                print("Sign-in failed, so the profile cannot be started.")
                return
        if not self.start_profile():
            return
        try:
            self.driver = self.mlx.instantiate_driver(
                self.profile_port, self.browser_type
            )
        except Exception:
            # The profile is already running; do not leave it behind.
            self.mlx.stop_profile(self.profile_id)
            raise
        self.automation()
if __name__ == "__main__":
    # Read settings from the .env file located next to the script.
    dotenv.load_dotenv()
    # Add as many websites as you want for cookie collection
    WEBSITES = ["https://stackoverflow.com/", "https://medium.com/"]
    EMAIL = os.getenv("MLX_EMAIL")
    PASSWORD = os.getenv("MLX_PASSWORD")
    # An empty or missing BROWSER_TYPE falls back to "mimic".
    BROWSER = os.getenv("BROWSER_TYPE") or "mimic"
    PROFILE_ID = os.getenv("PROFILE_ID")
    FOLDER_ID = os.getenv("FOLDER_ID")
    # Fail early with a readable message instead of an obscure error later on.
    required_settings = {
        "MLX_EMAIL": EMAIL,
        "MLX_PASSWORD": PASSWORD,
        "PROFILE_ID": PROFILE_ID,
        "FOLDER_ID": FOLDER_ID,
    }
    missing_settings = [name for name, value in required_settings.items() if not value]
    if missing_settings:
        raise SystemExit(
            f"Missing required settings in .env: {', '.join(missing_settings)}"
        )
    bot = CookieRobot(
        email_address=EMAIL,
        password=PASSWORD,
        websites=WEBSITES,
        browser_type=BROWSER,
        profile_id=PROFILE_ID,
        folder_id=FOLDER_ID,
    )
    bot.run()
multilogin.py
import os
import requests
import hashlib
from selenium import webdriver
from selenium.webdriver.chromium.options import ChromiumOptions
from selenium.webdriver.firefox.options import Options
class Mlx:
    """Small client for the Multilogin cloud API and the local launcher.

    Every request is sent with ``self.headers``; once a token is known the
    headers also carry an ``Authorization: Bearer <token>`` entry.
    """

    API_URL = "https://api.multilogin.com"
    LAUNCHER_URL = "https://launcher.mlx.yt:45001/api"
    PROXY_URL = "https://profile-proxy.multilogin.com/v1/proxy/connection_url"
    # Location of the helper extension for each supported browser type.
    EXTENSION_PATHS = {
        "stealthfox": "./extensions/superagent.xpi",
        "mimic": "./extensions/superagent",
    }

    def __init__(self, email: str, password: str, token=None):
        """Store credentials and prepare the default request headers.

        Args:
            email: Account e-mail used by ``signin``.
            password: Account password used by ``signin``.
            token: Optional existing API token; when given it is placed in the
                Authorization header right away so no sign-in is needed.
        """
        self.email = email
        self.password = password
        self.token = token
        self.headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        if token:
            self.headers["Authorization"] = f"Bearer {token}"

    @staticmethod
    def _error_message(response) -> str:
        """Return the ``status.message`` of an error response, or its HTTP code."""
        try:
            return response.json()["status"]["message"]
        except (ValueError, KeyError, TypeError):
            # Body is not JSON or lacks the expected structure.
            return f"HTTP {response.status_code}"

    def signin(self) -> str:
        """Sign in and remember the token.

        Returns:
            The new token on success; on failure the previously stored token,
            which is ``None`` if there was none.
        """
        url = f"{self.API_URL}/user/signin"
        payload = {
            "email": self.email,
            # The endpoint is sent the MD5 hex digest of the password rather
            # than the plain text (presumably required by the API - confirm
            # against the Multilogin API docs).
            "password": hashlib.md5(self.password.encode()).hexdigest(),
        }
        r = requests.post(url=url, headers=self.headers, json=payload)
        if r.status_code != 200:
            # Any non-200 status lands here, so report the status code too.
            print(f"Sign-in failed (HTTP {r.status_code}). Wrong credentials?")
            return self.token
        self.token = r.json()["data"]["token"]
        self.headers.update({"Authorization": f"Bearer {self.token}"})
        return self.token

    def start_quick_profile(self, browser_type):
        """Start a quick profile with the helper extension loaded.

        Returns:
            ``(profile_id, port, started, message)``; the first two are
            ``None`` and ``started`` is ``False`` on any failure, including an
            unsupported *browser_type*.
        """
        relative_path = self.EXTENSION_PATHS.get(browser_type)
        if relative_path is None:
            return None, None, False, f"Unsupported browser type: {browser_type!r}"
        self.extension_path = os.path.abspath(relative_path)
        payload = {
            "browser_type": browser_type,
            "os_type": "linux",
            "automation": "selenium",
            "parameters": {
                "fingerprint": {
                    "cmd_params": {
                        "params": [
                            {"flag": "load-extension", "value": self.extension_path}
                        ]
                    }
                },
                "flags": {
                    "audio_masking": "mask",
                    "fonts_masking": "mask",
                    "geolocation_masking": "mask",
                    "geolocation_popup": "prompt",
                    "graphics_masking": "mask",
                    "graphics_noise": "mask",
                    "localization_masking": "mask",
                    "media_devices_masking": "mask",
                    "navigator_masking": "mask",
                    "ports_masking": "mask",
                    "proxy_masking": "disabled",
                    "screen_masking": "mask",
                    "timezone_masking": "mask",
                    "webrtc_masking": "mask",
                },
            },
        }
        try:
            response = requests.post(
                url=f"{self.LAUNCHER_URL}/v2/profile/quick",
                headers=self.headers,
                json=payload,
            )
            data = response.json()
            message = data["status"]["message"]
            if data["status"]["http_code"] != 200:
                return None, None, False, message
            return data["data"]["id"], data["data"]["port"], True, message
        except Exception as e:
            return None, None, False, str(e)

    def start_normal_profile(self, profile_id: str, folder_id: str):
        """Start an existing profile through the launcher.

        Returns:
            ``(profile_id, port, started, message)``; ``port`` and ``started``
            are ``False`` when the launcher does not answer with HTTP 200.
        """
        url = f"{self.LAUNCHER_URL}/v2/profile/f/{folder_id}/p/{profile_id}/start?automation_type=selenium&headless_mode=false"
        response = requests.get(url=url, headers=self.headers)
        if response.status_code != 200:
            message = self._error_message(response)
            print(f"Error at starting profile: {message}")
            return profile_id, False, False, message
        data = response.json()
        return profile_id, data["data"]["port"], True, data["status"]["message"]

    def stop_profile(self, profile_id: str):
        """Ask the launcher to stop the profile and print the outcome."""
        url = f"{self.LAUNCHER_URL}/v1/profile/stop/p/{profile_id}"
        r = requests.get(url=url, headers=self.headers)
        if r.status_code != 200:
            print("Can't stop profile")
        else:
            print("Profile stopped")

    def instantiate_driver(
        self, profile_port: str, browser_type="mimic"
    ) -> "webdriver.Remote":
        """Attach a remote Selenium driver to a running profile.

        Args:
            profile_port: Local port returned when the profile was started.
            browser_type: ``"mimic"`` or ``"stealthfox"``; ``None`` or an empty
                string falls back to ``"mimic"``.

        Raises:
            ValueError: If *browser_type* is not supported.
        """
        browser_type = browser_type or "mimic"
        if browser_type == "mimic":
            options = ChromiumOptions()
        elif browser_type == "stealthfox":
            options = Options()
        else:
            raise ValueError(f"Unsupported browser type: {browser_type!r}")
        return webdriver.Remote(
            command_executor=f"http://127.0.0.1:{profile_port}", options=options
        )

    def get_proxy_details(self, proxy_settings, token=None) -> dict:
        """Request a sticky SOCKS5 proxy session.

        Args:
            proxy_settings: Mapping with ``country_code``, ``region`` and
                ``city`` keys.
            token: API token to use; when ``None`` a fresh sign-in is done.

        Returns:
            A dict with ``host``, ``port``, ``username`` and ``password``, or
            an empty dict when no session could be obtained.
        """
        if token is None:
            # signin() stores the token and updates the headers itself.
            if self.signin() is None:
                return {}
        else:
            self.token = token
            self.headers.update({"Authorization": f"Bearer {token}"})
        payload = {
            "country": proxy_settings["country_code"],
            "region": proxy_settings["region"],
            "city": proxy_settings["city"],
            "protocol": "socks5",
            "sessionType": "sticky",
            "IPTTL": 0,
        }
        response = requests.post(url=self.PROXY_URL, headers=self.headers, json=payload)
        if response.status_code != 201:
            print(f"Could not get proxy session: {response.status_code}")
            return {}
        # The code splits the response data into four colon-separated fields;
        # limiting the split keeps a password that contains ':' intact.
        session = response.json()["data"].split(":", 3)
        if len(session) != 4:
            print("Could not get proxy session: unexpected response format")
            return {}
        return {
            "host": session[0],
            "port": session[1],
            "username": session[2],
            "password": session[3],
        }

    def create_profile(self, proxy_details, profile_details, FOLDER_ID):
        """Create a "mimic" profile that uses the given proxy.

        Args:
            proxy_details: Dict with ``host``, ``port``, ``username`` and
                ``password``.
            profile_details: Dict with ``first_name`` and ``last_name``, used
                to build the profile name.
            FOLDER_ID: Folder in which the profile is created.

        Returns:
            ``(profile_id, FOLDER_ID, True)`` on success, otherwise
            ``(None, None, None)``.
        """
        if self.token is None and self.signin() is None:
            # Without a token the request could only be rejected.
            print("Could not create profile: sign-in failed")
            return None, None, None
        payload = {
            "name": f"{profile_details['first_name']} {profile_details['last_name']}",
            "folder_id": FOLDER_ID,
            "browser_type": "mimic",
            "os_type": "linux",
            "is_headless": False,
            "proxy": {
                "host": proxy_details["host"],
                "type": "socks5",
                "port": proxy_details["port"],
                "username": proxy_details["username"],
                "password": proxy_details["password"],
            },
            "parameters": {
                "fingerprint": {
                    "cmd_params": {
                        "params": [{"flag": "disable-notifications", "value": "true"}]
                    }
                },
                "flags": {
                    "audio_masking": "natural",
                    "fonts_masking": "mask",
                    "geolocation_masking": "mask",
                    "geolocation_popup": "allow",
                    "graphics_masking": "natural",
                    "graphics_noise": "natural",
                    "localization_masking": "mask",
                    "media_devices_masking": "natural",
                    "navigator_masking": "mask",
                    "ports_masking": "natural",
                    "proxy_masking": "custom",
                    "screen_masking": "natural",
                    "timezone_masking": "mask",
                    "webrtc_masking": "mask",
                },
                "storage": {"is_local": False, "save_service_worker": False},
            },
        }
        url = f"{self.API_URL}/profile/create"
        response = requests.post(url=url, headers=self.headers, json=payload)
        if response.status_code != 201:
            print(f"Could not create profile: Error {response.status_code}")
            return None, None, None
        profile_id = response.json()["data"]["ids"][0]
        created = True
        return profile_id, FOLDER_ID, created
- Install this browser extension in your browser profiles using the “Folder” method. The browser extension helps with accepting cookies automatically.
- Save the information below as a .env file in the same folder and fill in the required details
.env
MLX_EMAIL=
MLX_PASSWORD=
BROWSER_TYPE=
PROFILE_ID=
FOLDER_ID=
- Edit the website list in the
cookie_robot.py
file
# Add as many websites as you need. You can even remove this array and add another method of reading the list of websites, such as by using a spreadsheet, for example.
WEBSITES = [
"https://stackoverflow.com/",
"https://medium.com/"
]
Running the script
- Make sure the agent is connected, as it makes profile launching possible
- Run the
cookie_robot.py
file