import time
import random
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
# --- Shared state -----------------------------------------------------------
# Candidate records collected by scrape_cand() and later dumped by
# write_cands(); one dict per candidate.
all_cands = list()

# --- Browser configuration --------------------------------------------------
# Location of the chromedriver executable on this machine.
PATH = '/Users/boaty_mcboatface/Desktop/chromedriver'
# Landing page opened once before any candidate page is visited.
START_URL = 'https://digital.asahi.com/senkyo/shuinsen/2021/asahitodai/?_requesturl=senkyo/shuinsen/2021/asahitodai/'

options = webdriver.ChromeOptions()
# Point Chrome at an existing user-data directory instead of a fresh profile.
options.add_argument(
    "user-data-dir=/Users/boaty_mcboatface/Library/Application Support/Google/Chrome/Default"
)

# NOTE(review): the driver path is passed positionally here; newer Selenium
# releases may expect a Service object instead — confirm against the
# installed Selenium version.
driver = webdriver.Chrome(PATH, options=options)
driver.get(START_URL)

# --- Delays (seconds) -------------------------------------------------------
# Both values are sampled exactly once, here, and keep that value for the
# rest of the run.
wait_v_short = round(random.uniform(0.1, 0.6), 2)
wait_long = round(random.uniform(4, 5), 2)

# Pause after the initial navigation before doing anything else.
time.sleep(wait_long)
def write_cands(cands=None, path="cands2.csv"):
    """Write candidate records to a CSV file with a header row.

    Args:
        cands: Iterable of dicts keyed by the column names listed in
            ``headers`` below. When omitted, the module-level ``all_cands``
            list is written (the original behaviour).
        path: Destination file; defaults to ``"cands2.csv"``. The file is
            overwritten if it already exists.

    Raises:
        ValueError: If a record contains a key that is not a known column
            (standard ``csv.DictWriter`` behaviour). Keys missing from a
            record are written as empty cells.
    """
    # Column order mirrors the key order produced by scrape_cand(). Each name
    # must appear exactly once: a repeated field name makes DictWriter emit
    # the same column twice.
    headers = [
        "name",
        "age",
        "party",
        "dist",
        "status",
        "defense_strength",
        "preemtive_strike",
        "north_korea",
        "nuclear_3_no",
        "futenma",
        "anpo",
        "china",
        "social_services",
        "public_works",
        "fiscal_stimulus",
        "consumption_tax",
        "income_tax",
        "corporate_tax",
        "competitiveness",
        "protectionism",
        "bonds",
        "constitution",
        "immigration",
        "privacy",
        "fukushima",
        "marriage_name",
        "traditional_marriage",
        "lgbt",
        "nuclear",
        "malapportionment",
    ]
    rows = all_cands if cands is None else cands

    # newline="" is what the csv module requires so it controls line endings
    # itself (otherwise blank lines appear between rows on some platforms).
    # The encoding is explicit because the scraped text is presumably
    # non-ASCII and the platform default is not UTF-8 everywhere.
    with open(path, "w", newline="", encoding="utf-8") as file:
        csv_writer = csv.DictWriter(file, fieldnames=headers)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
def scrape_cand():
    """Read the candidate page currently loaded in ``driver`` into a record.

    Looks up the ``koho_Profile`` and ``koho_Answer`` elements, pulls the
    text of their ``<dd>`` children by position, and appends a single dict
    to the module-level ``all_cands`` list. Nothing is appended if any
    lookup raises (a missing element, or fewer ``<dd>`` entries than
    expected).
    """
    # Profile <dd> entries, in page order; the text sits in a nested <p>.
    profile_keys = ("age", "party", "dist", "status")
    # Answer <dd> entries, in page order; the text sits in a nested <div>.
    answer_keys = (
        "defense_strength",
        "preemtive_strike",
        "north_korea",
        "nuclear_3_no",
        "futenma",
        "anpo",
        "china",
        "social_services",
        "public_works",
        "fiscal_stimulus",
        "consumption_tax",
        "income_tax",
        "corporate_tax",
        "competitiveness",
        "protectionism",
        "bonds",
        "constitution",
        "immigration",
        "privacy",
        "fukushima",
        "marriage_name",
        "traditional_marriage",
        "lgbt",
        "nuclear",
        "malapportionment",
    )

    profile_box = driver.find_element(By.ID, "koho_Profile")
    profile_cells = profile_box.find_elements(By.TAG_NAME, "dd")
    answer_box = driver.find_element(By.ID, "koho_Answer")
    answer_cells = answer_box.find_elements(By.TAG_NAME, "dd")

    record = {"name": profile_box.find_element(By.CLASS_NAME, "name").text}
    # Index explicitly (rather than zip) so a page with too few <dd> entries
    # still raises IndexError instead of yielding a partial record.
    for position, key in enumerate(profile_keys):
        record[key] = profile_cells[position].find_element(By.TAG_NAME, "p").text
    for position, key in enumerate(answer_keys):
        record[key] = answer_cells[position].find_element(By.TAG_NAME, "div").text

    all_cands.append(record)
# Load the list of candidate page URLs. Each CSV row is treated as one URL;
# the cells of a row are concatenated back into a single string.
with open("cands_urls.csv", "r", newline="") as file:
    all_cand_urls = ["".join(row) for row in csv.reader(file)]

try:
    for url in all_cand_urls:
        if not url:
            # Blank line in the CSV: nothing to visit.
            continue
        try:
            driver.get(url)
            # Sample a fresh short pause for every request; a single value
            # drawn once at start-up would make every pause identical.
            time.sleep(round(random.uniform(0.1, 0.6), 2))
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.ID, "koho_Profile"))
            )
            scrape_cand()
            time.sleep(round(random.uniform(0.1, 0.6), 2))
        except Exception as exc:
            # Scraping is best-effort per page: a timeout, a missing element
            # or a short answer list skips this candidate only. Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit stop the
            # run, and the failure is reported instead of silently dropped.
            print("Skipping", url, "-", repr(exc))
finally:
    # Always persist whatever was collected, even if the run is interrupted,
    # and always shut the browser/driver process down afterwards.
    try:
        write_cands()
    finally:
        driver.quit()