-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Expand file tree
/
Copy pathraw_indeed.py
More file actions
32 lines (31 loc) · 1.05 KB
/
raw_indeed.py
File metadata and controls
32 lines (31 loc) · 1.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from seleniumbase import SB
# Demo script: search Indeed's company directory for one company, open its
# page, and print the text of that page's "About" section.

# Page address and the CSS selectors used along the way.
company_search_url = "https://www.indeed.com/companies/search"
search_input = "input#company-search"
submit_button = 'button[type="submit"]'
company_title = 'div[itemprop="name"]'
about_heading = 'h2:contains("About the company")'
about_section = '[data-testid="AboutSection-section"]'

# The company to look up, and the result link that carries its name.
company_name = "NASA Jet Propulsion Laboratory"
company_link = 'a:contains("%s")' % company_name

with SB(uc=True, test=True) as sb:
    sb.activate_cdp_mode(company_search_url)
    sb.sleep(2)

    # If the search box is missing, assume a CAPTCHA is in the way and
    # ask SeleniumBase to deal with it before continuing.
    search_ready = sb.is_element_present(search_input)
    if not search_ready:
        sb.solve_captcha()
        sb.sleep(1)

    # Focus the box, pause briefly, then type the company name and submit.
    sb.click(search_input)
    sb.sleep(0.1)
    sb.press_keys(search_input, company_name)
    sb.click(submit_button)
    sb.click(company_link)

    # Same CAPTCHA fallback for the company page, keyed on its name header.
    sb.sleep(1)
    page_ready = sb.is_element_present(company_title)
    if not page_ready:
        sb.sleep(2)
        sb.solve_captcha()
        sb.sleep(1)

    # Visually mark the company name and the "About" heading.
    sb.highlight(company_title)
    sb.sleep(1)
    sb.cdp.highlight(about_heading)
    sb.sleep(1)

    # Scroll down in ten small steps with a short pause after each one.
    for _ in range(10):
        sb.scroll_down(12)
        sb.sleep(0.14)

    # Pull the "About" section's HTML and flatten it to newline-separated text.
    about_element = sb.find_element(about_section)
    about_html = about_element.get_html()
    about_text = sb.get_beautiful_soup(about_html).get_text("\n").strip()

    # Re-attach any colon that landed at the start of a line to the
    # line before it, then print the result under a header.
    tidy_text = about_text.replace("\n:", ":")
    print("*** %s: ***\n%s" % (company_name, tidy_text))