-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
245342d
commit 5da4c5e
Showing
13 changed files
with
627 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
*.csv | ||
*.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
FROM python:3.8

# Install Google Chrome in a single layer so the apt cache is updated,
# used, and cleaned within the same layer (avoids stale-cache rebuilds
# and keeps the image smaller). unzip and mpg123 ride along here since
# they come from the same apt update.
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
    && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list' \
    && apt-get -y update \
    && apt-get install -yqq google-chrome-stable unzip mpg123 \
    && rm -rf /var/lib/apt/lists/*

# Install the chromedriver matching the latest published release, then
# drop the archive so it does not stay in the layer.
RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip \
    && unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/ \
    && rm /tmp/chromedriver.zip

# Streamlit prompts for an email on first run; pre-seed credentials so the
# container starts non-interactively.
RUN mkdir -p /root/.streamlit \
    && bash -c 'echo -e "\
[general]\n\
email = \"[email protected]\"\n\
" > /root/.streamlit/credentials.toml'

# Set display port to avoid headless-Chrome crashes.
ENV DISPLAY=:99

# Upgrade pip before installing requirements.
RUN pip install --upgrade pip

# Set the working directory in the container to /app
WORKDIR /app

# Copy and install requirements first so the dependency layer is cached
# across source-code changes.
COPY ./requirements.txt /app/requirements.txt
RUN pip install --upgrade -r /app/requirements.txt

# COPY is preferred over ADD for plain local files (no archive/URL magic).
COPY . /app

# Run the Streamlit app when the container launches.
CMD ["streamlit", "run", "main.py", "--server.port", "9999"]
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
"""Streamlit front-end for the LinkedIn job scraper.

Reads the scraper's output from ./data, (re)launches ./script.py when the
data is stale, and renders each scraped job as an HTML list entry with a
collapsible tech-stack panel.
"""
from helpers import *  # NOTE(review): wildcard import from project helpers — verify it has no name collisions
import streamlit as st
import html
from streamlit_extras.add_vertical_space import add_vertical_space
import json
import subprocess
import sys
import ast
import time  # explicit: was only available implicitly via `from helpers import *`
import pandas as pd  # explicit: was only available implicitly via `from helpers import *`


st.set_page_config(page_title="Job Scraper", layout="wide")


# BUG FIX: the original tested `["first_start"] not in st.session_state` —
# membership of a one-element *list*, which is never a session-state key, so
# the condition was always True and a scraper subprocess was spawned on every
# rerun. Test the key itself.
if "first_start" not in st.session_state:
    st.session_state["first_start"] = True


# Launch the scraper on first load, or when the data is older than the
# configured auto-scrape frequency and no scrape is already in flight.
with open("./data/app_metadata.json", "r") as f:
    metadata = json.load(f)
data_is_stale = (
    time.time() - metadata["last_scrape_time"]
    > metadata["auto_scrape_frequency_in_mins"] * 60
)
if (data_is_stale and not metadata["process_busy"]) or st.session_state["first_start"]:
    st.session_state["first_start"] = False
    # Use the interpreter running this app instead of the original
    # hard-coded, machine-specific conda path.
    subprocess.Popen([sys.executable, "./script.py"])


# Re-read the metadata so last_scrape_time reflects any update made by
# script.py. (BUG FIX: the original opened the file here but never read it,
# reusing the stale dict loaded above.)
with open("./data/app_metadata.json", "r") as f:
    metadata = json.load(f)
st.session_state["last_scrape_time"] = metadata["last_scrape_time"]
df = pd.read_csv("./data/most_recently_scraped.csv")


# Default search parameters (persisted across reruns in session state).
if "job_title" not in st.session_state:
    st.session_state["job_title"] = "Machine Learning, Deep Learning, Computer Vision"

if "location" not in st.session_state:
    st.session_state["location"] = "india"


with st.sidebar:
    st.markdown('''
    # About
    💡 <span style="color:teal">Scrape Linkedin Jobs of your choice to stay ahead of others!!!</span>
    ''', unsafe_allow_html=True)

st.title(f""":green[_{df.shape[0]} New Jobs_]""")

data_staleness = int((time.time() - st.session_state["last_scrape_time"]) / 60)
st.warning(f"This data was scraped {data_staleness} minutes ago. Click on 'Fetch Fresh Jobs' to get new jobs data", icon="⚠️")
st.markdown("---")


# CSS giving each job's description panel its own scrollbar.
scrollable_css = """
<style>
.scrollable {
    max-height: 500px;
    overflow-y: auto;
}
</style>
"""
st.markdown(scrollable_css, unsafe_allow_html=True)

outer_html = """<div style="overflow:auto; width:1280px; height:850px;">"""
for index, row in df.iterrows():

    # Column order assumed: job_id, experience, time_elapsed, applicants,
    # position, company, location, description — TODO confirm against script.py.
    j_id, exp, ti_el, appli, pos, comp, loc, desc = row.values.tolist()
    link = f"https://www.linkedin.com/jobs/search?currentJobId={j_id}"
    # NaN numeric cells render as "UNK"; NaN text cells arrive as floats.
    ti_el = "UNK" if pd.isna(ti_el) else str(int(ti_el))
    appli = "UNK" if pd.isna(appli) else str(int(appli))
    exp = "UNK" if pd.isna(exp) else str(int(exp))
    comp = "UNK" if isinstance(comp, float) else comp
    loc = "UNK" if isinstance(loc, float) else loc
    desc = [] if isinstance(desc, float) else ast.literal_eval(desc)  # stringified list in the CSV

    # SECURITY FIX: everything below is scraped from the web and injected
    # into unsafe_allow_html markup — the original escaped only the link.
    link = html.escape(link)
    pos = html.escape(str(pos))
    comp = html.escape(str(comp))
    loc = html.escape(str(loc))

    desc_list = []
    if desc:
        for item in desc:
            item = html.escape(item.strip())
            if item:
                # Tokens of <= 3 chars are acronyms (e.g. "aws") — upper-case them.
                desc_list.append(f"<li>{item.upper() if len(item) <= 3 else item}</li>")
    else:
        desc_list = ["<li>NOTHING</li>"]

    # Headline row for this job.
    outer_html += f"""{index+1}. <a href="{link}">{pos}</a> | {appli} applicants | {ti_el} minutes ago | {exp} years | {comp} | {loc}"""

    # Collapsible tech-stack panel.
    outer_html += f"""
    <details>
    <div class="scrollable">
    <summary>Tech Stack: <span style="color:green"><b>{len(desc)}</b></span> matches</summary>
    <ul>
    {''.join(desc_list)}
    </ul>
    </div>
    </details>
    <hr style="border-top: 1px solid black">
    """

outer_html += "</div>"
st.markdown(outer_html, unsafe_allow_html=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
,shady,shady,02.06.2023 15:48,file:///home/shady/.config/libreoffice/4; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"process_busy": false, "last_scrape_time": 6226385.0102873, "job_title": "Machine Learning, Deep Learning, Computer Vision", "auto_scrape_frequency_in_mins": 10, "max_jobs": 10, "location": "india", "experience": false, "data_freshness_in_hours": 0.2} |
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
Binary file not shown.
Oops, something went wrong.