Skip to content

Commit

Permalink
app running
Browse files Browse the repository at this point in the history
  • Loading branch information
deshwalmahesh committed Oct 2, 2023
1 parent 245342d commit 5da4c5e
Show file tree
Hide file tree
Showing 13 changed files with 627 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.csv
*.log
41 changes: 41 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
FROM python:3.8

# Install Google Chrome (signing key + apt repo) together with the other apt
# packages in ONE layer, and clean the apt lists afterwards so the image does
# not carry stale package indexes.
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
    && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list' \
    && apt-get -y update \
    && apt-get install -yqq google-chrome-stable unzip mpg123 \
    && rm -rf /var/lib/apt/lists/*

# Install the chromedriver matching the latest stable release, and remove the
# downloaded archive so it does not stay in the layer.
RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip \
    && unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/ \
    && rm /tmp/chromedriver.zip

# Pre-seed streamlit's credentials file so it never prompts for an email
# address on first start inside the container.
RUN mkdir -p /root/.streamlit \
    && bash -c 'echo -e "\
[general]\n\
email = \"[email protected]\"\n\
" > /root/.streamlit/credentials.toml'

# set display port to avoid crash
ENV DISPLAY=:99

# upgrade pip
RUN pip install --upgrade pip

# Set the working directory in the container to /app
WORKDIR /app

# Copy ONLY requirements first so editing source files does not invalidate the
# (slow) dependency-install layer.
COPY ./requirements.txt /app/requirements.txt
RUN pip install --upgrade -r /app/requirements.txt

# COPY, not ADD: ADD's extra tar-extraction/URL semantics are unwanted for a
# plain directory copy (Docker best practice).
COPY . /app


# Run script.py when the container launches
CMD ["streamlit", "run", "main.py", "--server.port", "9999"]
Binary file added __pycache__/helpers.cpython-38.pyc
Binary file not shown.
99 changes: 99 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from helpers import *  # NOTE(review): presumably provides `time` and `pd` used below — confirm
import streamlit as st
import html
import sys
from streamlit_extras.add_vertical_space import add_vertical_space
import json
import subprocess
import ast


st.set_page_config(page_title="Job Scraper", layout="wide")

# BUG FIX: the original tested `["first_start"] not in st.session_state` — a
# one-element LIST is never a session-state key, so the condition was always
# True and the "first start" flag was re-set on every rerun, forcing a scrape
# each time. Test the string key itself.
if "first_start" not in st.session_state:
    st.session_state["first_start"] = True


# Load scraper metadata and kick off a background scrape when the data is
# stale (older than the configured frequency) and no scrape is running, or
# unconditionally on the very first app start.
with open("./data/app_metadata.json", "r") as f:
    metadata = json.load(f)

data_is_stale = (time.time() - metadata["last_scrape_time"]) > (metadata["auto_scrape_frequency_in_mins"] * 60)
if (data_is_stale and not metadata["process_busy"]) or st.session_state["first_start"]:
    st.session_state["first_start"] = False
    # BUG FIX: use the interpreter running this app instead of a hard-coded
    # developer-machine path (/home/shady/anaconda3/...), which does not exist
    # inside the Docker image.
    subprocess.Popen([sys.executable, "./script.py"])  # run scraper in background


# BUG FIX: the original reopened the metadata file here but never read it,
# assigning the stale in-memory `metadata` value. Re-parse so we pick up any
# timestamp the scraper process just wrote.
with open("./data/app_metadata.json", "r") as f:
    st.session_state["last_scrape_time"] = json.load(f)["last_scrape_time"]
df = pd.read_csv("./data/most_recently_scraped.csv")


# Defaults for the search parameters shown elsewhere in the UI.
if "job_title" not in st.session_state:
    st.session_state["job_title"] = "Machine Learning, Deep Learning, Computer Vision"

if "location" not in st.session_state:
    st.session_state["location"] = "india"


# Sidebar: short "about" blurb, a headline with the number of freshly scraped
# jobs, and a staleness warning for the on-disk data.
with st.sidebar:
    st.markdown('''
    # About
    💡 <span style="color:teal">Scrape Linkedin Jobs of your choice to stay ahead of others!!!</span>
    ''', unsafe_allow_html=True)

    # Headline: how many rows the most recent scrape produced.
    st.title(f""":green[_{len(df)} New Jobs_]""")

    # Whole minutes elapsed since the last scrape finished.
    minutes_since_scrape = int((time.time() - st.session_state["last_scrape_time"]) / 60)
    st.warning(f"This data was scraped {minutes_since_scrape} minutes ago. Click on 'Fetch Fresh Jobs' to get new jobs data", icon="⚠️")
    st.markdown("---")


# Scoped CSS so each job's tech-stack list scrolls instead of growing unbounded.
scrollable_css = """
<style>
.scrollable {
    max-height: 500px;
    overflow-y: auto;
}
</style>
"""
st.markdown(scrollable_css, unsafe_allow_html=True)

# Build one HTML string: a fixed-size scrollable panel with one entry per job.
outer_html = """<div style="overflow:auto; width:1280px; height:850px;">"""
for index, row in df.iterrows():

    # Column order assumed from the scraper's CSV: job id, experience,
    # minutes elapsed, applicant count, position, company, location,
    # description. TODO confirm against script.py's output schema.
    j_id, exp, ti_el, appli, pos, comp, loc, desc = row.values.tolist()
    link = f"https://www.linkedin.com/jobs/search?currentJobId={j_id}"

    # pandas reads missing cells as float NaN; normalise them to "UNK".
    ti_el = "UNK" if pd.isna(ti_el) else str(int(ti_el))
    appli = "UNK" if pd.isna(appli) else str(int(appli))
    exp = "UNK" if pd.isna(exp) else str(int(exp))
    comp = "UNK" if isinstance(comp, float) else comp
    loc = "UNK" if isinstance(loc, float) else loc
    # The description column stores a stringified Python list.
    desc = [] if isinstance(desc, float) else ast.literal_eval(desc)


    # SECURITY FIX: everything below is rendered with unsafe_allow_html=True,
    # but only `link` was escaped. Scraped job postings are untrusted input,
    # so escape every interpolated field to prevent HTML/markup injection.
    link = html.escape(link)
    pos = html.escape(str(pos))
    comp = html.escape(str(comp))
    loc = html.escape(str(loc))

    desc_list = []
    if desc:
        for item in desc:
            item = item.strip()
            if item:
                item = html.escape(item)
                # Short tokens (<= 3 chars) are treated as acronyms and upper-cased.
                desc_list.append(f"<li>{item.upper() if len(item) <= 3 else item}</li>")
    else:
        desc_list = ["<li>NOTHING</li>"]


    # One-line job summary with a link to the posting.
    outer_html += f"""{index+1}. <a href="{link}">{pos}</a> | {appli} applicants | {ti_el} minutes ago | {exp} years | {comp} | {loc}"""

    # Collapsible tech-stack section for this job.
    outer_html += f"""
    <details>
    <div class="scrollable">
    <summary>Tech Stack: <span style="color:green"><b>{len(desc)}</b></span> matches</summary>
    <ul>
    {''.join(desc_list)}
    </ul>
    </div>
    </details>
    <hr style="border-top: 1px solid black">
    """


outer_html += "</div>"
st.markdown(outer_html, unsafe_allow_html=True)
1 change: 1 addition & 0 deletions data/.~lock.debug_recent.csv#
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
,shady,shady,02.06.2023 15:48,file:///home/shady/.config/libreoffice/4;
1 change: 1 addition & 0 deletions data/app_metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"process_busy": false, "last_scrape_time": 6226385.0102873, "job_title": "Machine Learning, Deep Learning, Computer Vision", "auto_scrape_frequency_in_mins": 10, "max_jobs": 10, "location": "india", "experience": false, "data_freshness_in_hours": 0.2}
1 change: 1 addition & 0 deletions data/dictonary.json

Large diffs are not rendered by default.

Binary file added data/notification.mp3
Binary file not shown.
Binary file added data/tts.mp3
Binary file not shown.
Loading

0 comments on commit 5da4c5e

Please sign in to comment.