Skip to content

Commit

Permalink
app running
Browse files Browse the repository at this point in the history
  • Loading branch information
deshwalmahesh committed Oct 2, 2023
1 parent 245342d commit 5da4c5e
Show file tree
Hide file tree
Showing 13 changed files with 627 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.csv
*.log
41 changes: 41 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
FROM python:3.8

# Install Google Chrome (signing key + apt repo) together with the other apt
# packages in ONE layer, and clean the apt lists afterwards so the image does
# not carry stale package indexes.
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
    && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list' \
    && apt-get -y update \
    && apt-get install -yqq google-chrome-stable unzip mpg123 \
    && rm -rf /var/lib/apt/lists/*

# Install the chromedriver matching the latest stable release, and remove the
# downloaded archive so it does not stay in the layer.
RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip \
    && unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/ \
    && rm /tmp/chromedriver.zip

# Pre-seed streamlit's credentials file so it never prompts for an email
# address on first start inside the container.
RUN mkdir -p /root/.streamlit \
    && bash -c 'echo -e "\
[general]\n\
email = \"[email protected]\"\n\
" > /root/.streamlit/credentials.toml'

# set display port to avoid crash
ENV DISPLAY=:99

# upgrade pip
RUN pip install --upgrade pip

# Set the working directory in the container to /app
WORKDIR /app

# Copy ONLY requirements first so editing source files does not invalidate the
# (slow) dependency-install layer.
COPY ./requirements.txt /app/requirements.txt
RUN pip install --upgrade -r /app/requirements.txt

# COPY, not ADD: ADD's extra tar-extraction/URL semantics are unwanted for a
# plain directory copy (Docker best practice).
COPY . /app


# Run script.py when the container launches
CMD ["streamlit", "run", "main.py", "--server.port", "9999"]
Binary file added __pycache__/helpers.cpython-38.pyc
Binary file not shown.
99 changes: 99 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from helpers import *  # NOTE(review): presumably provides `time` and `pd` used below — confirm
import streamlit as st
import html
import sys
from streamlit_extras.add_vertical_space import add_vertical_space
import json
import subprocess
import ast


st.set_page_config(page_title="Job Scraper", layout="wide")

# BUG FIX: the original tested `["first_start"] not in st.session_state` — a
# one-element LIST is never a session-state key, so the condition was always
# True and the "first start" flag was re-set on every rerun, forcing a scrape
# each time. Test the string key itself.
if "first_start" not in st.session_state:
    st.session_state["first_start"] = True


# Load scraper metadata and kick off a background scrape when the data is
# stale (older than the configured frequency) and no scrape is running, or
# unconditionally on the very first app start.
with open("./data/app_metadata.json", "r") as f:
    metadata = json.load(f)

data_is_stale = (time.time() - metadata["last_scrape_time"]) > (metadata["auto_scrape_frequency_in_mins"] * 60)
if (data_is_stale and not metadata["process_busy"]) or st.session_state["first_start"]:
    st.session_state["first_start"] = False
    # BUG FIX: use the interpreter running this app instead of a hard-coded
    # developer-machine path (/home/shady/anaconda3/...), which does not exist
    # inside the Docker image.
    subprocess.Popen([sys.executable, "./script.py"])  # run scraper in background


# BUG FIX: the original reopened the metadata file here but never read it,
# assigning the stale in-memory `metadata` value. Re-parse so we pick up any
# timestamp the scraper process just wrote.
with open("./data/app_metadata.json", "r") as f:
    st.session_state["last_scrape_time"] = json.load(f)["last_scrape_time"]
df = pd.read_csv("./data/most_recently_scraped.csv")


# Defaults for the search parameters shown elsewhere in the UI.
if "job_title" not in st.session_state:
    st.session_state["job_title"] = "Machine Learning, Deep Learning, Computer Vision"

if "location" not in st.session_state:
    st.session_state["location"] = "india"


# Sidebar: short "about" blurb, a headline with the number of freshly scraped
# jobs, and a staleness warning for the on-disk data.
with st.sidebar:
    st.markdown('''
    # About
    💡 <span style="color:teal">Scrape Linkedin Jobs of your choice to stay ahead of others!!!</span>
    ''', unsafe_allow_html=True)

    # Headline: how many rows the most recent scrape produced.
    st.title(f""":green[_{len(df)} New Jobs_]""")

    # Whole minutes elapsed since the last scrape finished.
    minutes_since_scrape = int((time.time() - st.session_state["last_scrape_time"]) / 60)
    st.warning(f"This data was scraped {minutes_since_scrape} minutes ago. Click on 'Fetch Fresh Jobs' to get new jobs data", icon="⚠️")
    st.markdown("---")


# Scoped CSS so each job's tech-stack list scrolls instead of growing unbounded.
scrollable_css = """
<style>
.scrollable {
    max-height: 500px;
    overflow-y: auto;
}
</style>
"""
st.markdown(scrollable_css, unsafe_allow_html=True)

# Build one HTML string: a fixed-size scrollable panel with one entry per job.
outer_html = """<div style="overflow:auto; width:1280px; height:850px;">"""
for index, row in df.iterrows():

    # Column order assumed from the scraper's CSV: job id, experience,
    # minutes elapsed, applicant count, position, company, location,
    # description. TODO confirm against script.py's output schema.
    j_id, exp, ti_el, appli, pos, comp, loc, desc = row.values.tolist()
    link = f"https://www.linkedin.com/jobs/search?currentJobId={j_id}"

    # pandas reads missing cells as float NaN; normalise them to "UNK".
    ti_el = "UNK" if pd.isna(ti_el) else str(int(ti_el))
    appli = "UNK" if pd.isna(appli) else str(int(appli))
    exp = "UNK" if pd.isna(exp) else str(int(exp))
    comp = "UNK" if isinstance(comp, float) else comp
    loc = "UNK" if isinstance(loc, float) else loc
    # The description column stores a stringified Python list.
    desc = [] if isinstance(desc, float) else ast.literal_eval(desc)


    # SECURITY FIX: everything below is rendered with unsafe_allow_html=True,
    # but only `link` was escaped. Scraped job postings are untrusted input,
    # so escape every interpolated field to prevent HTML/markup injection.
    link = html.escape(link)
    pos = html.escape(str(pos))
    comp = html.escape(str(comp))
    loc = html.escape(str(loc))

    desc_list = []
    if desc:
        for item in desc:
            item = item.strip()
            if item:
                item = html.escape(item)
                # Short tokens (<= 3 chars) are treated as acronyms and upper-cased.
                desc_list.append(f"<li>{item.upper() if len(item) <= 3 else item}</li>")
    else:
        desc_list = ["<li>NOTHING</li>"]


    # One-line job summary with a link to the posting.
    outer_html += f"""{index+1}. <a href="{link}">{pos}</a> | {appli} applicants | {ti_el} minutes ago | {exp} years | {comp} | {loc}"""

    # Collapsible tech-stack section for this job.
    outer_html += f"""
    <details>
    <div class="scrollable">
    <summary>Tech Stack: <span style="color:green"><b>{len(desc)}</b></span> matches</summary>
    <ul>
    {''.join(desc_list)}
    </ul>
    </div>
    </details>
    <hr style="border-top: 1px solid black">
    """


outer_html += "</div>"
st.markdown(outer_html, unsafe_allow_html=True)
1 change: 1 addition & 0 deletions data/.~lock.debug_recent.csv#
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
,shady,shady,02.06.2023 15:48,file:///home/shady/.config/libreoffice/4;
1 change: 1 addition & 0 deletions data/app_metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"process_busy": false, "last_scrape_time": 6226385.0102873, "job_title": "Machine Learning, Deep Learning, Computer Vision", "auto_scrape_frequency_in_mins": 10, "max_jobs": 10, "location": "india", "experience": false, "data_freshness_in_hours": 0.2}
1 change: 1 addition & 0 deletions data/dictonary.json

Large diffs are not rendered by default.

Binary file added data/notification.mp3
Binary file not shown.
Binary file added data/tts.mp3
Binary file not shown.
Loading

0 comments on commit 5da4c5e

Please sign in to comment.