-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathextract_salary.py
93 lines (71 loc) · 2.52 KB
/
extract_salary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""
This module contains a function to extract salary information from text.
"""
import argparse
import logging
import re
def extract_salary(text):
    """
    Extract a (low, high) salary range from free-form text.

    The text is searched for several dollar-amount formats. They are tried
    from most to least specific and the first one that matches decides the
    result:

    1. A dash-separated range, e.g. "$50,000 - $60,000" or "$50K - $60K".
    2. An hourly range, e.g. "$15 to $20/hour", converted to a yearly figure
       at 40 hours per week and 52 weeks per year.
    3. A single amount with a "K" suffix, e.g. "$85K".
    4. A single plain amount, e.g. "$50,000" or "$1,234.56".

    Any resulting value below 100 is treated as being expressed in thousands
    and is multiplied by 1000.

    Side effects: calls ``logging.basicConfig(level=logging.INFO)`` and logs
    the extracted range at INFO level.

    Args:
        text: The string to search.

    Returns:
        A tuple ``(salary_low, salary_high)`` of floats. Both values are equal
        when a single amount was found, and both are ``None`` when no dollar
        amount was found.
    """
    # Setup logging
    logging.basicConfig(level=logging.INFO)

    # Regular expression patterns
    salary_pattern_1 = r"\$([\d,]+)(?:\.(\d{2}))?"
    salary_pattern_2 = r"\$([\d\.]+)(K)"
    salary_pattern_3 = (
        r"\$(?!401K)([\d,]+)(?:\.(\d{2}))?\s*(K)?\s*-"
        r"\s*\$(?!401K)([\d,]+)(?:\.(\d{2}))?(K)?"
    )
    hourly_pattern = r"\$([\d\.]+)\s*to\s*\$([\d\.]+)\/hour"
    million_pattern = r"\b\d+M\b"

    # Search for patterns
    match1 = re.search(salary_pattern_1, text)
    match2 = re.search(salary_pattern_2, text)
    match3 = re.search(salary_pattern_3, text)
    match4 = re.search(hourly_pattern, text)
    match5 = re.search(million_pattern, text)

    def to_dollars(digits, suffix):
        """Convert a captured digit string to a float, scaling by 1000 for "K"."""
        value = float(digits.replace(",", ""))
        return value * 1000 if suffix == "K" else value

    salary_low, salary_high = None, None
    if match3:
        # Groups 2 and 5 (the cents of each bound) are captured by the
        # pattern but not used; only the whole-dollar part is kept.
        salary_low = to_dollars(match3.group(1), match3.group(3))
        salary_high = to_dollars(match3.group(4), match3.group(6))
    elif match4:
        # The hourly pattern has exactly two groups (low, high). It must be
        # tested before the single-amount patterns, because those also match
        # the first "$<number>" of an hourly range and would shadow it.
        hours_per_year = 40 * 52
        salary_low = float(match4.group(1)) * hours_per_year
        salary_high = float(match4.group(2)) * hours_per_year
    elif match2:
        salary_low = salary_high = float(match2.group(1)) * 1000
    elif match1:
        whole = match1.group(1).replace(",", "")
        cents = match1.group(2)
        salary_low = salary_high = float(whole + "." + cents) if cents else float(whole)
    elif match5:
        # Only a bare "<digits>M" token was found and no dollar amount:
        # report nothing (this path returns before the log call below).
        return (None, None)

    # Values under 100 are assumed to be shorthand for thousands.
    if salary_low is not None and salary_low < 100:
        salary_low *= 1000
    if salary_high is not None and salary_high < 100:
        salary_high *= 1000

    logging.info("Extracted salary range: %s to %s", salary_low, salary_high)
    return salary_low, salary_high
def main():
    """
    Command-line entry point.

    Reads one positional argument, ``text``, passes it to ``extract_salary``
    and prints the resulting low/high range to standard output.
    """
    cli = argparse.ArgumentParser(description="Extract salary information from text.")
    cli.add_argument(
        "text",
        type=str,
        help="Text containing salary information.",
    )
    parsed = cli.parse_args()

    low, high = extract_salary(parsed.text)
    print(f"Extracted salary range: {low} to {high}")
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()