1- import json
21import logging
3- import os
4- from datetime import datetime , timedelta
52from typing import Optional
63
7- import requests
84import packaging .utils
95
106from guarddog .analyzer .metadata .typosquatting import TyposquatDetector
11- from guarddog .utils .config import TOP_PACKAGES_CACHE_LOCATION
127
138log = logging .getLogger ("guarddog" )
149
@@ -25,87 +20,35 @@ class PypiTyposquatDetector(TyposquatDetector):
2520
2621 def _get_top_packages (self ) -> set :
2722 """
28- Gets the package information of the top 5000 most downloaded PyPI packages
29-
30- Returns:
31- set: set of package data in the format:
32- {
33- ...
34- {
35- download_count: ...
36- project: <package-name>
37- }
38- ...
39- }
23+ Gets the package information of the top 5000 most downloaded PyPI packages.
24+ Uses the base class implementation with PyPI-specific parameters.
4025 """
41-
42- popular_packages_url = (
43- "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
26+ packages = self ._get_top_packages_with_refresh (
27+ packages_filename = "top_pypi_packages.json" ,
28+ popular_packages_url = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json" ,
29+ refresh_days = 30 ,
4430 )
4531
46- top_packages_filename = "top_pypi_packages.json"
47- resources_dir = TOP_PACKAGES_CACHE_LOCATION
48- if resources_dir is None :
49- resources_dir = os .path .abspath (
50- os .path .join (os .path .dirname (__file__ ), ".." , "resources" )
51- )
52-
53- top_packages_path = os .path .join (resources_dir , top_packages_filename )
54- top_packages_information = self ._get_top_packages_local (top_packages_path )
55-
56- if self ._file_is_expired (top_packages_path , days = 30 ):
57- new_information = self ._get_top_packages_network (popular_packages_url )
58- if new_information is not None :
59- top_packages_information = new_information
60-
61- with open (top_packages_path , "w+" ) as f :
62- json .dump (new_information , f , ensure_ascii = False , indent = 4 )
63-
64- if top_packages_information is None :
65- return set ()
66- return set (map (self .get_safe_name , top_packages_information ))
32+ # Apply canonicalization to PyPI package names
33+ return set (map (self ._canonicalize_name , packages ))
6734
68- @staticmethod
69- def get_safe_name (package ):
70- return packaging .utils .canonicalize_name (package ["project" ])
71-
72- def _file_is_expired (self , path : str , days : int ) -> bool :
73- try :
74- update_time = datetime .fromtimestamp (os .path .getmtime (path ))
75- return datetime .now () - update_time > timedelta (days = days )
76- except FileNotFoundError :
77- return True
78-
79- def _get_top_packages_local (self , path : str ) -> list [dict ] | None :
80- try :
81- with open (path , "r" ) as f :
82- result = json .load (f )
83- return self .extract_information (result )
84- except FileNotFoundError :
85- log .error (f"File not found: { path } " )
35+ def _extract_package_names (self , data : dict | list | None ) -> list | None :
36+ """
37+ Extract package names from PyPI data structure.
38+ PyPI data has format: {"rows": [{"project": "name", "download_count": ...}, ...]}
39+ """
40+ if data is None :
8641 return None
8742
88- def _get_top_packages_network (self , url : str ) -> list [dict ] | None :
89- try :
90- response = requests .get (url )
91- response .raise_for_status ()
92-
93- response_data = response .json ()
94- result = response_data
43+ if isinstance (data , dict ) and "rows" in data :
44+ return [row ["project" ] for row in data ["rows" ]]
9545
96- return self .extract_information (result )
97- except json .JSONDecodeError :
98- log .error (f'Couldn`t convert to json: "{ response .text } "' )
99- return None
100- except requests .exceptions .RequestException as e :
101- log .error (f"Network error: { e } " )
102- return None
46+ return None
10347
@staticmethod
def _canonicalize_name(package_name: str) -> str:
    """Return the PEP 503 normalized form of a PyPI package name."""
    canonical = packaging.utils.canonicalize_name(package_name)
    return canonical
10952
11053 def detect (
11154 self ,
0 commit comments