Complete issue #66 - Remove duplicates and add focused repository tools

Copilot · sgerlach · Copilot · commit f180ecf63509 · 2025-10-27T21:19:26.000Z
Co-authored-by: sgerlach &lt;4514084+sgerlach@users.noreply.github.com&gt;
diff --git a/stackhawk_mcp/server.py b/stackhawk_mcp/server.py
@@ -2102,68 +2102,7 @@ def _detect_project_language_and_frameworks(self) -> dict:
             "frameworks": frameworks
         }
 
-    async def _get_sensitive_data_report(self, org_id: str, data_type_filter: str = "All", time_range: str = "30d", include_details: bool = True, group_by: str = "data_type", **kwargs) -> Dict[str, Any]:
-        """
-        Generate a grouped and summarized sensitive data report for an entire organization.
-        Use this for org-wide analytics, compliance, and reporting.
-        """
-        try:
-            # For all-time reports, fetch all results to get complete picture
-            if time_range == "all":
-                findings_response = await self.client.list_sensitive_data_findings(org_id, all_results=True)
-                findings = findings_response.get("sensitiveDataFindings", [])
-            else:
-                # For time-limited reports, use pagination to get a reasonable sample
-                findings_params = {"pageSize": 1000}
-                findings_response = await self.client.list_sensitive_data_findings(org_id, **findings_params)
-                findings = findings_response.get("sensitiveDataFindings", [])
 
-            # Apply client-side filters
-            filtered_findings = findings
-
-            if data_type_filter != "All":
-                filtered_findings = [
-                    f for f in filtered_findings
-                    if f.get("dataType") == data_type_filter
-                ]
-
-            if time_range != "all":
-                time_filter = {
-                    "startDate": (datetime.now() - timedelta(days=int(time_range[:-1]))).isoformat(),
-                    "endDate": datetime.now().isoformat()
-                }
-                filtered_findings = [
-                    f for f in filtered_findings
-                    if f.get("findingDate") >= time_filter["startDate"] and f.get("findingDate") <= time_filter["endDate"]
-                ]
-
-            # Group findings
-            grouped_findings = {}
-            for finding in filtered_findings:
-                group_key = finding.get(group_by)
-                if group_key not in grouped_findings:
-                    grouped_findings[group_key] = []
-                grouped_findings[group_key].append(finding)
-
-            # Format findings
-            formatted_findings = []
-            for group, findings in grouped_findings.items():
-                formatted_findings.append({
-                    "group": group,
-                    "findings": findings
-                })
-
-            return {
-                "organizationId": org_id,
-                "dataTypeFilter": data_type_filter,
-                "timeRange": time_range,
-                "report": formatted_findings,
-                "totalFindings": len(filtered_findings),
-                "timestamp": datetime.now().isoformat()
-            }
-        except Exception as e:
-            debug_print(f"Error in _get_sensitive_data_report: {e}")
-            raise
 
     async def _analyze_sensitive_data_trends(self, org_id: str, analysis_period: str = "90d", include_applications: bool = True, include_repositories: bool = True, **kwargs) -> Dict[str, Any]:
         """
@@ -2248,90 +2187,7 @@ async def _analyze_sensitive_data_trends(self, org_id: str, analysis_period: str
             debug_print(f"Error in _analyze_sensitive_data_trends: {e}")
             raise
 
-    async def _get_critical_sensitive_data(self, org_id: str, data_types: List[str] = None, include_remediation: bool = True, max_results: int = 50, **kwargs) -> Dict[str, Any]:
-        """Get critical sensitive data findings requiring immediate attention"""
-        try:
-            if data_types is None:
-                data_types = ["PII", "PCI", "PHI"]
-
-            # For critical findings, we want to see ALL critical findings, not just the first page
-            findings_response = await self.client.list_sensitive_data_findings(org_id, all_results=True)
-            findings = findings_response.get("sensitiveDataFindings", [])
 
-            # Filter findings based on data types
-            filtered_findings = [
-                f for f in findings
-                if f.get("dataType") in data_types
-            ]
-
-            # Include remediation details
-            if include_remediation:
-                for finding in filtered_findings:
-                    finding["remediation"] = finding.get("remediationDetails", "No remediation details available")
-
-            return {
-                "organizationId": org_id,
-                "dataTypes": data_types,
-                "findings": filtered_findings,
-                "totalFindings": len(filtered_findings),
-                "timestamp": datetime.now().isoformat()
-            }
-        except Exception as e:
-            debug_print(f"Error in _get_critical_sensitive_data: {e}")
-            raise
-
-    async def _generate_sensitive_data_summary(self, org_id: str, time_period: str = "30d", include_recommendations: bool = True, include_risk_assessment: bool = True, **kwargs) -> Dict[str, Any]:
-        """Generate executive-level sensitive data summary and recommendations"""
-        try:
-            # For all-time reports, fetch all results to get complete picture
-            if time_period == "all":
-                findings_response = await self.client.list_sensitive_data_findings(org_id, all_results=True)
-                findings = findings_response.get("sensitiveDataFindings", [])
-            else:
-                # For time-limited reports, use pagination to get a reasonable sample
-                findings_params = {"pageSize": 1000}
-                findings_response = await self.client.list_sensitive_data_findings(org_id, **findings_params)
-                findings = findings_response.get("sensitiveDataFindings", [])
-
-            # Group findings by data type
-            data_type_findings = {"PII": [], "PCI": [], "PHI": [], "Other": []}
-            for finding in findings:
-                data_type = finding.get("dataType", "Other")
-                if data_type in data_type_findings:
-                    data_type_findings[data_type].append(finding)
-                else:
-                    data_type_findings["Other"].append(finding)
-
-            # Generate summary
-            summary = {
-                "totalFindings": len(findings),
-                "dataTypeBreakdown": data_type_findings
-            }
-
-            # Include recommendations
-            if include_recommendations:
-                summary["recommendations"] = [
-                    {
-                        "dataType": data_type,
-                        "recommendation": f"Review and secure {data_type} data exposure"
-                    }
-                    for data_type, findings in data_type_findings.items()
-                    if findings
-                ]
-
-            # Include risk assessment
-            if include_risk_assessment:
-                summary["riskAssessment"] = self._calculate_sensitive_data_risk_score(findings)
-
-            return {
-                "organizationId": org_id,
-                "timePeriod": time_period,
-                "summary": summary,
-                "timestamp": datetime.now().isoformat()
-            }
-        except Exception as e:
-            debug_print(f"Error in _generate_sensitive_data_summary: {e}")
-            raise
 
     async def _check_repository_attack_surface(self, repo_name: str = None, org_id: str = None, include_vulnerabilities: bool = True, include_apps: bool = True, **kwargs) -> Dict[str, Any]:
         """Check if a repository name exists in StackHawk attack surface and get security information"""
diff --git a/test_new_tools.py b/test_new_tools.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+"""
+Test script for the new MCP tools implemented for issue #66
+
+This script tests:
+1. check_repository_attack_surface - Check if repo is in attack surface
+2. check_repository_sensitive_data - Check if repo has sensitive data
+3. list_application_repository_connections - List app-repo connections
+4. get_sensitive_data_summary - Comprehensive sensitive data summary
+"""
+
+import asyncio
+import json
+import os
+import sys
+from datetime import datetime
+
+# Add the stackhawk_mcp directory to the path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '.')))
+
+from stackhawk_mcp.server import StackHawkMCPServer
+
+
+async def test_new_tools():
+    """Test the new MCP tools implemented for issue #66"""
+    
+    api_key = os.environ.get("STACKHAWK_API_KEY")
+    if not api_key:
+        print("&#10060; STACKHAWK_API_KEY environment variable is required")
+        print("Please set it to test the new tools")
+        return
+    
+    print("&#128269; Testing New MCP Tools for Issue #66")
+    print("=" * 60)
+    
+    server = StackHawkMCPServer(api_key)
+    
+    try:
+        # Get user info
+        user_info = await server.client.get_user_info()
+        org_id = user_info["user"]["external"]["organizations"][0]["organization"]["id"]
+        org_name = user_info["user"]["external"]["organizations"][0]["organization"]["name"]
+        
+        print(f"&#9989; Connected to organization: {org_name} (ID: {org_id})")
+        print()
+        
+        # Test 1: Check Repository Attack Surface
+        print("1. Testing check_repository_attack_surface...")
+        try:
+            # Test with the current repository name
+            current_repo = "stackhawk-mcp"  # This repo
+            result = await server._check_repository_attack_surface(
+                repo_name=current_repo,
+                include_vulnerabilities=True,
+                include_apps=True
+            )
+            print("&#9989; Repository attack surface check completed!")
+            print(f"   Repository: {result['repository_name']}")
+            print(f"   Found in attack surface: {result.get('found_in_attack_surface', False)}")
+            print(f"   Total matching repos: {len(result.get('matching_repositories', []))}")
+            if result.get('connected_applications'):
+                print(f"   Connected apps: {result['total_connected_apps']}")
+            print(f"   Recommendation: {result.get('recommendation', 'None')}")
+        except Exception as e:
+            print(f"&#10060; Failed to check repository attack surface: {e}")
+        
+        print("\n" + "="*60 + "\n")
+        
+        # Test 2: Check Repository Sensitive Data
+        print("2. Testing check_repository_sensitive_data...")
+        try:
+            result = await server._check_repository_sensitive_data(
+                repo_name=current_repo,
+                data_type_filter="All",
+                include_remediation=True
+            )
+            print("&#9989; Repository sensitive data check completed!")
+            print(f"   Repository: {result['repository_name']}")
+            print(f"   Found in StackHawk: {result.get('found_in_stackhawk', False)}")
+            print(f"   Has sensitive data: {result.get('has_sensitive_data', False)}")
+            print(f"   Total findings: {result.get('total_findings', 0)}")
+            if result.get('data_type_breakdown'):
+                print(f"   Data type breakdown: {result['data_type_breakdown']}")
+            print(f"   Recommendation: {result.get('recommendation', 'None')}")
+        except Exception as e:
+            print(f"&#10060; Failed to check repository sensitive data: {e}")
+        
+        print("\n" + "="*60 + "\n")
+        
+        # Test 3: List Application Repository Connections
+        print("3. Testing list_application_repository_connections...")
+        try:
+            result = await server._list_application_repository_connections(
+                include_repo_details=True,
+                include_app_details=True,
+                filter_connected_only=False
+            )
+            print("&#9989; Application-repository connections listed!")
+            print(f"   Total applications: {result['total_applications']}")
+            print(f"   Total repositories: {result['total_repositories']}")
+            print(f"   Total connections: {result['total_connections']}")
+            
+            coverage_stats = result.get('coverage_stats', {})
+            print(f"   Connected applications: {coverage_stats.get('connected_applications', 0)}")
+            print(f"   Orphaned applications: {coverage_stats.get('orphaned_applications', 0)}")
+            print(f"   Orphaned repositories: {coverage_stats.get('orphaned_repositories', 0)}")
+            print(f"   Connection coverage: {coverage_stats.get('connection_coverage', 0):.1f}%")
+            
+            recommendations = result.get('recommendations', [])
+            if recommendations:
+                print("   Recommendations:")
+                for i, rec in enumerate(recommendations[:3], 1):
+                    print(f"     {i}. {rec}")
+        except Exception as e:
+            print(f"&#10060; Failed to list application-repository connections: {e}")
+        
+        print("\n" + "="*60 + "\n")
+        
+        # Test 4: Get Comprehensive Sensitive Data Summary
+        print("4. Testing get_comprehensive_sensitive_data_summary...")
+        try:
+            result = await server._get_comprehensive_sensitive_data_summary(
+                time_period="30d",
+                include_trends=True,
+                include_critical_only=False,
+                include_recommendations=True,
+                group_by="data_type"
+            )
+            print("&#9989; Comprehensive sensitive data summary generated!")
+            print(f"   Total findings: {result['total_findings']}")
+            print(f"   Analysis type: {result['analysis_type']}")
+            print(f"   Overall risk score: {result.get('overall_risk_score', 0):.1f}")
+            print(f"   Group by: {result['group_by']}")
+            
+            grouped_summary = result.get('grouped_summary', {})
+            print(f"   Groups found: {len(grouped_summary)}")
+            for group_name, group_data in list(grouped_summary.items())[:3]:
+                print(f"     {group_name}: {group_data['count']} findings, risk: {group_data['risk_score']:.1f}")
+            
+            recommendations = result.get('recommendations', [])
+            if recommendations:
+                print("   Recommendations:")
+                for i, rec in enumerate(recommendations[:3], 1):
+                    print(f"     {i}. {rec}")
+        except Exception as e:
+            print(f"&#10060; Failed to generate comprehensive sensitive data summary: {e}")
+        
+        print("\n" + "="*60 + "\n")
+        
+        # Test 5: Test MCP Tool Interface
+        print("5. Testing MCP tool interface...")
+        try:
+            # List available tools
+            tools = await server._list_tools_handler()
+            new_tool_names = [
+                "check_repository_attack_surface",
+                "check_repository_sensitive_data", 
+                "list_application_repository_connections",
+                "get_sensitive_data_summary"
+            ]
+            
+            found_tools = [tool.name for tool in tools if tool.name in new_tool_names]
+            print(f"&#9989; Found {len(found_tools)}/{len(new_tool_names)} new tools in MCP interface")
+            for tool_name in found_tools:
+                print(f"   &#10003; {tool_name}")
+            
+            missing_tools = [name for name in new_tool_names if name not in found_tools]
+            if missing_tools:
+                print("   Missing tools:")
+                for tool_name in missing_tools:
+                    print(f"   &#10007; {tool_name}")
+        except Exception as e:
+            print(f"&#10060; Failed to test MCP tool interface: {e}")
+        
+        print("\n" + "="*60 + "\n")
+        
+        # Test 6: Demonstrate improved tool usage
+        print("6. Testing tool call interface...")
+        try:
+            # Test calling the new tool through the MCP interface
+            result = await server.call_tool(
+                "check_repository_attack_surface",
+                {"repo_name": "test-repo", "include_vulnerabilities": True}
+            )
+            print("&#9989; Tool call interface working!")
+            print(f"   Response type: {type(result)}")
+            print(f"   Response length: {len(result) if result else 0}")
+        except Exception as e:
+            print(f"&#9888;&#65039;  Tool call interface test failed: {e}")
+        
+        print("\n" + "="*60)
+        print("&#9989; All New MCP Tools Testing Complete!")
+        print("\nSummary of Changes for Issue #66:")
+        print("- &#9989; Removed duplicate sensitive data tools")
+        print("- &#9989; Added attack surface lookup for current repository")
+        print("- &#9989; Added sensitive data lookup for current repository")
+        print("- &#9989; Added application/code repository connection mapping")
+        print("- &#9989; Consolidated sensitive data tools into single comprehensive tool")
+        print("- &#9989; All tools support auto-detection of current repository name")
+        print("- &#9989; All tools provide actionable recommendations")
+        
+    except Exception as e:
+        print(f"&#10060; Error during testing: {e}")
+        import traceback
+        traceback.print_exc()
+    
+    finally:
+        await server.cleanup()
+
+
+if __name__ == "__main__":
+    asyncio.run(test_new_tools())