# airesumejobmatchingapi-5g56kf/app/services/ai_service.py

import asyncio
import hashlib
import cohere
from typing import Dict, List, Any
from app.core.config import settings
from app.core.cache import ai_cache, cache_response
import json


class AIService:
    """Cohere-backed resume/job analysis helpers with response caching.

    Every public coroutine follows the same shape: look the request up in
    ``ai_cache``; on a miss, send a prompt to the ``command-r`` chat model,
    parse the reply, cache it and return it.  If the model call or the
    parsing fails, the error is printed and a fallback value is cached for
    five minutes and returned instead of raising.
    """

    def __init__(self):
        self.client = cohere.AsyncClient(api_key=settings.COHERE_API_KEY)
        self._semaphore = asyncio.Semaphore(5)  # Limit concurrent AI calls

    def _create_cache_key(self, text: str, operation: str) -> str:
        """Return ``"<operation>:<md5 hex digest of text>"``.

        MD5 is used purely as a compact fingerprint of the input, not for
        any security purpose.
        """
        text_hash = hashlib.md5(text.encode()).hexdigest()
        return f"{operation}:{text_hash}"

    @staticmethod
    def _extract_json(text: str, opener: str = "{", closer: str = "}") -> Any:
        """Parse the JSON value embedded in a model reply.

        The span from the first ``opener`` to the last ``closer`` is parsed
        first, which copes with leading/trailing prose and Markdown fences.
        If that span is not valid JSON (for example because trailing prose
        contains a stray closing bracket), the first complete JSON value
        starting at ``opener`` is decoded instead.

        Args:
            text: Raw reply text.
            opener: ``"{"`` for an object, ``"["`` for an array.
            closer: The bracket matching ``opener``.

        Returns:
            The decoded JSON value.

        Raises:
            ValueError: If no bracketed span exists or nothing decodes
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        start = text.find(opener)
        end = text.rfind(closer)
        if start == -1 or end <= start:
            kind = "JSON" if opener == "{" else "JSON array"
            raise ValueError(f"No valid {kind} found in response")

        try:
            return json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            # Ignore whatever follows the first complete JSON value.
            value, _ = json.JSONDecoder().raw_decode(text[start:])
            return value

    async def _chat(self, prompt: str, *, temperature: float, max_tokens: int) -> str:
        """Send ``prompt`` to the chat model and return the stripped reply text.

        The shared semaphore is held only for the duration of the network
        call, so at most five requests are in flight at once.
        """
        async with self._semaphore:
            response = await self.client.chat(
                model="command-r",
                message=prompt,
                temperature=temperature,
                max_tokens=max_tokens,
                connectors=[],
            )
        return response.text.strip()

    async def analyze_resume(self, resume_text: str) -> Dict[str, Any]:
        """Extract structured data from resume text using Cohere AI with caching.

        Only the first 4000 characters of ``resume_text`` are sent to the
        model.  Successful results are cached for one hour.

        Returns:
            The parsed JSON object, or ``{}`` (cached for five minutes) when
            the model call or parsing fails.
        """
        cache_key = self._create_cache_key(resume_text, "analyze_resume")
        cached_result = ai_cache.get(cache_key)
        # Compare against None (assumed to be the cache's "miss" value) so
        # that the cached empty failure result below is honoured too.
        if cached_result is not None:
            return cached_result

        prompt = f"""Analyze this resume and extract structured information. Return only valid JSON.

Resume text:
{resume_text[:4000]}

Extract the following information in JSON format:
{{
    "skills": ["skill1", "skill2", ...],
    "experience_years": number,
    "education_level": "Bachelor's/Master's/PhD/High School/etc",
    "work_experience": [
        {{
            "company": "company name",
            "position": "job title",
            "duration": "time period",
            "description": "brief description"
        }}
    ],
    "education": [
        {{
            "institution": "school name",
            "degree": "degree type",
            "field": "field of study",
            "year": "graduation year"
        }}
    ],
    "contact_info": {{
        "email": "email address",
        "phone": "phone number",
        "location": "location"
    }}
}}

JSON:"""

        try:
            reply = await self._chat(prompt, temperature=0.1, max_tokens=1500)
            parsed_result = self._extract_json(reply)

            # Cache the result for 1 hour
            ai_cache.set(cache_key, parsed_result, ttl=3600)
            return parsed_result

        except Exception as e:
            print(f"Error analyzing resume: {e}")
            # Cache an empty result briefly to avoid repeated failures
            empty_result: Dict[str, Any] = {}
            ai_cache.set(cache_key, empty_result, ttl=300)  # Cache for 5 minutes
            return empty_result

    async def analyze_job_description(self, job_description: str) -> Dict[str, Any]:
        """Extract structured data from job description using Cohere AI with caching.

        Only the first 3000 characters of ``job_description`` are sent to
        the model.  Successful results are cached for one hour.

        Returns:
            The parsed JSON object, or ``{}`` (cached for five minutes) when
            the model call or parsing fails.
        """
        cache_key = self._create_cache_key(job_description, "analyze_job")
        cached_result = ai_cache.get(cache_key)
        # ``is not None`` so the cached empty failure result counts as a hit
        # (assumes the cache returns None on a miss).
        if cached_result is not None:
            return cached_result

        prompt = f"""Analyze this job description and extract structured information. Return only valid JSON.

Job description:
{job_description[:3000]}

Extract the following information in JSON format:
{{
    "required_skills": ["skill1", "skill2", ...],
    "preferred_skills": ["skill1", "skill2", ...],
    "experience_level": "entry/mid/senior",
    "education_requirement": "minimum education required",
    "key_responsibilities": ["responsibility1", "responsibility2", ...],
    "company_benefits": ["benefit1", "benefit2", ...],
    "job_type": "full-time/part-time/contract",
    "remote_option": "yes/no/hybrid"
}}

JSON:"""

        try:
            reply = await self._chat(prompt, temperature=0.1, max_tokens=1000)
            parsed_result = self._extract_json(reply)

            # Cache for 1 hour
            ai_cache.set(cache_key, parsed_result, ttl=3600)
            return parsed_result

        except Exception as e:
            print(f"Error analyzing job description: {e}")
            empty_result: Dict[str, Any] = {}
            ai_cache.set(cache_key, empty_result, ttl=300)
            return empty_result

    async def calculate_match_score(
        self, resume_data: Dict[str, Any], job_data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Calculate match score between resume and job description using Cohere AI with caching.

        The cache key covers the full ``resume_data`` and ``job_data``, but
        only the skill, experience and education fields are sent to the
        model.  Successful results are cached for 30 minutes.

        Returns:
            The parsed JSON object, or a dict with the four score fields set
            to 0 (cached for five minutes) when the model call or parsing
            fails.
        """
        combined_data = f"{json.dumps(resume_data, sort_keys=True)}{json.dumps(job_data, sort_keys=True)}"
        cache_key = self._create_cache_key(combined_data, "match_score")
        cached_result = ai_cache.get(cache_key)
        if cached_result is not None:
            return cached_result

        # Limit data size for faster processing
        resume_fields = ("skills", "experience_years", "education_level")
        job_fields = ("required_skills", "preferred_skills", "experience_level", "education_requirement")
        limited_resume = {k: v for k, v in resume_data.items() if k in resume_fields}
        limited_job = {k: v for k, v in job_data.items() if k in job_fields}

        prompt = f"""Calculate a match score between this resume and job requirements. Return only valid JSON.

RESUME DATA:
{json.dumps(limited_resume)}

JOB REQUIREMENTS:
{json.dumps(limited_job)}

Analyze and return a match score in this JSON format:
{{
    "overall_score": number_0_to_100,
    "skill_match_score": number_0_to_100,
    "experience_match_score": number_0_to_100,
    "education_match_score": number_0_to_100,
    "missing_skills": [
        {{"skill": "skill_name", "importance": "required/preferred", "suggestion": "how_to_acquire"}}
    ],
    "strengths": ["strength1", "strength2"],
    "weaknesses": ["weakness1", "weakness2"],
    "overall_feedback": "brief_summary"
}}

JSON:"""

        try:
            reply = await self._chat(prompt, temperature=0.2, max_tokens=1500)
            parsed_result = self._extract_json(reply)

            # Cache for 30 minutes
            ai_cache.set(cache_key, parsed_result, ttl=1800)
            return parsed_result

        except Exception as e:
            print(f"Error calculating match score: {e}")
            default_result = {"overall_score": 0, "skill_match_score": 0, "experience_match_score": 0, "education_match_score": 0}
            ai_cache.set(cache_key, default_result, ttl=300)
            return default_result

    async def generate_resume_suggestions(
        self, resume_data: Dict[str, Any], job_data: Dict[str, Any], match_analysis: Dict[str, Any]
    ) -> List[Dict[str, str]]:
        """Generate suggestions for improving resume using Cohere AI with caching.

        The cache key covers all three inputs; the prompt itself only
        carries the current skills plus the missing skills and weaknesses
        from ``match_analysis``.  Successful results are cached for one hour.

        Returns:
            The parsed JSON array, or ``[]`` (cached for five minutes) when
            the model call or parsing fails.
        """
        combined_data = f"{json.dumps(resume_data, sort_keys=True)}{json.dumps(job_data, sort_keys=True)}{json.dumps(match_analysis, sort_keys=True)}"
        cache_key = self._create_cache_key(combined_data, "resume_suggestions")
        cached_result = ai_cache.get(cache_key)
        # ``is not None`` so the cached empty failure list counts as a hit
        # (assumes the cache returns None on a miss).
        if cached_result is not None:
            return cached_result

        # Use only essential data for faster processing
        limited_data = {
            "current_skills": resume_data.get("skills", []),
            "missing_skills": match_analysis.get("missing_skills", []),
            "weaknesses": match_analysis.get("weaknesses", [])
        }

        prompt = f"""Provide 3-5 specific resume improvement suggestions. Return only valid JSON.

Analysis data:
{json.dumps(limited_data)}

Return suggestions in this JSON array format:
[
    {{
        "section": "skills/experience/education/summary",
        "suggestion": "specific_actionable_suggestion",
        "priority": "high/medium/low",
        "impact": "brief_explanation"
    }}
]

JSON:"""

        try:
            reply = await self._chat(prompt, temperature=0.3, max_tokens=800)
            parsed_result = self._extract_json(reply, "[", "]")

            # Cache for 1 hour
            ai_cache.set(cache_key, parsed_result, ttl=3600)
            return parsed_result

        except Exception as e:
            print(f"Error generating resume suggestions: {e}")
            empty_result: List[Dict[str, str]] = []
            ai_cache.set(cache_key, empty_result, ttl=300)
            return empty_result

    async def generate_cover_letter(
        self, resume_data: Dict[str, Any], job_data: Dict[str, Any], user_name: str
    ) -> str:
        """Generate a personalized cover letter using Cohere AI with caching.

        At most eight skills, two work-experience entries and five required
        skills are included in the prompt.  Successful letters are cached
        for 30 minutes.

        Returns:
            The letter text, or a fixed "unable to generate" message (cached
            for five minutes) when the model call fails.
        """
        combined_data = f"{json.dumps(resume_data, sort_keys=True)}{json.dumps(job_data, sort_keys=True)}{user_name}"
        cache_key = self._create_cache_key(combined_data, "cover_letter")
        cached_result = ai_cache.get(cache_key)
        # Truthiness check on purpose: an empty cached letter is treated as
        # a miss so that it gets regenerated.
        if cached_result:
            return cached_result

        # Use essential data only.  ``or []`` guards against explicit null
        # values, which would otherwise make the slices raise TypeError.
        essential_resume = {
            "skills": (resume_data.get("skills") or [])[:8],  # Top 8 skills
            "work_experience": (resume_data.get("work_experience") or [])[:2]  # Only first 2 jobs
        }
        essential_job = {
            "title": job_data.get("title", ""),
            "company": job_data.get("company", ""),
            "required_skills": (job_data.get("required_skills") or [])[:5]  # Top 5 skills
        }

        prompt = f"""Write a professional cover letter for {user_name} applying to this job.

APPLICANT BACKGROUND:
{json.dumps(essential_resume)}

JOB DETAILS:
{json.dumps(essential_job)}

Write a compelling 3-paragraph cover letter that:
- Opens with enthusiasm for the specific role
- Highlights relevant skills and experience
- Closes with a call to action

Keep it professional, concise, and engaging. Do not include placeholders or brackets."""

        try:
            result = await self._chat(prompt, temperature=0.4, max_tokens=600)

            # Cache for 30 minutes
            ai_cache.set(cache_key, result, ttl=1800)
            return result

        except Exception as e:
            print(f"Error generating cover letter: {e}")
            error_msg = "Unable to generate cover letter at this time."
            ai_cache.set(cache_key, error_msg, ttl=300)
            return error_msg