Resume Parser (AI) in Python
About the project: A Resume Parser is an excellent use case for structured Generative AI because it needs to take unstructured text and map it to specific, predictable data fields.
The file resume_parser.py simulates this process. It uses a predefined JSON schema and the Gemini API to extract key information (Name, Email, Skills, Experience, Education) from a block of resume text.
Prerequisites:
You'll need the requests library to make the API calls:
pip install requests
Resume Parser Project Code
The code includes the necessary API call structure, exponential backoff for reliability, and a detailed JSON schema to guide the model's output.
import requests
import json
import time
import os
# --- Configuration ---
# IMPORTANT: The apiKey will be automatically injected in the runtime environment.
# Leave it as an empty string.
API_KEY = ""
MODEL_NAME = "gemini-2.5-flash-preview-05-20"
API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL_NAME}:generateContent?key={API_KEY}"
# --- Structured Output Schema for Resume Data ---
# This schema dictates exactly how the model MUST format the output.
RESUME_SCHEMA = {
"type": "OBJECT",
"properties": {
"name": {"type": "STRING", "description": "Full name of the candidate."},
"email": {"type": "STRING", "description": "Candidate's primary email address."},
"phone": {"type": "STRING", "description": "Candidate's phone number."},
"total_years_experience": {"type": "NUMBER", "description": "Total professional experience in years (e.g., 5.5)."},
"skills": {
"type": "ARRAY",
"description": "A list of technical and soft skills.",
"items": {"type": "STRING"}
},
"education": {
"type": "ARRAY",
"description": "List of degrees, institutions, and completion years.",
"items": {
"type": "OBJECT",
"properties": {
"institution": {"type": "STRING"},
"degree": {"type": "STRING"},
"year_graduated": {"type": "NUMBER"}
},
"propertyOrdering": ["institution", "degree", "year_graduated"]
}
},
"experience": {
"type": "ARRAY",
"description": "List of work experience entries.",
"items": {
"type": "OBJECT",
"properties": {
"title": {"type": "STRING"},
"company": {"type": "STRING"},
"duration": {"type": "STRING", "description": "Duration worked, e.g., 'Jan 2020 - Dec 2022'"},
"summary": {"type": "STRING", "description": "A brief 1-2 sentence summary of responsibilities and achievements."}
},
"propertyOrdering": ["title", "company", "duration", "summary"]
}
}
},
"propertyOrdering": ["name", "email", "phone", "total_years_experience", "skills", "education", "experience"]
}
# --- LLM System Instruction ---
SYSTEM_PROMPT = (
"You are a world-class Resume Parsing AI. Your task is to accurately extract key "
"information from the provided resume text and strictly format the output as a JSON "
"object following the provided schema. Analyze the text for name, contact details, "
"skills, education history, and work experience. Provide all dates and numbers accurately."
)
def exponential_backoff_fetch(url, payload, max_retries=5):
"""
Fetches content from the API with exponential backoff for handling transient errors.
"""
headers = {'Content-Type': 'application/json'}
for attempt in range(max_retries):
try:
response = requests.post(url, headers=headers, data=json.dumps(payload))
response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx)
return response.json()
except requests.exceptions.RequestException as e:
# Check if this is the final attempt
if attempt == max_retries - 1:
print(f"[Error] Final attempt failed. Could not reach API: {e}")
return None
# Log and calculate backoff
wait_time = 2 ** attempt
print(f"[API Error] Attempt {attempt+1}/{max_retries}: {e}. Retrying in {wait_time}s...")
time.sleep(wait_time)
return None
def parse_resume_text(resume_text):
"""
Uses the Gemini API with structured output to parse the resume text.
"""
print("Sending resume text to Gemini API for structured parsing...")
payload = {
"contents": [{"parts": [{"text": resume_text}]}],
"systemInstruction": {"parts": [{"text": SYSTEM_PROMPT}]},
"config": {
"responseMimeType": "application/json",
"responseSchema": RESUME_SCHEMA,
}
}
try:
api_response = exponential_backoff_fetch(API_URL, payload)
if not api_response:
print("Failed to get a response from the API after multiple retries.")
return None
# Extract the JSON string from the API response structure
json_string = api_response.get('candidates', [{}])[0]\
.get('content', {})\
.get('parts', [{}])[0]\
.get('text')
if not json_string:
print("API response structure was invalid or missing JSON content.")
return None
# Parse the JSON string into a Python dictionary
parsed_data = json.loads(json_string)
return parsed_data
except json.JSONDecodeError:
print("Error: Failed to decode JSON from API response.")
print(f"Received text (may be truncated): {json_string[:500]}...")
return None
except Exception as e:
print(f"An unexpected error occurred during parsing: {e}")
return None
# --- Main Execution ---
if __name__ == "__main__":
# In a real application, this text would come from a PDF/DOCX reader (e.g., pdfminer.six, python-docx)
SAMPLE_RESUME_TEXT = """
Software Development Lead
John D. Smith | john.smith@example.com | (555) 123-4567
Total Experience: 8 years
Summary
Senior Software Engineer with 8 years of experience specializing in scalable backend services
and cloud infrastructure (AWS). Proven ability to lead cross-functional teams and deliver
high-performance applications.
Skills
Python, Django, Flask, AWS, Docker, Kubernetes, PostgreSQL, MongoDB, REST APIs, Agile, Leadership
Experience
1. Lead Backend Engineer | TechCorp Solutions | Jan 2020 - Present
Led a team of 5 engineers in designing and deploying a microservices architecture using Python and AWS Lambda, resulting in a 30% reduction in latency.
2. Software Developer | Innovate Systems | Jul 2017 - Dec 2019
Developed and maintained core API services for a popular e-commerce platform using Django. Implemented unit and integration tests, improving code coverage by 45%.
Education
Master of Science in Computer Science | Stanford University | 2017
Bachelor of Engineering in Electrical Engineering | UC Berkeley | 2015
"""
# 1. Parse the resume
parsed_resume = parse_resume_text(SAMPLE_RESUME_TEXT)
# 2. Display the results
print("\n" + "="*50)
if parsed_resume:
print("✅ Resume Parsing Complete (Structured Data Output)")
print("="*50)
# Display key extracted fields
print(f"Name: {parsed_resume.get('name')}")
print(f"Email: {parsed_resume.get('email')}")
print(f"Total Experience: {parsed_resume.get('total_years_experience')} years")
print("-" * 50)
print("Skills:")
print(f" {', '.join(parsed_resume.get('skills', []))}")
print("-" * 50)
print("Education:")
for edu in parsed_resume.get('education', []):
print(f" {edu.get('degree')} from {edu.get('institution')} ({edu.get('year_graduated')})")
print("-" * 50)
print("Experience:")
for exp in parsed_resume.get('experience', []):
print(f" - {exp.get('title')} at {exp.get('company')}")
print(f" Duration: {exp.get('duration')}")
print(f" Summary: {exp.get('summary')}")
# Optional: Print the raw JSON output
# print("\n--- RAW JSON OUTPUT ---")
# print(json.dumps(parsed_resume, indent=2))
else:
print("❌ Resume Parsing Failed.")
print("="*50)
This code demonstrates the full workflow for an AI-powered parser: reading the text, defining a strict output structure, calling the LLM, and handling the JSON response.
To use this with actual PDF/DOCX files, you would first need to add a preprocessing step (using libraries like PyPDF2 or python-docx) to extract the plain text and pass it to the parse_resume_text function.
← Back to Projects