Resume Parser (AI) in Python
About the project: A Resume Parser is an excellent use case for structured Generative AI because it needs to take unstructured text and map it to specific, predictable data fields.
The file resume_parser.py simulates this process. It uses a predefined JSON schema and the Gemini API to extract key information (Name, Email, Skills, Experience, Education) from a block of resume text.
Prerequisites:
You'll need the requests library to make the API calls:
pip install requests
Resume Parser Project Code
The code includes the necessary API call structure, exponential backoff for reliability, and a detailed JSON schema to guide the model's output.
import requests
import json
import time
import os
# --- Configuration ---
# IMPORTANT: The apiKey will be automatically injected in the runtime environment.
# Leave it as an empty string.
API_KEY = ""
MODEL_NAME = "gemini-2.5-flash-preview-05-20"
API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL_NAME}:generateContent?key={API_KEY}"
# --- Structured Output Schema for Resume Data ---
# This schema dictates exactly how the model MUST format the output.
RESUME_SCHEMA = {
"type": "OBJECT",
"properties": {
"name": {"type": "STRING", "description": "Full name of the candidate."},
"email": {"type": "STRING", "description": "Candidate's primary email address."},
"phone": {"type": "STRING", "description": "Candidate's phone number."},
"total_years_experience": {"type": "NUMBER", "description": "Total professional experience in years (e.g., 5.5)."},
"skills": {
"type": "ARRAY",
"description": "A list of technical and soft skills.",
"items": {"type": "STRING"}
},
"education": {
"type": "ARRAY",
"description": "List of degrees, institutions, and completion years.",
"items": {
"type": "OBJECT",
"properties": {
"institution": {"type": "STRING"},
"degree": {"type": "STRING"},
"year_graduated": {"type": "NUMBER"}
},
"propertyOrdering": ["institution", "degree", "year_graduated"]
}
},
"experience": {
"type": "ARRAY",
"description": "List of work experience entries.",
"items": {
"type": "OBJECT",
"properties": {
"title": {"type": "STRING"},
"company": {"type": "STRING"},
"duration": {"type": "STRING", "description": "Duration worked, e.g., 'Jan 2020 - Dec 2022'"},
"summary": {"type": "STRING", "description": "A brief 1-2 sentence summary of responsibilities and achievements."}
},
"propertyOrdering": ["title", "company", "duration", "summary"]
}
}
},
"propertyOrdering": ["name", "email", "phone", "total_years_experience", "skills", "education", "experience"]
}
# --- LLM System Instruction ---
SYSTEM_PROMPT = (
"You are a world-class Resume Parsing AI. Your task is to accurately extract key "
"information from the provided resume text and strictly format the output as a JSON "
"object following the provided schema. Analyze the text for name, contact details, "
"skills, education history, and work experience. Provide all dates and numbers accurately."
)
def exponential_backoff_fetch(url, payload, max_retries=5):
"""
Fetches content from the API with exponential backoff for handling transient errors.
"""
headers = {'Content-Type': 'application/json'}
for attempt in range(max_retries):
try:
response = requests.post(url, headers=headers, data=json.dumps(payload))
response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx)
return response.json()
except requests.exceptions.RequestException as e:
# Check if this is the final attempt
if attempt == max_retries - 1:
print(f"[Error] Final attempt failed. Could not reach API: {e}")
return None
# Log and calculate backoff
wait_time = 2 ** attempt
print(f"[API Error] Attempt {attempt+1}/{max_retries}: {e}. Retrying in {wait_time}s...")
time.sleep(wait_time)
return None
def parse_resume_text(resume_text):
"""
Uses the Gemini API with structured output to parse the resume text.
"""
print("Sending resume text to Gemini API for structured parsing...")
payload = {
"contents": [{"parts": [{"text": resume_text}]}],
"systemInstruction": {"parts": [{"text": SYSTEM_PROMPT}]},
"config": {
"responseMimeType": "application/json",
"responseSchema": RESUME_SCHEMA,
}
}
try:
api_response = exponential_backoff_fetch(API_URL, payload)
if not api_response:
print("Failed to get a response from the API after multiple retries.")
return None
# Extract the JSON string from the API response structure
json_string = api_response.get('candidates', [{}])[0]\
.get('content', {})\
.get('parts', [{}])[0]\
.get('text')
if not json_string:
print("API response structure was invalid or missing JSON content.")
return None
# Parse the JSON string into a Python dictionary
parsed_data = json.loads(json_string)
return parsed_data
except json.JSONDecodeError:
print("Error: Failed to decode JSON from API response.")
print(f"Received text (may be truncated): {json_string[:500]}...")
return None
except Exception as e:
print(f"An unexpected error occurred during parsing: {e}")
return None
# --- Main Execution ---
if __name__ == "__main__":
# In a real application, this text would come from a PDF/DOCX reader (e.g., pdfminer.six, python-docx)
SAMPLE_RESUME_TEXT = """
Software Development Lead
John D. Smith | john.smith@example.com | (555) 123-4567
Total Experience: 8 years
Summary
Senior Software Engineer with 8 years of experience specializing in scalable backend services
and cloud infrastructure (AWS). Proven ability to lead cross-functional teams and deliver
high-performance applications.
Skills
Python, Django, Flask, AWS, Docker, Kubernetes, PostgreSQL, MongoDB, REST APIs, Agile, Leadership
Experience
1. Lead Backend Engineer | TechCorp Solutions | Jan 2020 - Present
Led a team of 5 engineers in designing and deploying a microservices architecture using Python and AWS Lambda, resulting in a 30% reduction in latency.
2. Software Developer | Innovate Systems | Jul 2017 - Dec 2019
Developed and maintained core API services for a popular e-commerce platform using Django. Implemented unit and integration tests, improving code coverage by 45%.
Education
Master of Science in Computer Science | Stanford University | 2017
Bachelor of Engineering in Electrical Engineering | UC Berkeley | 2015
"""
# 1. Parse the resume
parsed_resume = parse_resume_text(SAMPLE_RESUME_TEXT)
# 2. Display the results
print("\n" + "="*50)
if parsed_resume:
print("✅ Resume Parsing Complete (Structured Data Output)")
print("="*50)
# Display key extracted fields
print(f"Name: {parsed_resume.get('name')}")
print(f"Email: {parsed_resume.get('email')}")
print(f"Total Experience: {parsed_resume.get('total_years_experience')} years")
print("-" * 50)
print("Skills:")
print(f" {', '.join(parsed_resume.get('skills', []))}")
print("-" * 50)
print("Education:")
for edu in parsed_resume.get('education', []):
print(f" {edu.get('degree')} from {edu.get('institution')} ({edu.get('year_graduated')})")
print("-" * 50)
print("Experience:")
for exp in parsed_resume.get('experience', []):
print(f" - {exp.get('title')} at {exp.get('company')}")
print(f" Duration: {exp.get('duration')}")
print(f" Summary: {exp.get('summary')}")
# Optional: Print the raw JSON output
# print("\n--- RAW JSON OUTPUT ---")
# print(json.dumps(parsed_resume, indent=2))
else:
print("❌ Resume Parsing Failed.")
print("="*50)
This code demonstrates the full workflow for an AI-powered parser: reading the text, defining a strict output structure, calling the LLM, and handling the JSON response.
To use this with actual PDF/DOCX files, you would first need to add a preprocessing step (using libraries like PyPDF2 or python-docx) to extract the plain text and pass it to the parse_resume_text function.
← Back to Projects