Migrating Jenkins SCM from Bitbucket to GitLab: SCM URL Bulk Replacement
When migrating from one source control management (SCM) system to another, one of the most tedious tasks is updating all your Jenkins job configurations to point to the new repository URLs. This article presents a solution to automate this process using Python and the Jenkins API, making your migration to GitLab smooth and efficient.
The Challenge
Imagine you have hundreds of Jenkins jobs configured to pull from your old SCM system, and you need to update all of them to use GitLab repositories instead. Doing this manually would be:
- Time-consuming
- Error-prone
- Disruptive to your development workflow
Our solution uses Python scripts to:
- Inventory all Jenkins jobs and their current SCM URLs
- Replace old SCM URLs with new GitLab URLs in bulk
Prerequisites
- Python 3.6+
- Jenkins access with API token
- The new GitLab repository URL for every repository being migrated
Part 1: Discovering Your Jenkins Jobs
The first step is to create an inventory of all your Jenkins jobs and their SCM URLs. This script recursively traverses your Jenkins folders and extracts the SCM URL from each job's configuration.
import requests
import xml.etree.ElementTree as ET
import csv
from urllib.parse import quote
# Jenkins connection details
JENKINS_URL = "https://jenkins.example.com"
USERNAME = "your_username"
API_TOKEN = "your_api_token"  # Generate this in Jenkins user settings
FOLDER_PATH = "job/folder_name"  # Starting folder path

# Authentication: (username, API token) pair handed to requests via ``auth=``
auth = (USERNAME, API_TOKEN)
def _folder_api_url(folder_path):
    """Build the JSON API URL that lists the direct children of *folder_path*."""
    # Encode each path segment on its own so names with spaces or reserved
    # characters survive while the "/" separators stay intact. Dropping empty
    # segments makes "job/a", "/job/a" and "job/a/" all resolve to the same URL.
    segments = [quote(segment, safe="") for segment in folder_path.split("/") if segment]
    base_url = JENKINS_URL.rstrip("/")
    if segments:
        base_url = base_url + "/" + "/".join(segments)
    return f"{base_url}/api/json?tree=jobs[name,url,_class]"


def get_folder_jobs(folder_path):
    """Recursively collect every job below a Jenkins folder.

    Args:
        folder_path: Folder location relative to ``JENKINS_URL`` in Jenkins'
            ``job/<name>/job/<name>`` form. A leading or trailing "/" is
            tolerated; an empty string addresses the Jenkins root.

    Returns:
        A list of dicts with the keys ``name``, ``url`` and ``full_path``, one
        per job found. An empty list is returned when the folder cannot be
        read (network error or non-200 response); the problem is printed.
    """
    api_url = _folder_api_url(folder_path)
    print(f"Requesting: {api_url}")

    try:
        # A timeout keeps one unresponsive request from hanging the whole run.
        response = requests.get(api_url, auth=auth, timeout=30)
    except requests.RequestException as exc:
        print(f"Error accessing folder: {exc}")
        return []
    if response.status_code != 200:
        print(f"Error accessing folder: {response.status_code}")
        return []

    parent_path = folder_path.rstrip("/")
    all_jobs = []
    for job in response.json().get('jobs', []):
        job_class = job.get('_class', '').lower()
        job_name = job.get('name', '')
        child_path = f"{parent_path}/job/{job_name}" if parent_path else f"job/{job_name}"

        if 'folder' in job_class:
            # Folder-like item: descend and collect the jobs it contains.
            all_jobs.extend(get_folder_jobs(child_path))
        elif any(marker in job_class for marker in ('job', 'project', 'workflow')):
            all_jobs.append({
                'name': job_name,
                'url': job.get('url', ''),
                'full_path': child_path,
            })
    return all_jobs
def get_job_config(job_url):
    """Download the ``config.xml`` of a Jenkins job.

    Args:
        job_url: Absolute URL of the job, with or without a trailing "/".

    Returns:
        The configuration XML as text, or ``None`` when the request fails or
        Jenkins answers with a status other than 200 (the error is printed).
    """
    # Normalise the trailing slash so a URL without one does not become
    # ".../job/nameconfig.xml".
    config_url = f"{job_url.rstrip('/')}/config.xml"
    try:
        # A timeout keeps one unresponsive request from hanging the whole run.
        response = requests.get(config_url, auth=auth, timeout=30)
    except requests.RequestException as exc:
        print(f"Error getting config for {job_url}: {exc}")
        return None
    if response.status_code != 200:
        print(f"Error getting config for {job_url}: {response.status_code}")
        return None
    return response.text
def extract_scm_url(config_xml):
    """Return the first SCM URL found in a job's ``config.xml``.

    Git SCM entries (``hudson.plugins.git.UserRemoteConfig/url``) are checked
    first, then multibranch sources (``source/remote``). Elements that are
    empty or contain only whitespace are skipped, so an empty Git entry does
    not hide a usable multibranch remote.

    Args:
        config_xml: The job configuration as an XML string.

    Returns:
        The URL with surrounding whitespace removed, or ``None`` when the XML
        cannot be parsed (the error is printed) or holds no SCM URL.
    """
    try:
        root = ET.fromstring(config_xml)
    except (ET.ParseError, TypeError, ValueError) as e:
        print(f"Error parsing XML: {e}")
        return None

    scm_xpaths = (
        ".//hudson.plugins.git.UserRemoteConfig/url",  # Git SCM URLs
        ".//source/remote",  # MultiBranch Pipeline SCM URLs
    )
    for xpath in scm_xpaths:
        for element in root.iterfind(xpath):
            url = (element.text or "").strip()
            if url:
                return url
    return None
def main():
    """Inventory the SCM URL of every job below ``FOLDER_PATH``.

    Walks the folder tree, downloads each job's configuration, extracts its
    SCM URL and writes the jobs that have one to ``jenkins_scm_urls.csv``
    (columns: Job Name, Full Path, Jenkins URL, SCM URL). Jobs whose
    configuration cannot be fetched or that have no SCM URL are left out.
    """
    # Get all jobs in the folder
    print(f"Getting jobs from folder: {FOLDER_PATH}")
    jobs = get_folder_jobs(FOLDER_PATH)
    print(f"Found {len(jobs)} jobs")

    # Process each job to extract SCM URL
    job_scm_urls = []
    for job in jobs:
        print(f"Processing job: {job['name']}")
        config_xml = get_job_config(job['url'])
        if not config_xml:
            continue

        scm_url = extract_scm_url(config_xml)
        if not scm_url:
            print(f"No SCM URL found for {job['name']}")
            continue

        job_scm_urls.append({
            'name': job['name'],
            'full_path': job['full_path'],
            'url': job['url'],
            'scm_url': scm_url,
        })
        print(f"Found SCM URL for {job['name']}: {scm_url}")

    # Print summary
    print(f"\nFound SCM URLs for {len(job_scm_urls)} jobs out of {len(jobs)} total jobs")

    # Export results to CSV. The encoding is explicit so job names with
    # non-ASCII characters do not depend on the platform's default encoding.
    with open('jenkins_scm_urls.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["Job Name", "Full Path", "Jenkins URL", "SCM URL"])
        writer.writerows(
            [job['name'], job['full_path'], job['url'], job['scm_url']]
            for job in job_scm_urls
        )
    print("\nExported results to jenkins_scm_urls.csv")


if __name__ == "__main__":
    main()
Part 2: Preparing Your URL Mapping
After running the first script, you'll have a CSV file with all your Jenkins jobs and their current SCM URLs.
Now you need to create a mapping file that specifies the old and new URLs. Create a new CSV file called replace.csv with these columns:
- Jenkins URL
- Old SCM URL
- New SCM URL
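Example (the header row must use exactly these column names, because the update script looks them up by name):
Jenkins URL,Old SCM URL,New SCM URL
https://jenkins.example.com/job/folder_name/job/my-app/,https://bitbucket.example.com/scm/proj/my-app.git,https://gitlab.example.com/group/my-app.git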
You can use Excel or a script to help generate this mapping based on patterns in your repository structure.
Part 3: Updating Jenkins Jobs
Now it is time to update all your Jenkins jobs to use the new GitLab URLs:
import requests
import xml.etree.ElementTree as ET
import base64
import csv
import time
# Jenkins connection details
JENKINS_URL = "https://jenkins.example.com"
USERNAME = "your_username"
API_TOKEN = "your_api_token"
CSV_FILE = "replace.csv"  # CSV with columns: Jenkins URL, Old SCM URL, New SCM URL

# Authentication headers: HTTP Basic credentials are "username:token",
# base64-encoded, and are sent with every request together with an XML
# content type.
headers = {
    'Authorization': 'Basic {}'.format(
        base64.b64encode(':'.join((USERNAME, API_TOKEN)).encode()).decode()
    ),
    'Content-Type': 'application/xml',
}
def get_job_config(job_url):
    """Download the ``config.xml`` of a Jenkins job.

    Args:
        job_url: Absolute URL of the job, with or without a trailing "/"
            (the value comes from a hand-edited CSV, so both forms occur).

    Returns:
        The configuration XML as text, or ``None`` when the request fails or
        Jenkins answers with a status other than 200 (the error is printed).
    """
    # Normalise the trailing slash so a URL without one does not become
    # ".../job/nameconfig.xml".
    config_url = f"{job_url.rstrip('/')}/config.xml"
    try:
        # A timeout keeps one unresponsive request from hanging the whole run.
        response = requests.get(config_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Error getting config for {job_url}: {exc}")
        return None
    if response.status_code != 200:
        print(f"Error getting config for {job_url}: {response.status_code}")
        return None
    return response.text
def update_job_config(job_url, config_xml):
    """Upload a new ``config.xml`` for a Jenkins job.

    Args:
        job_url: Absolute URL of the job, with or without a trailing "/".
        config_xml: The complete job configuration, as ``str`` or ``bytes``.

    Returns:
        ``True`` when Jenkins answers 200 or 201, ``False`` for any other
        status or when the request itself fails (the error is printed).
    """
    # Normalise the trailing slash so a URL without one does not become
    # ".../job/nameconfig.xml".
    config_url = f"{job_url.rstrip('/')}/config.xml"

    # Encode explicitly: how a str body is put on the wire is decided by the
    # HTTP stack (http.client encodes str bodies as ISO-8859-1), which can
    # reject or corrupt characters outside Latin-1. The XML produced by this
    # script carries no encoding declaration, so UTF-8 is what a parser assumes.
    payload = config_xml.encode('utf-8') if isinstance(config_xml, str) else config_xml

    try:
        response = requests.post(config_url, headers=headers, data=payload, timeout=30)
    except requests.RequestException as exc:
        print(f"Error updating config for {job_url}: {exc}")
        return False
    return response.status_code in (200, 201)
def update_scm_url_in_xml(config_xml, old_url, new_url):
    """Replace every occurrence of an SCM URL in a job's ``config.xml``.

    Both Git SCM entries (``hudson.plugins.git.UserRemoteConfig/url``) and
    multibranch sources (``source/remote``) are examined. URLs are compared
    after stripping surrounding whitespace, so stray whitespace in the XML or
    in the mapping does not prevent a match.

    Args:
        config_xml: The job configuration as an XML string.
        old_url: The URL to look for.
        new_url: The URL to write in its place.

    Returns:
        The updated configuration serialised as a string (without an XML
        declaration), or ``None`` when nothing matched, ``old_url`` is empty,
        or the XML could not be parsed (the error is printed).
    """
    try:
        root = ET.fromstring(config_xml)
    except (ET.ParseError, TypeError, ValueError) as e:
        print(f"Error updating XML: {e}")
        return None

    target = (old_url or "").strip()
    if not target:
        # An empty search URL must never match empty <url/> elements.
        return None

    scm_xpaths = (
        ".//hudson.plugins.git.UserRemoteConfig/url",  # Git SCM URLs
        ".//source/remote",  # MultiBranch Pipeline SCM URLs
    )
    updated = False
    for xpath in scm_xpaths:
        for url_elem in root.iterfind(xpath):
            if (url_elem.text or "").strip() == target:
                url_elem.text = new_url
                updated = True

    if not updated:
        return None
    # encoding='unicode' yields a str directly, with no encode/decode round trip.
    return ET.tostring(root, encoding='unicode')
def main():
    """Apply the URL mapping in ``CSV_FILE`` to the listed Jenkins jobs.

    Each CSV row names a job (``Jenkins URL``), the URL to replace
    (``Old SCM URL``) and its replacement (``New SCM URL``). For every
    complete row the job configuration is downloaded, rewritten and uploaded
    again. Rows with a missing value are skipped. A summary of updated,
    failed and skipped jobs is printed at the end.
    """
    updated_jobs = 0
    failed_jobs = 0
    skipped_jobs = 0

    # Read every row up front: this gives an exact row count and avoids
    # re-reading the file. DictReader consumes the header row itself, so no
    # manual "skip header" step is needed (doing one drops the first data row).
    # 'utf-8-sig' removes the byte-order mark that spreadsheet exports may
    # prepend, which would otherwise corrupt the first column name.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8-sig') as csvfile:
        rows = list(csv.DictReader(csvfile))

    total_rows = len(rows)
    print(f"Found {total_rows} jobs to update in {CSV_FILE}")

    for row in rows:
        # Short rows yield None for missing columns, hence the "or ''".
        job_url = (row.get('Jenkins URL') or '').strip()
        old_scm_url = (row.get('Old SCM URL') or '').strip()
        new_scm_url = (row.get('New SCM URL') or '').strip()

        if not job_url or not old_scm_url or not new_scm_url:
            print(f"Skipping row with missing data: {row}")
            skipped_jobs += 1
            continue

        print(f"Processing job: {job_url}")
        print(f" Changing SCM from: {old_scm_url}")
        print(f" To: {new_scm_url}")

        # Get current config
        config_xml = get_job_config(job_url)
        if not config_xml:
            print(f" Failed to get config for {job_url}")
            failed_jobs += 1
            continue

        # Update SCM URL in config
        updated_xml = update_scm_url_in_xml(config_xml, old_scm_url, new_scm_url)
        if not updated_xml:
            print(" No matching SCM URL found in config or error updating XML")
            failed_jobs += 1
            continue

        # Update job config
        if update_job_config(job_url, updated_xml):
            print(f" ✅ Successfully updated {job_url}")
            updated_jobs += 1
        else:
            print(f" ❌ Failed to update {job_url}")
            failed_jobs += 1

        # Add a small delay to avoid overwhelming the Jenkins server
        time.sleep(1)

    print("\n📊 Summary:")
    print(f" ✅ Updated: {updated_jobs} jobs")
    print(f" ❌ Failed: {failed_jobs} jobs")
    print(f" ⏭️ Skipped: {skipped_jobs} jobs")
    print(f" 📋 Total: {total_rows} jobs")


if __name__ == "__main__":
    main()
Tips
Testing First
Before running the update script on all your jobs, test it on a few non-critical jobs:
- Create a smaller CSV file with just a few test jobs
- Run the update script with this file
- Verify that the jobs were updated correctly
- Check that the jobs still work with the new SCM URLs
Rate Limiting
The update script includes a 1-second delay between job updates to avoid overwhelming your Jenkins server. Adjust this value based on your server's performance.