Migrating Jenkins SCM from Bitbucket to GitLab: SCM URL Bulk Replacement

Posted 2025-04-24

54~69 min read

When migrating from one source control management (SCM) system to another, one of the most tedious tasks is updating all your Jenkins job configurations to point to the new repository URLs. This article presents a solution to automate this process using Python and the Jenkins API, making your migration to GitLab smooth and efficient.

The Challenge

Imagine you have hundreds of Jenkins jobs configured to pull from your old SCM system, and you need to update all of them to use GitLab repositories instead. Doing this manually would be:

Time-consuming
Error-prone
Disruptive to your development workflow

Our solution uses Python scripts to:

Inventory all Jenkins jobs and their current SCM URLs
Replace old SCM URLs with new GitLab URLs in bulk

Prerequisites

Python 3.6+ with the requests library installed (pip install requests)
Jenkins access with API token
The new GitLab URL for each repository

Part 1: Discovering Your Jenkins Jobs

The first step is to create an inventory of all your Jenkins jobs and their SCM URLs. This script recursively traverses your Jenkins folders and extracts the SCM URL from each job's configuration.

import requests
import xml.etree.ElementTree as ET
import csv
from urllib.parse import quote

# Jenkins connection details
# Base URL of the Jenkins server; get_folder_jobs() prefixes every API request with it
JENKINS_URL = "https://jenkins.example.com"
# Account name that forms the first half of the (user, token) pair below
USERNAME = "your_username"
API_TOKEN = "your_api_token"  # Generate this in Jenkins user settings
# main() starts the recursive inventory here; written in Jenkins URL form ("job/<name>")
FOLDER_PATH = "job/folder_name"  # Starting folder path

# Authentication
# (user, token) tuple handed to requests through auth= on every GET in this script
auth = (USERNAME, API_TOKEN)

def get_folder_jobs(folder_path):
    """Recursively collect every job found under a Jenkins folder.

    Args:
        folder_path: Folder location written the way it appears in a Jenkins
            URL, e.g. ``"job/team/job/service"``. Leading and trailing
            slashes are ignored; an empty value addresses the Jenkins root.

    Returns:
        A list of dicts, one per job, with the keys ``name``, ``url`` (as
        reported by the Jenkins API) and ``full_path``. An empty list is
        returned when the folder listing does not come back with HTTP 200.
    """
    # Normalise first so "job/a", "/job/a" and "job/a/" all build the same
    # request URL instead of gluing the path straight onto the host name.
    folder_path = (folder_path or "").strip("/")

    # Percent-encode segment by segment so the "/" separators are preserved.
    encoded_path = "/".join(
        quote(segment, safe="") for segment in folder_path.split("/") if segment
    )

    base_url = JENKINS_URL.rstrip("/")
    folder_url = f"{base_url}/{encoded_path}" if encoded_path else base_url
    api_url = f"{folder_url}/api/json?tree=jobs[name,url,_class]"

    print(f"Requesting: {api_url}")
    response = requests.get(api_url, auth=auth)
    if response.status_code != 200:
        print(f"Error accessing folder: {response.status_code}")
        return []

    all_jobs = []
    for job in response.json().get('jobs', []):
        job_class = job.get('_class', '').lower()
        job_name = job.get('name', '')
        job_path = f"{folder_path}/job/{job_name}" if folder_path else f"job/{job_name}"

        if 'folder' in job_class:
            # Folders are only containers here: descend and collect their jobs.
            all_jobs.extend(get_folder_jobs(job_path))
        elif any(marker in job_class for marker in ('job', 'project', 'workflow')):
            all_jobs.append({
                'name': job_name,
                'url': job.get('url', ''),
                'full_path': job_path,
            })

    return all_jobs

def get_job_config(job_url):
    """Download the XML configuration of a single Jenkins job.

    Args:
        job_url: Absolute URL of the job, with or without a trailing slash.

    Returns:
        The body of ``<job_url>/config.xml`` as text, or ``None`` (after
        printing the status code) when Jenkins does not answer with HTTP 200.
    """
    # Join with exactly one slash so ".../job/name" and ".../job/name/" both
    # resolve to ".../job/name/config.xml".
    config_url = f"{job_url.rstrip('/')}/config.xml"
    response = requests.get(config_url, auth=auth)
    if response.status_code != 200:
        print(f"Error getting config for {job_url}: {response.status_code}")
        return None
    return response.text

def extract_scm_url(config_xml):
    """Return the first SCM URL found in a job's config XML, or None.

    Git remote configs are checked first, then multibranch pipeline sources.
    Any parsing problem is printed and reported as ``None``.
    """
    # Checked in order: the first pattern with a matching element decides.
    search_paths = (
        ".//hudson.plugins.git.UserRemoteConfig/url",  # Git SCM URLs
        ".//source/remote",  # MultiBranch Pipeline SCM URLs
    )
    try:
        tree_root = ET.fromstring(config_xml)
        for xpath in search_paths:
            first_match = tree_root.find(xpath)
            # Compare against None: an Element without children is falsy.
            if first_match is not None:
                return first_match.text
    except Exception as e:
        print(f"Error parsing XML: {e}")
    return None

def main():
    """Inventory the jobs under FOLDER_PATH and export their SCM URLs to CSV."""
    print(f"Getting jobs from folder: {FOLDER_PATH}")
    jobs = get_folder_jobs(FOLDER_PATH)
    print(f"Found {len(jobs)} jobs")

    # One (name, full path, Jenkins URL, SCM URL) tuple per job with an SCM URL
    found = []
    for job in jobs:
        name = job['name']
        print(f"Processing job: {name}")

        config_xml = get_job_config(job['url'])
        if not config_xml:
            continue

        scm_url = extract_scm_url(config_xml)
        if not scm_url:
            print(f"No SCM URL found for {name}")
            continue

        found.append((name, job['full_path'], job['url'], scm_url))
        print(f"Found SCM URL for {name}: {scm_url}")

    # Summary of how many jobs actually carry an SCM URL
    print(f"\nFound SCM URLs for {len(found)} jobs out of {len(jobs)} total jobs")

    # Header row first, then every collected record
    with open('jenkins_scm_urls.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["Job Name", "Full Path", "Jenkins URL", "SCM URL"])
        writer.writerows(list(record) for record in found)

    print("\nExported results to jenkins_scm_urls.csv")

if __name__ == "__main__":
    main()

Part 2: Preparing Your URL Mapping

After running the first script, you'll have a CSV file with all your Jenkins jobs and their current SCM URLs.

Now you need to create a mapping file that specifies the old and new URLs. Create a new CSV file called replace.csv with these columns:

Jenkins URL
Old SCM URL
New SCM URL

Example:

Jenkins URL	Old SCM URL	New SCM URL
https://jenkins.example.com/job/project1/	https://old-scm.com/project1.git	https://gitlab.com/org/project1.git
https://jenkins.example.com/job/project2/	https://old-scm.com/project2.git	https://gitlab.com/org/project2.git

You can use Excel or a script to help generate this mapping based on patterns in your repository structure.

Part 3: Updating Jenkins Jobs

Now it is time to update all your Jenkins jobs to use the new GitLab URLs:

import requests
import xml.etree.ElementTree as ET
import base64
import csv
import time

# Jenkins connection details
# NOTE(review): JENKINS_URL is not referenced by the functions in this script
# (job URLs come from the CSV) - confirm whether it is still needed.
JENKINS_URL = "https://jenkins.example.com"
USERNAME = "your_username"
API_TOKEN = "your_api_token"
CSV_FILE = "replace.csv"  # CSV with columns: Jenkins URL, Old SCM URL, New SCM URL

# Authentication headers
# Sent with every request made by get_job_config() and update_job_config().
headers = {
    # HTTP Basic credentials built by hand: base64 of "USERNAME:API_TOKEN"
    'Authorization': 'Basic ' + base64.b64encode(f"{USERNAME}:{API_TOKEN}".encode()).decode(),
    # Declares the body type for the config.xml POST
    'Content-Type': 'application/xml'
}

def get_job_config(job_url):
    """Download the XML configuration of a single Jenkins job.

    Args:
        job_url: Absolute URL of the job as listed in the CSV, with or
            without a trailing slash.

    Returns:
        The body of ``<job_url>/config.xml`` as text, or ``None`` (after
        printing the status code) when Jenkins does not answer with HTTP 200.
    """
    # Join with exactly one slash: a CSV entry such as ".../job/project1"
    # must not turn into ".../job/project1config.xml".
    config_url = f"{job_url.rstrip('/')}/config.xml"
    response = requests.get(config_url, headers=headers)
    if response.status_code != 200:
        print(f"Error getting config for {job_url}: {response.status_code}")
        return None
    return response.text

def update_job_config(job_url, config_xml):
    """Upload a new XML configuration for a single Jenkins job.

    Args:
        job_url: Absolute URL of the job, with or without a trailing slash.
        config_xml: Complete job configuration, as ``str`` or ``bytes``.

    Returns:
        ``True`` when Jenkins answers with HTTP 200 or 201, else ``False``.
    """
    # Join with exactly one slash so URLs without a trailing "/" still work.
    config_url = f"{job_url.rstrip('/')}/config.xml"

    # Send explicit UTF-8 bytes rather than leaving the encoding of a str body
    # to the HTTP stack; update_scm_url_in_xml() serialises with a utf-8
    # declaration, so the bytes on the wire match what the document claims.
    body = config_xml.encode('utf-8') if isinstance(config_xml, str) else config_xml

    response = requests.post(config_url, headers=headers, data=body)
    return response.status_code in (200, 201)

def update_scm_url_in_xml(config_xml, old_url, new_url):
    """Replace every occurrence of an SCM URL inside a job's config XML.

    Both Git remote configs and multibranch pipeline sources are inspected.
    The comparison ignores whitespace around the stored URL and around
    ``old_url``, so a value that is padded or wrapped inside its XML element
    still matches the trimmed value read from the CSV.

    Args:
        config_xml: The job configuration as an XML string.
        old_url: The SCM URL to look for.
        new_url: The URL written in place of each match.

    Returns:
        The updated XML (serialised with a utf-8 declaration) when at least
        one element was changed, otherwise ``None``. Errors are printed and
        also reported as ``None``.
    """
    # Element paths that may hold a repository URL
    url_paths = (
        ".//hudson.plugins.git.UserRemoteConfig/url",  # Git SCM URLs
        ".//source/remote",  # MultiBranch Pipeline SCM URLs
    )
    try:
        root = ET.fromstring(config_xml)
        wanted = old_url.strip()
        if not wanted:
            # An empty search value must never match empty URL elements.
            return None

        updated = False
        for path in url_paths:
            for url_elem in root.findall(path):
                # text is None for an empty element, hence the "or" fallback
                if (url_elem.text or "").strip() == wanted:
                    url_elem.text = new_url
                    updated = True

        if updated:
            return ET.tostring(root, encoding='utf-8').decode('utf-8')
        return None
    except Exception as e:
        print(f"Error updating XML: {e}")
        return None

def main():
    """Apply every URL replacement listed in CSV_FILE to its Jenkins job.

    Each CSV row names a job URL, the SCM URL currently configured and the
    URL that should replace it. Rows with a missing value are skipped; jobs
    whose config cannot be fetched, matched or uploaded count as failed.
    A summary is printed at the end.
    """
    updated_jobs = 0
    failed_jobs = 0
    skipped_jobs = 0

    # Read all rows up front. DictReader consumes the header line itself, so
    # no manual "skip header" step is needed (doing one drops the first job).
    # newline='' is what the csv module expects; utf-8-sig also accepts files
    # that start with a byte-order mark, which would otherwise end up inside
    # the first column name.
    with open(CSV_FILE, 'r', newline='', encoding='utf-8-sig') as csvfile:
        rows = list(csv.DictReader(csvfile))

    total_rows = len(rows)
    print(f"Found {total_rows} jobs to update in {CSV_FILE}")

    for row in rows:
        # Short rows are padded with None by DictReader, hence the "or ''".
        job_url = (row.get('Jenkins URL') or '').strip()
        old_scm_url = (row.get('Old SCM URL') or '').strip()
        new_scm_url = (row.get('New SCM URL') or '').strip()

        if not job_url or not old_scm_url or not new_scm_url:
            print(f"Skipping row with missing data: {row}")
            skipped_jobs += 1
            continue

        print(f"Processing job: {job_url}")
        print(f"  Changing SCM from: {old_scm_url}")
        print(f"  To: {new_scm_url}")

        # Get current config
        config_xml = get_job_config(job_url)
        if not config_xml:
            print(f"  Failed to get config for {job_url}")
            failed_jobs += 1
            continue

        # Update SCM URL in config
        updated_xml = update_scm_url_in_xml(config_xml, old_scm_url, new_scm_url)
        if not updated_xml:
            print(f"  No matching SCM URL found in config or error updating XML")
            failed_jobs += 1
            continue

        # Update job config
        if update_job_config(job_url, updated_xml):
            print(f"  ✅ Successfully updated {job_url}")
            updated_jobs += 1
        else:
            print(f"  ❌ Failed to update {job_url}")
            failed_jobs += 1

        # Add a small delay to avoid overwhelming the Jenkins server
        time.sleep(1)

    print(f"\n📊 Summary:")
    print(f"  ✅ Updated: {updated_jobs} jobs")
    print(f"  ❌ Failed: {failed_jobs} jobs")
    print(f"  ⏭️ Skipped: {skipped_jobs} jobs")
    print(f"  📋 Total: {total_rows} jobs")

if __name__ == "__main__":
    main()

Tips

Testing First

Before running the update script on all your jobs, test it on a few non-critical jobs:

Create a smaller CSV file with just a few test jobs

Run the update script with this file
Verify that the jobs were updated correctly
Check that the jobs still work with the new SCM URLs

Rate Limiting

The update script includes a 1-second delay between job updates to avoid overwhelming your Jenkins server. Adjust this value based on your server's performance.

Devops