# graphql-engine/scripts/get_server_licenses.py

"""
Script to generate a combined CSV report of package licenses using cabal-plan.

This script takes any number of Haskell package names, runs the `cabal-plan license-report`
command for each package, and processes the resulting markdown output to generate a combined
CSV file listing package dependencies with their names, versions, licenses, and descriptions.

Usage:
    python script.py <package1> <package2> ... <output_file.csv>

Example:
    python script.py graphql-engine-pro some-other-package output.csv

Parameters:
    <package1>, <package2>, ... : Names of the packages to process, e.g. lib:graphql-engine or exe:graphql-engine.
    <output_file.csv>           : Name of the output CSV file where the combined dependencies will be saved.

Description:
    - The script removes links, backticks, bold and italic formatting, and any remaining Markdown syntax from the content.
    - It processes the markdown content generated by the cabal-plan command for each package.
    - The script excludes specified packages from the final report.
    - It assigns specified SPDX License IDs to certain packages.
    - The combined dependencies are written to the specified output CSV file.

Dependencies:
    - Python 3.x
    - cabal-plan (ensure it's installed and available in your PATH)

Notes:
    - Ensure you have cabal-plan installed and the packages are available for the command to run successfully.
    - The script expects the cabal-plan license-report command to output markdown tables with specific columns.
"""

import re
import csv
import subprocess

# Package names excluded from the report: combine_dependencies() skips every
# parsed table row whose name appears in this list.
# NOTE(review): these look like first-party / in-repo packages rather than
# third-party dependencies -- confirm before adding or removing entries.
PACKAGES_TO_REMOVE = [
    'arrows-extra',
    'aeson-ordered',
    'ci-info',
    'dc-api',
    'ekg-prometheus',
    'graphql-engine',
    'graphql-parser',
    'hasura-base',
    'hasura-error-message',
    'hasura-extras',
    'hasura-json-encoding',
    'hasura-prelude',
    'incremental',
    'kriti-lang',
    'libdeflate-hs',
    'pg-client',
    'schema-parsers'
]

# SPDX license ids keyed by package name. combine_dependencies() uses the
# value here instead of the license parsed from the cabal-plan report.
# NOTE(review): presumably these packages report a missing or non-SPDX
# license in their metadata -- confirm against the upstream packages.
PACKAGE_LICENSES = {
    'ekg-json': 'BSD-3-Clause',
    'odbc': 'BSD-3-Clause'
}

def clean_text(text):
    """Strip Markdown decoration from a table cell and trim whitespace.

    The substitutions run in the order listed below: ``[label](url)`` links
    collapse to ``label``, backticks are dropped, ``*``/``**`` emphasis is
    unwrapped, and any remaining ``#``, ``_`` or ``~`` characters are deleted.
    The result has leading and trailing whitespace removed.
    """
    substitutions = (
        (r'\[([^\]]+)\]\([^\)]+\)', r'\1'),  # [label](url) -> label
        (r'`', ''),                          # inline-code backticks
        (r'\*+([^*]+)\*+', r'\1'),           # *italic* / **bold** -> inner text
        (r'[#_~]', ''),                      # leftover Markdown punctuation
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# One license-report table: a header row starting with "| Name", a separator
# row, then the body up to the first blank line or end of input (group 1).
_LICENSE_TABLE_RE = re.compile(r'\| Name.*\n\|[-\s|]*\n([\s\S]*?)(?:\n\n|$)')


def parse_md_content(content):
    """Extract dependency rows from a cabal-plan Markdown license report.

    Every table whose header starts with ``| Name`` is parsed, not just the
    first one: a report may contain several tables (cabal-plan lists direct
    and indirect transitive dependencies separately), and stopping after the
    first would silently drop the rest.

    Args:
        content: Markdown text as printed by ``cabal-plan license-report``.

    Returns:
        A list of ``[name, version, license, description]`` lists, one per
        table row, in document order, with Markdown formatting removed via
        ``clean_text``. Rows with fewer than four cells are skipped, and any
        columns past the fourth are discarded. Returns an empty list when no
        table is found.
    """
    rows = []
    for table in _LICENSE_TABLE_RE.finditer(content):
        for line in table.group(1).split('\n'):
            if not line.strip():
                continue
            # Drop the empty fragments before the first and after the last pipe.
            cells = [clean_text(cell) for cell in line.split('|')[1:-1]]
            if len(cells) >= 4:
                rows.append(cells[:4])  # Only the first 4 columns are reported
    return rows

def run_cabal_plan(package):
    """Run ``cabal-plan license-report`` for *package* and return its stdout.

    The command is started from an argument list rather than through a shell,
    so the package name is always passed as one literal argument and can
    never be interpreted as shell syntax.

    Args:
        package: Target to report on, e.g. ``lib:graphql-engine``.

    Returns:
        The Markdown report written to stdout, or an empty string when the
        command cannot be started or exits with a non-zero status. An error
        message is printed in both failure cases.
    """
    command = ['cabal-plan', 'license-report', '--licensedir=licenses', package]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as exc:
        # Without a shell, a missing or non-executable cabal-plan raises here
        # instead of yielding a non-zero exit status; report it the same way.
        print(f"Error running command for package {package}: {exc}")
        return ""
    if result.returncode != 0:
        print(f"Error running command for package {package}: {result.stderr}")
        return ""
    return result.stdout

def _version_key(version):
    """Turn a dotted version string into a tuple that orders numerically."""
    key = []
    for component in version.split('.'):
        try:
            key.append((0, int(component), ''))
        except ValueError:
            # Non-numeric component: keep the tuple comparable and order it
            # after any numeric component in the same position.
            key.append((1, 0, component))
    return tuple(key)


def combine_dependencies(packages):
    """Collect the dependencies of all *packages* into one de-duplicated list.

    Runs the license report for each package, skips names listed in
    ``PACKAGES_TO_REMOVE``, and replaces the license with the entry from
    ``PACKAGE_LICENSES`` when there is one. If the same dependency appears
    more than once, the row with the highest version is kept. Versions are
    compared component by component as numbers, so ``1.10.0`` is newer than
    ``1.9.0``; a plain string comparison would get that backwards.

    Args:
        packages: Iterable of package targets to pass to ``run_cabal_plan``.

    Returns:
        A list of ``(name, version, license, description)`` tuples, one per
        distinct dependency name. Packages whose report is empty or failed
        contribute nothing.
    """
    combined_deps = {}

    for package in packages:
        md_content = run_cabal_plan(package)
        if not md_content:
            continue

        for name, version, license_id, description in parse_md_content(md_content):
            if name in PACKAGES_TO_REMOVE:
                continue
            license_id = PACKAGE_LICENSES.get(name, license_id)
            current = combined_deps.get(name)
            if current is None or _version_key(version) > _version_key(current[1]):
                combined_deps[name] = (name, version, license_id, description)

    return list(combined_deps.values())

def write_csv(dependencies, output_file):
    """Write *dependencies* to *output_file* as UTF-8 CSV.

    The file starts with a fixed header row, followed by the dependency
    rows in ascending sorted order. Fields are quoted only when necessary.
    """
    header = ['Name', 'Version', 'SPDX License Id', 'Description']
    with open(output_file, 'w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle, quoting=csv.QUOTE_MINIMAL)
        out.writerow(header)
        out.writerows(sorted(dependencies))

if __name__ == "__main__":
    import sys

    # Everything after the script name: one or more packages, then the
    # output path as the final argument.
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        print("Usage: python script.py <package1> <package2> ... <output_file.csv>")
        sys.exit(1)

    *packages, output_file = cli_args

    combined_deps = combine_dependencies(packages)
    write_csv(combined_deps, output_file)

    print(f"Combined dependencies written to {output_file}")