graphql-engine/scripts/get_server_licenses.py

"""
Script to generate a combined CSV report of package licenses using cabal-plan.

This script takes any number of Haskell package names, runs the `cabal-plan license-report`
command for each package, and processes the resulting markdown output to generate a combined
CSV file listing package dependencies with their names, versions, licenses, and descriptions.

Usage:
    python script.py <package1> <package2> ... <output_file.csv>

Example:
    python script.py graphql-engine-pro some-other-package output.csv

Parameters:
    <package1>, <package2>, ... : Names of the packages to process, e.g. lib:graphql-engine, exe:graphql-engine
    <output_file.csv>           : Name of the output CSV file where the combined dependencies will be saved.

Description:
    - The script removes links, backticks, bold and italic formatting, and any remaining Markdown syntax from the content.
    - It processes the markdown content generated by the cabal-plan command for each package.
    - The script excludes specified packages from the final report.
    - It assigns specified SPDX License IDs to certain packages.
    - The combined dependencies are written to the specified output CSV file.

Dependencies:
    - Python 3.x
    - cabal-plan (ensure it's installed and available in your PATH)

Notes:
    - Ensure you have cabal-plan installed and the packages are available for the command to run successfully.
    - The script expects the cabal-plan license-report command to output markdown tables with specific columns.
"""

import re
import csv
import subprocess

# Package names that are left out of the report: combine_dependencies skips
# every table row whose name appears in this list.
# NOTE(review): these look like in-repo / first-party packages -- confirm.
PACKAGES_TO_REMOVE = [
    'arrows-extra',
    'aeson-ordered',
    'ci-info',
    'dc-api',
    'ekg-prometheus',
    'graphql-engine',
    'graphql-parser',
    'hasura-base',
    'hasura-error-message',
    'hasura-extras',
    'hasura-json-encoding',
    'hasura-prelude',
    'incremental',
    'kriti-lang',
    'libdeflate-hs',
    'pg-client',
    'schema-parsers',
]

# SPDX license overrides, keyed by package name. When a package listed here
# shows up in a report, this value replaces whatever license the report gave.
PACKAGE_LICENSES = {
    'ekg-json': 'BSD-3-Clause',
    'odbc': 'BSD-3-Clause',
}

def clean_text(text):
    """Return *text* with Markdown decoration stripped and whitespace trimmed.

    The passes run in a fixed order, each on the output of the previous one:
    links are reduced to their label, backticks are dropped, ``*`` emphasis
    markers are unwrapped, and finally every ``#``, ``_`` and ``~`` character
    is deleted (including ones that are part of ordinary words).
    """
    # [label](url) -> label
    without_links = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
    # `code` -> code
    without_ticks = without_links.replace('`', '')
    # *italic* / **bold** -> plain text
    without_emphasis = re.sub(r'\*+([^*]+)\*+', r'\1', without_ticks)
    # Whatever heading / underscore / strikethrough characters are left.
    plain = re.sub(r'[#_~]', '', without_emphasis)
    return plain.strip()

# One license table: a header row containing "| Name", the "| --- | --- |"
# separator row, then (group 1) all following text up to the first blank line
# or the end of the input.
_TABLE_PATTERN = re.compile(r'\| Name.*\n\|[-\s|]*\n([\s\S]*?)(?:\n\n|$)')


def parse_md_content(content):
    """Extract dependency rows from every Markdown table in a license report.

    A report may contain more than one table (cabal-plan's license report is
    understood to list direct and indirect dependencies in separate tables --
    confirm against your cabal-plan version). Every table whose header row
    contains ``| Name`` is parsed, not just the first, so no dependency
    section is silently dropped.

    Args:
        content: Markdown text of the report.

    Returns:
        A list of rows in document order. Each row is a list holding the
        first four cells of a table line after ``clean_text`` has been
        applied to them. Lines with fewer than four cells are skipped.
        Returns an empty list when no table is found.
    """
    rows = []
    for table in _TABLE_PATTERN.finditer(content):
        for line in table.group(1).split('\n'):
            if not line.strip():
                continue
            # A row looks like "| a | b | c |": splitting on "|" yields an
            # empty first and last piece, which [1:-1] discards.
            cells = [clean_text(cell) for cell in line.split('|')[1:-1]]
            if len(cells) >= 4:
                rows.append(cells[:4])  # Only take the first 4 columns
    return rows

def run_cabal_plan(package):
    """Run ``cabal-plan license-report`` for *package* and return its output.

    Args:
        package: Target to report on, passed to cabal-plan as one argument.

    Returns:
        The command's standard output, or an empty string if the command
        could not be started or exited with a non-zero status. In both
        failure cases an error message is printed.
    """
    # Use an argument list and no shell so the package name reaches
    # cabal-plan verbatim; shell metacharacters in it are never interpreted.
    command = ['cabal-plan', 'license-report', '--licensedir=licenses', package]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as exc:
        # Raised e.g. when cabal-plan is not on PATH. Report it the same way
        # as a failing run instead of aborting the whole report.
        print(f"Error running command for package {package}: {exc}")
        return ""
    if result.returncode != 0:
        print(f"Error running command for package {package}: {result.stderr}")
        return ""
    return result.stdout

def _version_key(version):
    """Convert a dotted version string into a tuple that orders numerically.

    Numeric components compare as integers, so '1.10.0' ranks above '1.9.0'.
    Non-numeric components compare as text and rank after numeric ones; the
    (tag, value) pairs keep integers and strings from being compared directly.
    """
    return tuple(
        (0, int(part)) if part.isdecimal() else (1, part)
        for part in version.split('.')
    )


def combine_dependencies(packages):
    """Collect the dependencies of all *packages* into one de-duplicated list.

    For each package the license report is generated and parsed. Rows whose
    name is in ``PACKAGES_TO_REMOVE`` are skipped, and names found in
    ``PACKAGE_LICENSES`` get the license from that mapping. When the same
    dependency appears more than once, the entry with the highest version is
    kept; versions are compared numerically per component, not as text.

    Args:
        packages: Iterable of package names to report on.

    Returns:
        A list of ``(name, version, license, description)`` tuples, one per
        dependency name, in order of first appearance. Packages whose report
        is empty contribute nothing.
    """
    combined_deps = {}

    for package in packages:
        md_content = run_cabal_plan(package)
        if not md_content:
            continue

        for name, version, license_id, description in parse_md_content(md_content):
            if name in PACKAGES_TO_REMOVE:
                continue
            license_id = PACKAGE_LICENSES.get(name, license_id)
            known = combined_deps.get(name)
            if known is None or _version_key(version) > _version_key(known[1]):
                combined_deps[name] = (name, version, license_id, description)

    return list(combined_deps.values())

def write_csv(dependencies, output_file):
    """Write *dependencies* to *output_file* as UTF-8 CSV.

    The file is created or overwritten. A header row is written first,
    followed by one row per dependency in sorted order.
    """
    header = ['Name', 'Version', 'SPDX License Id', 'Description']
    # newline='' lets the csv module control line endings itself.
    with open(output_file, 'w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle, quoting=csv.QUOTE_MINIMAL)
        out.writerow(header)
        out.writerows(sorted(dependencies))

if __name__ == "__main__":
    import sys

    # At least one package name plus the output path must follow the script name.
    if len(sys.argv) < 3:
        print("Usage: python script.py <package1> <package2> ... <output_file.csv>")
        sys.exit(1)

    # The last argument is the output path; everything before it is a package.
    *packages, output_file = sys.argv[1:]

    write_csv(combine_dependencies(packages), output_file)

    print(f"Combined dependencies written to {output_file}")
Add python script to get cabal package licenses PR-URL: https://github.com/hasura/graphql-engine-mono/pull/10928 GitOrigin-RevId: d26495c8c9975b9e4f98e322b6d5b2977e66c247 2024-07-10 00:37:33 +03:00			`"""`
			`Script to generate a combined CSV report of package licenses using cabal-plan.`

			This script takes any number of Haskell package names, runs the `cabal-plan license-report`
			`command for each package, and processes the resulting markdown output to generate a combined`
			`CSV file listing package dependencies with their names, versions, licenses, and descriptions.`

			`Usage:`
			`python script.py <package1> <package2> ... <output_file.csv>`

			`Example:`
			`python script.py graphql-engine-pro some-other-package output.csv`

			`Parameters:`
			`<package1>, <package2>, ... : Names of the packages to process. ex lib:graphql-engine, exe:graphql-engine`
			`<output_file.csv> : Name of the output CSV file where the combined dependencies will be saved.`

			`Description:`
			`- The script removes links, backticks, bold and italic formatting, and any remaining Markdown syntax from the content.`
			`- It processes the markdown content generated by the cabal-plan command for each package.`
			`- The script excludes specified packages from the final report.`
			`- It assigns specified SPDX License IDs to certain packages.`
			`- The combined dependencies are written to the specified output CSV file.`

			`Dependencies:`
			`- Python 3.x`
			`- cabal-plan (ensure it's installed and available in your PATH)`

			`Notes:`
			`- Ensure you have cabal-plan installed and the packages are available for the command to run successfully.`
			`- The script expects the cabal-plan license-report command to output markdown tables with specific columns.`
			`"""`

			`import re`
			`import csv`
			`import subprocess`

			`# Packages to remove`
			`PACKAGES_TO_REMOVE = [`
			`'arrows-extra',`
			`'aeson-ordered',`
			`'ci-info',`
			`'dc-api',`
			`'ekg-prometheus',`
			`'graphql-engine',`
			`'graphql-parser',`
			`'hasura-base',`
			`'hasura-error-message',`
			`'hasura-extras',`
			`'hasura-json-encoding',`
			`'hasura-prelude',`
			`'incremental',`
			`'kriti-lang',`
			`'libdeflate-hs',`
			`'pg-client',`
			`'schema-parsers'`
			`]`

			`# Packages with specified SPDX Licenses`
			`PACKAGE_LICENSES = {`
			`'ekg-json': 'BSD-3-Clause',`
			`'odbc': 'BSD-3-Clause'`
			`}`

			`def clean_text(text):`
			`# Remove links`
			`text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)`
			`# Remove backticks`
			text = text.replace('`', '')
			`# Remove bold and italic formatting`
			`text = re.sub(r'\*+([^*]+)\*+', r'\1', text)`
			`# Remove any remaining Markdown syntax`
			`text = re.sub(r'[#_~]', '', text)`
			`return text.strip()`

			`def parse_md_content(content):`
			`# Find the table content`
			`table_match = re.search(r'\| Name.*\n\|[-\s|]*\n((.|\n)*?)(\n\n|$)', content)`
			`if not table_match:`
			`return []`

			`table_content = table_match.group(1)`

			`# Parse each row`
			`rows = []`
			`for line in table_content.split('\n'):`
			`if line.strip():`
			`cells = [clean_text(cell) for cell in line.split('|')[1:-1]]`
			`if len(cells) >= 4:`
			`rows.append(cells[:4]) # Only take the first 4 columns`

			`return rows`

			`def run_cabal_plan(package):`
			`command = f'cabal-plan license-report --licensedir=licenses {package}'`
			`result = subprocess.run(command, shell=True, capture_output=True, text=True)`
			`if result.returncode != 0:`
			`print(f"Error running command for package {package}: {result.stderr}")`
			`return ""`
			`return result.stdout`

			`def combine_dependencies(packages):`
			`combined_deps = {}`

			`for package in packages:`
			`md_content = run_cabal_plan(package)`
			`if not md_content:`
			`continue`

			`rows = parse_md_content(md_content)`
			`for row in rows:`
			`name, version, license, description = row`
			`if name not in PACKAGES_TO_REMOVE:`
			`if name in PACKAGE_LICENSES:`
			`license = PACKAGE_LICENSES[name]`
			`if name not in combined_deps or version > combined_deps[name][1]:`
			`combined_deps[name] = (name, version, license, description)`

			`return list(combined_deps.values())`

			`def write_csv(dependencies, output_file):`
			`with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:`
			`writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)`
			`writer.writerow(['Name', 'Version', 'SPDX License Id', 'Description'])`
			`for dep in sorted(dependencies):`
			`writer.writerow(dep)`

			`if __name__ == "__main__":`
			`import sys`

			`if len(sys.argv) < 3:`
			`print("Usage: python script.py <package1> <package2> ... <output_file.csv>")`
			`sys.exit(1)`

			`packages = sys.argv[1:-1]`
			`output_file = sys.argv[-1]`

			`combined_deps = combine_dependencies(packages)`
			`write_csv(combined_deps, output_file)`

			`print(f"Combined dependencies written to {output_file}")`