Add python script to get cabal package licenses

PR-URL: https://github.com/hasura/graphql-engine-mono/pull/10928
GitOrigin-RevId: d26495c8c9975b9e4f98e322b6d5b2977e66c247
This commit is contained in:
Brandon Martin 2024-07-09 15:37:33 -06:00 committed by hasura-bot
parent a94bace075
commit b2e0843045
2 changed files with 143 additions and 0 deletions

2
.gitignore vendored
View File

@ -42,6 +42,8 @@ tags
docs/_build/ docs/_build/
docs/_ext/ docs/_ext/
# cabal-plan artifacts
licenses/
# Ignore benchmark report output # Ignore benchmark report output
server/benchmarks/benchmark_sets/*/report.json server/benchmarks/benchmark_sets/*/report.json

View File

@ -0,0 +1,141 @@
"""
Script to generate a combined CSV report of package licenses using cabal-plan.
This script takes any number of Haskell package names, runs the `cabal-plan license-report`
command for each package, and processes the resulting markdown output to generate a combined
CSV file listing package dependencies with their names, versions, licenses, and descriptions.
Usage:
python script.py <package1> <package2> ... <output_file.csv>
Example:
python script.py graphql-engine-pro some-other-package output.csv
Parameters:
<package1>, <package2>, ... : Names of the packages to process, e.g. lib:graphql-engine or exe:graphql-engine.
<output_file.csv> : Name of the output CSV file where the combined dependencies will be saved.
Description:
- The script removes links, backticks, bold and italic formatting, and any remaining Markdown syntax from the content.
- It processes the markdown content generated by the cabal-plan command for each package.
- The script excludes specified packages from the final report.
- It assigns specified SPDX License IDs to certain packages.
- The combined dependencies are written to the specified output CSV file.
Dependencies:
- Python 3.x
- cabal-plan (ensure it's installed and available in your PATH)
Notes:
- Ensure you have cabal-plan installed and the packages are available for the command to run successfully.
- The script expects the cabal-plan license-report command to output markdown tables with specific columns.
"""
import re
import csv
import subprocess
# Package names that combine_dependencies() leaves out of the final report.
PACKAGES_TO_REMOVE = [
    'arrows-extra',
    'aeson-ordered',
    'ci-info',
    'dc-api',
    'ekg-prometheus',
    'graphql-engine',
    'graphql-parser',
    'hasura-base',
    'hasura-error-message',
    'hasura-extras',
    'hasura-json-encoding',
    'hasura-prelude',
    'incremental',
    'kriti-lang',
    'libdeflate-hs',
    'pg-client',
    'schema-parsers',
]
# SPDX License IDs that replace whatever the report lists for these packages
# (applied by name in combine_dependencies()).
PACKAGE_LICENSES = {
    'ekg-json': 'BSD-3-Clause',
    'odbc': 'BSD-3-Clause',
}
def clean_text(text):
    """Strip Markdown markup from *text* and return the trimmed plain text.

    Applied in order: ``[label](target)`` links collapse to their label,
    backticks are dropped, asterisk-wrapped emphasis is unwrapped, and every
    ``#``, ``_`` and ``~`` character is deleted. Surrounding whitespace is
    removed last.
    """
    # [label](target) -> label
    without_links = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
    # Inline-code markers
    without_code = without_links.replace('`', '')
    # *italic*, **bold**, ... -> inner text
    without_emphasis = re.sub(r'\*+([^*]+)\*+', r'\1', without_code)
    # Leftover heading / underscore / strike-through characters
    plain = re.sub(r'[#_~]', '', without_emphasis)
    return plain.strip()
def parse_md_content(content):
    """Extract dependency rows from the first Markdown table in *content*.

    The table is found by searching for a line containing ``| Name`` that is
    followed by a separator line; the table body runs until the first pair of
    consecutive newlines or the end of the text. Only this first match is
    read.

    NOTE(review): if the report contains several tables (for example separate
    sections for different kinds of dependencies), the later ones are
    ignored -- confirm that this is intended.

    Returns:
        A list of rows, each a list with the first four cells of a table line
        after clean_text() has been applied. Lines with fewer than four cells
        are skipped. An empty list is returned when no table is found.
    """
    match = re.search(r'\| Name.*\n\|[-\s|]*\n((.|\n)*?)(\n\n|$)', content)
    if match is None:
        return []

    parsed = []
    for raw_line in match.group(1).split('\n'):
        if not raw_line.strip():
            continue
        # Discard whatever precedes the first pipe and follows the last one.
        fields = [clean_text(piece) for piece in raw_line.split('|')[1:-1]]
        if len(fields) < 4:
            continue
        parsed.append(fields[:4])  # Only the first 4 columns are kept
    return parsed
def run_cabal_plan(package):
    """Run ``cabal-plan license-report`` for one target and return its output.

    Args:
        package: Target handed to cabal-plan as its final argument. It is
            passed as a single argument, exactly as given.

    Returns:
        The command's standard output as text, or an empty string when the
        command could not be started or exited with a non-zero status (an
        error message is printed in both cases).
    """
    # An argument list without a shell: the package string can never be
    # interpreted as shell syntax (e.g. "x; other-command"), and the exit
    # status is always that of cabal-plan itself.
    command = ['cabal-plan', 'license-report', '--licensedir=licenses', package]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as err:
        # Raised when the executable is missing or cannot be launched; keep
        # the report-and-skip behaviour used for a failing command.
        print(f"Error running command for package {package}: {err}")
        return ""
    if result.returncode != 0:
        print(f"Error running command for package {package}: {result.stderr}")
        return ""
    return result.stdout
def _version_key(version):
    """Return a key that orders dotted version strings component-wise.

    Numeric components compare as integers (so ``1.10`` sorts after ``1.9``);
    any non-numeric component sorts after every numeric one and compares as
    text, which keeps the keys mutually comparable.
    """
    return tuple(
        (0, int(part), '') if part.isdecimal() else (1, 0, part)
        for part in version.split('.')
    )


def combine_dependencies(packages):
    """Collect the license rows of all *packages* into one de-duplicated list.

    For each package the report is produced with run_cabal_plan() and parsed
    with parse_md_content(); packages whose report is empty are skipped.
    Rows named in PACKAGES_TO_REMOVE are dropped, and names present in
    PACKAGE_LICENSES get the license given there. When the same name appears
    more than once, the row with the highest version is kept (the first one
    seen wins on a tie).

    Args:
        packages: Iterable of package targets to report on.

    Returns:
        A list of ``(name, version, license, description)`` tuples, one per
        distinct dependency name.
    """
    combined_deps = {}
    for package in packages:
        md_content = run_cabal_plan(package)
        if not md_content:
            continue
        for name, version, spdx_id, description in parse_md_content(md_content):
            if name in PACKAGES_TO_REMOVE:
                continue
            spdx_id = PACKAGE_LICENSES.get(name, spdx_id)
            current = combined_deps.get(name)
            # Compare versions numerically; plain string comparison would
            # rank '1.9.0' above '1.10.0'.
            if current is None or _version_key(version) > _version_key(current[1]):
                combined_deps[name] = (name, version, spdx_id, description)
    return list(combined_deps.values())
def write_csv(dependencies, output_file):
    """Write *dependencies* to *output_file* as a UTF-8 CSV report.

    A header row is written first, followed by the dependency rows in sorted
    order. The file is created or overwritten.

    Args:
        dependencies: Iterable of row sequences (name, version, license,
            description).
        output_file: Path of the CSV file to write.
    """
    header = ['Name', 'Version', 'SPDX License Id', 'Description']
    with open(output_file, 'w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle, quoting=csv.QUOTE_MINIMAL)
        out.writerow(header)
        out.writerows(sorted(dependencies))
if __name__ == "__main__":
    import sys

    # Command line: one or more package targets followed by the CSV path.
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        print("Usage: python script.py <package1> <package2> ... <output_file.csv>")
        sys.exit(1)

    # The last argument is the output file; everything before it is a package.
    *packages, output_file = cli_args
    write_csv(combine_dependencies(packages), output_file)
    print(f"Combined dependencies written to {output_file}")