"""Generate a combined CSV report of package licenses using cabal-plan.

This script takes any number of Haskell package names, runs the
`cabal-plan license-report` command for each package, and processes the
resulting markdown output to generate a combined CSV file listing package
dependencies with their names, versions, licenses, and descriptions.

Usage:
    python script.py <package1> <package2> ... <output_file>

Example:
    python script.py graphql-engine-pro some-other-package output.csv

Parameters:
    <package1>, <package2>, ... : Names of the packages to process,
        e.g. lib:graphql-engine, exe:graphql-engine.
    <output_file> : Name of the output CSV file where the combined
        dependencies will be saved.

Description:
    - The script removes links, backticks, bold and italic formatting, and
      any remaining Markdown syntax from the content.
    - It processes the markdown content generated by the cabal-plan command
      for each package.
    - The script excludes specified packages from the final report.
    - It assigns specified SPDX License IDs to certain packages.
    - When a dependency is reported more than once, the entry with the
      highest version is kept.
    - The combined dependencies are written to the specified output CSV
      file.

Dependencies:
    - Python 3.x
    - cabal-plan (ensure it's installed and available in your PATH)

Notes:
    - Ensure you have cabal-plan installed and the packages are available
      for the command to run successfully.
    - The script expects the cabal-plan license-report command to output
      markdown tables with specific columns.
"""

import csv
import re
import subprocess
import sys

# Packages to remove
PACKAGES_TO_REMOVE = [
    'arrows-extra',
    'aeson-ordered',
    'ci-info',
    'dc-api',
    'ekg-prometheus',
    'graphql-engine',
    'graphql-parser',
    'hasura-base',
    'hasura-error-message',
    'hasura-extras',
    'hasura-json-encoding',
    'hasura-prelude',
    'incremental',
    'kriti-lang',
    'libdeflate-hs',
    'pg-client',
    'schema-parsers'
]

# Packages with specified SPDX Licenses
PACKAGE_LICENSES = {
    'ekg-json': 'BSD-3-Clause',
    'odbc': 'BSD-3-Clause'
}

# Markdown patterns, compiled once because clean_text runs for every cell.
_LINK_RE = re.compile(r'\[([^\]]+)\]\([^\)]+\)')
_EMPHASIS_RE = re.compile(r'\*+([^*]+)\*+')
_LEFTOVER_MARKUP_RE = re.compile(r'[#_~]')

# Header row starting with "| Name", the separator row, then the table body
# up to the first blank line (or the end of the text).
_TABLE_RE = re.compile(r'\| Name.*\n\|[-\s|]*\n((?:.|\n)*?)(?:\n\n|$)')


def clean_text(text):
    """Strip Markdown formatting from a single table cell.

    Links are replaced by their link text, backticks are dropped, bold and
    italic markers are removed, and any remaining ``#``, ``_`` or ``~``
    characters are deleted.

    Args:
        text: Raw cell content.

    Returns:
        The cleaned text with surrounding whitespace removed.
    """
    text = _LINK_RE.sub(r'\1', text)
    text = text.replace('`', '')
    text = _EMPHASIS_RE.sub(r'\1', text)
    text = _LEFTOVER_MARKUP_RE.sub('', text)
    return text.strip()


def parse_md_content(content):
    """Extract dependency rows from a license-report markdown document.

    Args:
        content: Markdown text containing a table whose header row starts
            with ``| Name``.

    Returns:
        A list of ``[name, version, license, description]`` lists, one per
        table row that has at least four cells. An empty list is returned
        when no table is found.
    """
    # NOTE(review): only the first matching table is read. If the report
    # contains further tables they are ignored - confirm this is intended.
    table_match = _TABLE_RE.search(content)
    if not table_match:
        return []

    rows = []
    for line in table_match.group(1).split('\n'):
        if not line.strip():
            continue
        # The first and last pieces lie outside the outer pipes of the row.
        cells = [clean_text(cell) for cell in line.split('|')[1:-1]]
        if len(cells) >= 4:
            rows.append(cells[:4])  # Only take the first 4 columns
    return rows


def run_cabal_plan(package):
    """Run ``cabal-plan license-report`` for one package.

    The command is passed as an argument list and run without a shell, so
    the package name is never interpreted by a shell.

    Args:
        package: Package name handed to cabal-plan.

    Returns:
        The command's standard output, or an empty string when the command
        cannot be started or exits with a non-zero status.
    """
    command = [
        'cabal-plan',
        'license-report',
        '--licensedir=licenses',
        package,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as err:
        # Without a shell a missing executable raises instead of producing
        # a non-zero exit status; report it the same way as a failed run.
        print(f"Error running command for package {package}: {err}")
        return ""
    if result.returncode != 0:
        print(f"Error running command for package {package}: {result.stderr}")
        return ""
    return result.stdout


def version_key(version):
    """Build a sort key that orders dotted version strings numerically.

    Plain string comparison ranks ``1.9.0`` above ``1.10.0``; comparing the
    components as integers gives the expected order.

    Args:
        version: Dot-separated version string.

    Returns:
        A tuple with one entry per component. Numeric components compare by
        value and sort before non-numeric ones, which compare as text.
    """
    return tuple(
        (0, int(part), '') if part.isdecimal() else (1, 0, part)
        for part in version.split('.')
    )


def merge_rows(combined_deps, rows):
    """Merge parsed report rows into ``combined_deps`` in place.

    Rows naming a package in ``PACKAGES_TO_REMOVE`` are skipped, licenses
    listed in ``PACKAGE_LICENSES`` replace the reported one, and for a
    package seen more than once the entry with the highest version wins.

    Args:
        combined_deps: Dict mapping a package name to its
            ``(name, version, license, description)`` tuple; updated in
            place.
        rows: Iterable of four-item rows as returned by
            ``parse_md_content``.
    """
    for name, version, license_id, description in rows:
        if name in PACKAGES_TO_REMOVE:
            continue
        license_id = PACKAGE_LICENSES.get(name, license_id)
        existing = combined_deps.get(name)
        if existing is None or version_key(version) > version_key(existing[1]):
            combined_deps[name] = (name, version, license_id, description)


def combine_dependencies(packages):
    """Collect the dependencies of every package into one list.

    Packages whose report could not be produced are skipped.

    Args:
        packages: Iterable of package names to run cabal-plan for.

    Returns:
        A list of ``(name, version, license, description)`` tuples with one
        entry per dependency name.
    """
    combined_deps = {}
    for package in packages:
        md_content = run_cabal_plan(package)
        if not md_content:
            continue
        merge_rows(combined_deps, parse_md_content(md_content))
    return list(combined_deps.values())


def write_csv(dependencies, output_file):
    """Write the dependencies, sorted, to a CSV file with a header row.

    Args:
        dependencies: Iterable of ``(name, version, license, description)``
            tuples.
        output_file: Path of the CSV file to create or overwrite.
    """
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['Name', 'Version', 'SPDX License Id', 'Description'])
        for dep in sorted(dependencies):
            writer.writerow(dep)


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Arguments without the program name; defaults to
            ``sys.argv[1:]``. The last argument is the output file and all
            preceding ones are package names.

    Returns:
        The process exit status: 0 on success, 1 on a usage error.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        print("Usage: python script.py <package1> <package2> ... <output_file>")
        return 1

    *packages, output_file = args
    combined_deps = combine_dependencies(packages)
    write_csv(combined_deps, output_file)
    print(f"Combined dependencies written to {output_file}")
    return 0


if __name__ == "__main__":
    sys.exit(main())