Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from packagedcode import opam
from packagedcode import phpcomposer
from packagedcode import pubspec
from packagedcode import publiccode
from packagedcode import pypi
from packagedcode import readme
from packagedcode import rpm
Expand Down Expand Up @@ -77,6 +78,8 @@
conda.CondaMetaYamlHandler,
conda.CondaYamlHandler,

publiccode.PubliccodeYmlHandler,

conan.ConanFileHandler,
conan.ConanDataHandler,

Expand Down
151 changes: 151 additions & 0 deletions src/packagedcode/publiccode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import io

import saneyaml

from packagedcode import models

"""
Handle publiccode.yml metadata files.
publiccode.yml is a metadata standard for public sector open source software.
See https://github.com/publiccodeyml/publiccode.yml
"""

# Top-level publiccode.yml keys that are copied as-is into the package
# ``extra_data`` mapping when they carry a non-empty value.
EXTRA_DATA_KEYS = (
    'publiccodeYmlVersion',
    'platforms',
    'developmentStatus',
    'softwareType',
)


class PubliccodeYmlHandler(models.DatafileHandler):
    """
    Datafile handler for publiccode.yml (or publiccode.yaml) metadata files.
    """
    datasource_id = 'publiccode_yml'
    path_patterns = ('*publiccode.yml', '*publiccode.yaml')
    default_package_type = 'publiccode'
    default_primary_language = None
    description = 'publiccode.yml metadata file'
    documentation_url = 'https://github.com/publiccodeyml/publiccode.yml'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData built from the publiccode.yml file at
        ``location``. Yield nothing when the loaded YAML is not a mapping
        carrying a 'publiccodeYmlVersion' key.
        """
        with io.open(location, encoding='utf-8') as yaml_file:
            yaml_text = yaml_file.read()

        data = saneyaml.load(yaml_text)
        if not is_publiccode_yml_data(data):
            return

        repository_url = data.get('url')
        # The landing page is preferred as homepage; the repository URL is
        # only used when no landing page is declared.
        homepage_url = data.get('landingURL') or repository_url

        package_data = {
            'datasource_id': cls.datasource_id,
            'type': cls.default_package_type,
            'name': data.get('name'),
            'version': data.get('softwareVersion'),
            'vcs_url': repository_url,
            'homepage_url': homepage_url,
            'description': get_description(data),
            'extracted_license_statement': get_extracted_license_statement(data),
            'copyright': get_copyright_statement(data),
            'keywords': get_categories(data),
            'parties': get_parties(data),
            # An empty extra data mapping is reported as None.
            'extra_data': get_extra_data(data) or None,
        }
        yield models.PackageData.from_data(package_data, package_only)


def is_publiccode_yml_data(data):
    """
    Return True if ``data`` is a mapping that has a 'publiccodeYmlVersion'
    key, False otherwise.
    """
    if not isinstance(data, dict):
        return False
    return 'publiccodeYmlVersion' in data


def get_description(data):
    """
    Return the best available description string from the multilingual
    'description' block of a publiccode.yml ``data`` mapping, or None.

    English entries (any 'en' or 'en-*' language tag) are considered first,
    then every other language in file order. For each entry the
    'longDescription' is preferred over the 'shortDescription'. The first
    non-blank text found is returned with surrounding whitespace stripped.

    Malformed content is ignored rather than raising: a 'description' that
    is not a mapping, a per-language entry that is not a mapping, and
    description values that are null or not strings are all skipped.
    """
    description_block = data.get('description')
    if not description_block or not isinstance(description_block, dict):
        return None

    english_entries = []
    other_entries = []
    for language, localized_description in description_block.items():
        if not isinstance(localized_description, dict):
            continue
        # str() guards against language keys that were not loaded as text.
        primary_language = str(language).lower().split('-')[0]
        if primary_language == 'en':
            english_entries.append(localized_description)
        else:
            other_entries.append(localized_description)

    for localized_description in english_entries + other_entries:
        for key in ('longDescription', 'shortDescription'):
            text = localized_description.get(key)
            # A key present with a null value yields None here, not a
            # default, so the type must be checked before stripping.
            if isinstance(text, str) and text.strip():
                return text.strip()

    return None


def get_extracted_license_statement(data):
    """
    Return the value of the 'license' key of the 'legal' section of a
    publiccode.yml ``data`` mapping, or None when either is missing.
    """
    legal_section = data.get('legal')
    if not legal_section:
        return None
    return legal_section.get('license')


def get_copyright_statement(data):
    """
    Return a copyright statement built from the 'mainCopyrightOwner' and
    'repoOwner' values of the 'legal' section of a publiccode.yml ``data``
    mapping, or None when neither is set.

    Values are listed one per line, in that order, without duplicates.
    """
    legal_section = data.get('legal') or {}

    owners = []
    for field in ('mainCopyrightOwner', 'repoOwner'):
        owner = legal_section.get(field)
        # Skip missing values and an owner that is already listed.
        if not owner or owner in owners:
            continue
        owners.append(owner)

    if not owners:
        return None
    return '\n'.join(owners)


def get_categories(data):
    """
    Return the 'categories' of a publiccode.yml ``data`` mapping.

    A single string is wrapped in a one-item list, a missing or empty value
    gives an empty list, and any other value is returned unchanged.
    """
    categories = data.get('categories')
    if not categories:
        return []
    return [categories] if isinstance(categories, str) else categories


def get_parties(data):
    """
    Return a list of maintainer Party objects built from the
    'maintenance/contacts' entries of a publiccode.yml ``data`` mapping.

    Each contact that has a name or an email becomes a person Party with
    the 'maintainer' role. Contacts with neither are skipped.

    Malformed content is ignored rather than raising: a 'maintenance'
    section that is not a mapping gives an empty list, and 'contacts'
    entries that are not mappings (such as plain names) are skipped.
    A single contact mapping given directly under 'contacts' is treated as
    a one-item list.
    """
    maintenance = data.get('maintenance')
    if not isinstance(maintenance, dict):
        return []

    contacts = maintenance.get('contacts')
    if isinstance(contacts, dict):
        # Iterating a mapping would walk its keys, not the contact itself.
        contacts = [contacts]
    elif not isinstance(contacts, (list, tuple)):
        return []

    parties = []
    for contact in contacts:
        if not isinstance(contact, dict):
            continue

        contact_name = contact.get('name')
        contact_email = contact.get('email')
        if not (contact_name or contact_email):
            continue

        parties.append(
            models.Party(
                type=models.party_person,
                name=contact_name,
                email=contact_email,
                role='maintainer',
            )
        )

    return parties


def get_extra_data(data):
    """
    Return a mapping of the EXTRA_DATA_KEYS found in a publiccode.yml
    ``data`` mapping, keeping only the keys that have a non-empty value.
    """
    candidates = ((key, data.get(key)) for key in EXTRA_DATA_KEYS)
    return {key: value for key, value in candidates if value}
49 changes: 49 additions & 0 deletions tests/packagedcode/data/publiccode/publiccode.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Hand-crafted publiccode.yml test fixture based on examples from:
# https://github.com/publiccodeyml/publiccode.yml/blob/main/docs/standard/schema.core.rst
publiccodeYmlVersion: "0.4"

name: Medusa
url: "https://example.com/italia/medusa.git"
landingURL: "https://example.com/medusa"
softwareVersion: "1.0.3"

platforms:
- web
- linux

categories:
- financial-reporting
- accounting

developmentStatus: stable
softwareType: "standalone/desktop"

description:
en:
shortDescription: >
A short description of this software.
longDescription: >
A very long description of this software. It explains what it does,
who it is for, and why you might want to use it in a public
administration context.
features:
- Feature one
- Feature two

legal:
license: AGPL-3.0-or-later
mainCopyrightOwner: City of Example
repoOwner: City of Example

maintenance:
type: "contract"
contacts:
- name: Francesco Rossi
email: f.rossi@example.com
affiliation: City of Example

localisation:
localisationReady: true
availableLanguages:
- en
- it
88 changes: 88 additions & 0 deletions tests/packagedcode/data/publiccode/publiccode.yml-expected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
[
{
"type": "publiccode",
"namespace": null,
"name": "Medusa",
"version": "1.0.3",
"qualifiers": {},
"subpath": null,
"primary_language": null,
"description": "A very long description of this software. It explains what it does, who it is for, and why you might want to use it in a public administration context.",
"release_date": null,
"parties": [
{
"type": "person",
"role": "maintainer",
"name": "Francesco Rossi",
"email": "f.rossi@example.com",
"url": null
}
],
"keywords": [
"financial-reporting",
"accounting"
],
"homepage_url": "https://example.com/medusa",
"download_url": null,
"size": null,
"sha1": null,
"md5": null,
"sha256": null,
"sha512": null,
"bug_tracking_url": null,
"code_view_url": null,
"vcs_url": "https://example.com/italia/medusa.git",
"copyright": "City of Example",
"holder": "City of Example",
"declared_license_expression": "agpl-3.0-plus",
"declared_license_expression_spdx": "AGPL-3.0-or-later",
"license_detections": [
{
"license_expression": "agpl-3.0-plus",
"license_expression_spdx": "AGPL-3.0-or-later",
"matches": [
{
"license_expression": "agpl-3.0-plus",
"license_expression_spdx": "AGPL-3.0-or-later",
"from_file": null,
"start_line": 1,
"end_line": 1,
"matcher": "1-hash",
"score": 100.0,
"matched_length": 5,
"match_coverage": 100.0,
"rule_relevance": 100,
"rule_identifier": "spdx_license_id_agpl-3.0-or-later_for_agpl-3.0-plus.RULE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/spdx_license_id_agpl-3.0-or-later_for_agpl-3.0-plus.RULE",
"matched_text": "AGPL-3.0-or-later"
}
],
"identifier": "agpl_3_0_plus-a0f62d44-7e99-852b-0b1c-0bc5e1c9f6d0"
}
],
"other_license_expression": null,
"other_license_expression_spdx": null,
"other_license_detections": [],
"extracted_license_statement": "AGPL-3.0-or-later",
"notice_text": null,
"source_packages": [],
"file_references": [],
"is_private": false,
"is_virtual": false,
"extra_data": {
"publiccodeYmlVersion": "0.4",
"platforms": [
"web",
"linux"
],
"developmentStatus": "stable",
"softwareType": "standalone/desktop"
},
"dependencies": [],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": null,
"datasource_id": "publiccode_yml",
"purl": "pkg:publiccode/Medusa@1.0.3"
}
]
Loading
Loading