Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 95 additions & 32 deletions frictionless/fields/field_descriptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import datetime
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union

import pydantic
from pydantic import Field as PydanticField, AliasChoices, model_validator, BaseModel
from typing_extensions import Self

from .. import settings
Expand All @@ -21,7 +21,7 @@
"""Python equivalents of types supported by the Table schema specification"""


class BaseFieldDescriptor(pydantic.BaseModel):
class BaseFieldDescriptor(BaseModel):
"""Data model of a (unspecialised) field descriptor"""

name: str
Expand All @@ -39,19 +39,19 @@ class BaseFieldDescriptor(pydantic.BaseModel):
A description for this field e.g. “The recipient of the funds”
"""

missing_values: Optional[List[str]] = pydantic.Field(
missing_values: Optional[List[str]] = PydanticField(
default=None, alias="missingValues"
)
"""
A list of field values to consider as null values
"""

example: Optional[str] = None
example: Optional[Any] = None
"""
An example of a value for the field.
"""

@pydantic.model_validator(mode="before")
@model_validator(mode="before")
@classmethod
def compat(cls, data: Dict[str, Any]) -> Dict[str, Any]:
# Backward compatibility for field.format
Expand All @@ -63,6 +63,20 @@ def compat(cls, data: Dict[str, Any]) -> Dict[str, Any]:

return data

@model_validator(mode="after")
def validate_example(self) -> Self:
"""Validate that the example value can be converted using read_value() if available"""
if self.example is not None:
if hasattr(self, "read_value"):
read_value_method = getattr(self, "read_value")
result = read_value_method(self.example)
if result is None:
raise ValueError(
f'example value "{self.example}" for field "{self.name}" is not valid'
)

return self


class BooleanFieldDescriptor(BaseFieldDescriptor):
"""The field contains boolean (true/false) data."""
Expand All @@ -72,19 +86,19 @@ class BooleanFieldDescriptor(BaseFieldDescriptor):
format: Optional[Literal["default"]] = None
constraints: Optional[BaseConstraints[bool]] = None

true_values: Optional[List[str]] = pydantic.Field(
true_values: Optional[List[str]] = PydanticField(
default=settings.DEFAULT_TRUE_VALUES,
alias="trueValues",
validation_alias=pydantic.AliasChoices("trueValues", "true_values"),
validation_alias=AliasChoices("trueValues", "true_values"),
)
"""
Values to be interpreted as “true” for boolean fields
"""

false_values: Optional[List[str]] = pydantic.Field(
false_values: Optional[List[str]] = PydanticField(
default=settings.DEFAULT_FALSE_VALUES,
alias="falseValues",
validation_alias=pydantic.AliasChoices("falseValues", "false_values"),
validation_alias=AliasChoices("falseValues", "false_values"),
)
"""
Values to be interpreted as “false” for boolean fields
Expand Down Expand Up @@ -122,18 +136,6 @@ def write_value(self, cell: Optional[bool]) -> Optional[str]:
return self.true_values[0] if cell else self.false_values[0]
return None

@pydantic.model_validator(mode="after")
def validate_example(self) -> Self:
# If example is provided, check it's in true_values or false_values
if self.example is not None:
allowed_values = (self.true_values or []) + (self.false_values or [])
if self.example not in allowed_values:
raise ValueError(
f'example value "{self.example}" for field "{self.name}" is not valid'
)

return self


class ArrayFieldDescriptor(BaseFieldDescriptor):
"""The field contains a valid JSON array."""
Expand All @@ -143,7 +145,7 @@ class ArrayFieldDescriptor(BaseFieldDescriptor):
constraints: Optional[JSONConstraints] = None

# TODO type is not accurate : array item are unnamed, not described etc
array_item: Optional[FieldDescriptor] = pydantic.Field(
array_item: Optional[FieldDescriptor] = PydanticField(
default=None, alias="arrayItem"
)

Expand Down Expand Up @@ -202,7 +204,7 @@ class GeoPointFieldDescriptor(BaseFieldDescriptor):
constraints: Optional[BaseConstraints[str]] = None


class CategoryDict(pydantic.BaseModel):
class CategoryDict(BaseModel):
value: str
label: Optional[str] = None

Expand All @@ -225,24 +227,85 @@ class IntegerFieldDescriptor(BaseFieldDescriptor):
Property to restrict the field to a finite set of possible values
"""

categories_ordered: Optional[bool] = pydantic.Field(
categories_ordered: Optional[bool] = PydanticField(
default=None, alias="categoriesOrdered"
)
"""
When categoriesOrdered is true, implementations SHOULD regard the order of
appearance of the values in the categories property as their natural order.
"""

group_char: Optional[str] = pydantic.Field(default=None, alias="groupChar")
group_char: Optional[str] = PydanticField(default=None, alias="groupChar")
"""
String whose value is used to group digits for integer/number fields
"""

bare_number: Optional[bool] = pydantic.Field(default=None, alias="bareNumber")
bare_number: bool = PydanticField(
default=settings.DEFAULT_BARE_NUMBER, alias="bareNumber"
)
"""
If false leading and trailing non numbers will be removed for integer/number fields
"""

def read_value(self, cell: Any) -> Optional[int]:
    """Convert a physical (possibly typed) cell to a logical integer.

    See "Data representation" in the glossary for more details.
    https://datapackage.org/standard/glossary/#data-representation

    Conversion rules, by type of ``cell``:

    - ``bool``: rejected, even though ``bool`` is a subclass of ``int``.
    - ``int``: returned unchanged.
    - ``str``: stripped, then parsed with ``int()``. When ``bare_number``
      is False, leading characters that are neither digits nor ``-`` and
      trailing non-digit characters are removed first.
    - ``float`` / ``Decimal``: converted when the value is a whole number;
      infinities and NaN are rejected.
    - anything else: rejected.

    Args:
        cell: the physical value to convert.

    Returns:
        The integer value, or ``None`` when ``cell`` cannot be read as one.
    """
    import re
    from decimal import Decimal

    # bool must be tested before int: isinstance(True, int) is True
    if isinstance(cell, bool):
        return None

    if isinstance(cell, int):
        return cell

    if isinstance(cell, str):
        text = cell.strip()

        # Remove surrounding non-digit characters if bare_number is False
        if not self.bare_number:
            text = re.sub(r"((^[^-\d]*)|(\D*$))", "", text)

        try:
            return int(text)
        except ValueError:
            return None

    if isinstance(cell, float):
        # is_integer() is False for inf and nan, so they fall through to None
        return int(cell) if cell.is_integer() else None

    if isinstance(cell, Decimal):
        # `cell % 1` is avoided on purpose: it raises InvalidOperation for
        # infinities, signaling NaNs and whole numbers with more digits
        # than the context precision (e.g. Decimal("1E+30")).
        if cell.is_finite() and cell == cell.to_integral_value():
            return int(cell)
        return None

    return None

def write_value(self, cell: Optional[int]) -> Optional[str]:
    """Serialise a logical integer to its physical (string) representation.

    ``None`` is passed through unchanged; any other value is rendered
    with ``str()``.
    """
    return None if cell is None else str(cell)


IItemType = Literal[
"boolean",
Expand All @@ -262,7 +325,7 @@ class ListFieldDescriptor(BaseFieldDescriptor):

type: Literal["list"] = "list"
format: Optional[Literal["default"]] = None
constraints: CollectionConstraints = pydantic.Field(
constraints: CollectionConstraints = PydanticField(
default_factory=CollectionConstraints
)

Expand All @@ -271,7 +334,7 @@ class ListFieldDescriptor(BaseFieldDescriptor):
Specifies the character sequence which separates lexically represented list items.
"""

item_type: Optional[IItemType] = pydantic.Field(default=None, alias="itemType")
item_type: Optional[IItemType] = PydanticField(default=None, alias="itemType")
"""
Specifies the list item type in terms of existent Table Schema types.
"""
Expand All @@ -284,17 +347,17 @@ class NumberFieldDescriptor(BaseFieldDescriptor):
format: Optional[Literal["default"]] = None
constraints: Optional[ValueConstraints[float]] = None

decimal_char: Optional[str] = pydantic.Field(default=None, alias="decimalChar")
decimal_char: Optional[str] = PydanticField(default=None, alias="decimalChar")
"""
String whose value is used to represent a decimal point for number fields
"""

group_char: Optional[str] = pydantic.Field(default=None, alias="groupChar")
group_char: Optional[str] = PydanticField(default=None, alias="groupChar")
"""
String whose value is used to group digits for integer/number fields
"""

bare_number: Optional[bool] = pydantic.Field(default=None, alias="bareNumber")
bare_number: Optional[bool] = PydanticField(default=None, alias="bareNumber")
"""
If false leading and trailing non numbers will be removed for integer/number fields
"""
Expand Down Expand Up @@ -324,7 +387,7 @@ class StringFieldDescriptor(BaseFieldDescriptor):

type: Literal["string"] = "string"
format: Optional[IStringFormat] = None
constraints: StringConstraints = pydantic.Field(default_factory=StringConstraints)
constraints: StringConstraints = PydanticField(default_factory=StringConstraints)

categories: Optional[ICategories] = None
"""
Expand Down
74 changes: 4 additions & 70 deletions frictionless/fields/integer.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,10 @@
from __future__ import annotations
from ..schema.field import Field

import re
from decimal import Decimal
from typing import Any

import attrs

from .. import settings
from ..schema import Field


@attrs.define(kw_only=True, repr=False)
class IntegerField(Field):
### TEMP Only required for Metadata compatibility
### This is required because "metadata_import" makes a distinction based
### on the "type" property (`is_typed_class`)
type = "integer"
builtin = True
supported_constraints = [
Expand All @@ -20,62 +13,3 @@ class IntegerField(Field):
"maximum",
"enum",
]

bare_number: bool = settings.DEFAULT_BARE_NUMBER
"""
It specifies that the value is a bare number. If true, the pattern to
remove non digit character does not get applied and vice versa.
The default value is True.
"""

# Read

def create_value_reader(self):
    """Build the function that converts a cell to an integer.

    The returned reader accepts one cell and returns an ``int``, or
    ``None`` when the cell cannot be read as an integer:

    - ``str``: stripped, then parsed with ``int()``. When ``bare_number``
      is False, leading characters that are neither digits nor ``-`` and
      trailing non-digit characters are removed first.
    - ``bool``: rejected, even though ``bool`` is a subclass of ``int``.
    - ``int``: returned unchanged.
    - ``float`` / ``Decimal``: converted when the value is a whole number;
      infinities and NaN are rejected.
    - anything else: rejected.
    """
    # Compile the cleanup pattern once per reader rather than once per cell
    pattern = None
    if not self.bare_number:
        pattern = re.compile(r"((^[^-\d]*)|(\D*$))")

    def value_reader(cell: Any):
        if isinstance(cell, str):
            text = cell.strip()
            if pattern:
                text = pattern.sub("", text)
            try:
                return int(text)
            except ValueError:
                return None

        # bool must be tested before int: isinstance(True, int) is True
        if isinstance(cell, bool):
            return None
        if isinstance(cell, int):
            return cell
        if isinstance(cell, float):
            # is_integer() is False for inf and nan
            return int(cell) if cell.is_integer() else None
        if isinstance(cell, Decimal):
            # `cell % 1` is avoided on purpose: it raises InvalidOperation
            # for infinities, signaling NaNs and whole numbers with more
            # digits than the context precision (e.g. Decimal("1E+30")).
            if cell.is_finite() and cell == cell.to_integral_value():
                return int(cell)
            return None
        return None

    return value_reader

# Write

def create_value_writer(self):
    """Build the function that renders a cell as text using ``str()``."""

    def stringify(cell: Any):
        return str(cell)

    return stringify

# Metadata

metadata_profile_patch = {
"properties": {
"bareNumber": {"type": "boolean"},
}
}
Loading