diff --git a/CHANGELOG.md b/CHANGELOG.md index 10474e9..5c6f3c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Updated + +- Tratando CNPJ formato 2026 nos métodos do utilitário cnpj (https://github.com/brazilian-utils/python/issues/478) + ## [2.3.0] - 2025-10-07 ### Added diff --git a/README.md b/README.md index 2b1e468..1e873db 100644 --- a/README.md +++ b/README.md @@ -201,6 +201,7 @@ Verifica se os dígitos de verificação do CNPJ (Cadastro Nacional da Pessoa Jurídica) fornecido correspondem ao seu número base. A entrada deve ser uma string de dígitos com o comprimento apropriado. Esta função não verifica a existência do CNPJ; ela só valida o formato da string. +OBS.: Já está tratando CNPJs no novo formato (2026) Argumentos: @@ -217,6 +218,8 @@ Exemplo: >>> from brutils import is_valid_cnpj >>> is_valid_cnpj('03560714000142') True +>>> is_valid_cnpj('12ABC34501DE35') +True >>> is_valid_cnpj('00111222000133') False ``` @@ -242,6 +245,8 @@ Exemplo: >>> from brutils import format_cnpj >>> format_cnpj("03560714000142") '03.560.714/0001-42' +>>> format_cnpj("12ABC34501DE35") +'12.ABC.345/01DE-35' >>> format_cnpj("98765432100100") None ``` @@ -277,6 +282,7 @@ opcional pode ser fornecido; o padrão é 1. Argumentos: - branch (int): Um número de filial opcional a ser incluído no CNPJ. +- new_format (bool): Boolean para definir se o CNPJ será no novo formato (2026). Retorna: @@ -288,6 +294,8 @@ Exemplo: >>> from brutils import generate_cnpj >>> generate_cnpj() '34665388000161' +>>> generate_cnpj(new_format=True) +'BDPDVE7250ZX31' >>> generate_cnpj(1234) "01745284123455" ``` diff --git a/README_EN.md b/README_EN.md index 32017ea..c0dc0ab 100644 --- a/README_EN.md +++ b/README_EN.md @@ -202,6 +202,7 @@ Returns whether or not the verifying checksum digits of the given CNPJ Input should be a digit string of proper length. 
This function does not verify the existence of the CNPJ; it only validates the format of the string. +Note: the new alphanumeric CNPJ format (2026) is also supported. Args: @@ -218,6 +219,8 @@ Example: >>> from brutils import is_valid_cnpj >>> is_valid_cnpj('03560714000142') True +>>> is_valid_cnpj('12ABC34501DE35') +True >>> is_valid_cnpj('00111222000133') False ``` @@ -244,6 +247,8 @@ Example: >>> from brutils import format_cnpj >>> format_cnpj("03560714000142") '03.560.714/0001-42' +>>> format_cnpj("12ABC34501DE35") +'12.ABC.345/01DE-35' >>> format_cnpj("98765432100100") None ``` @@ -279,6 +284,7 @@ string. An optional branch number parameter can be given; it defaults to 1. Args: - branch (int): An optional branch number to be included in the CNPJ. +- new_format (bool): If True, generates a CNPJ in the new alphanumeric format (2026). Returns: @@ -290,6 +296,8 @@ Example: >>> from brutils import generate_cnpj >>> generate_cnpj() '34665388000161' +>>> generate_cnpj(new_format=True) +'BDPDVE7250ZX31' >>> generate_cnpj(1234) "01745284123455" ``` diff --git a/brutils/cnpj.py b/brutils/cnpj.py index aabd374..d863222 100644 --- a/brutils/cnpj.py +++ b/brutils/cnpj.py @@ -1,8 +1,18 @@ from itertools import chain -from random import randint +from random import randint, choice +import string + +# ============================================================================= +# 27-10-2025 +# Estou seguindo o que foi explicitado aqui para as mudanças sugeridas: +# https://normasinternet2.receita.fazenda.gov.br/#/consulta/externa/141102 +# Especificamente no Anexo Único +# ============================================================================= + +# ============================================================================= # FORMATTING -############ +# ============================================================================= def sieve(dirty): # type: (str) -> str @@ -56,157 +66,159 @@ def remove_symbols(dirty): # type: (str) -> str def display(cnpj): # type: (str) -> str """ - Will format an 
adequately formatted numbers-only CNPJ string, - adding in standard formatting visual aid symbols for display. - Formats a CNPJ (Brazilian Company Registration Number) string for - visual display. + visual display, adding standard separators. - This function takes a CNPJ string as input, validates its format, and - formats it with standard visual aid symbols for display purposes. + Supports both numeric and alphanumeric formats, following the + 2026 specification. Args: - cnpj (str): The CNPJ string to be formatted for display. + cnpj (str): The CNPJ string (numeric or alphanumeric). Returns: - str: The formatted CNPJ with visual aid symbols if it's valid, - None if it's not valid. + str: The formatted CNPJ string, or None if invalid. Example: - >>> display("12345678901234") - "12.345.678/9012-34" - >>> display("98765432100100") - "98.765.432/1001-00" - - .. note:: - This method should not be used in new code and is only provided for - backward compatibility. + >>> display("12345678000195") + "12.345.678/0001-95" + >>> display("12ABC34501DE35") + "12.ABC.345/01DE-35" + + Notes: + - Prior to 2026, only numeric CNPJs are expected. + - From 2026 onwards, alphanumeric roots and orders are allowed. """ + clean = sieve(cnpj) - if not cnpj.isdigit() or len(cnpj) != 14 or len(set(cnpj)) == 1: + # Must have exactly 14 characters (including letters/numbers) + if len(clean) != 14 or len(set(clean)) == 1: return None - return "{}.{}.{}/{}-{}".format( - cnpj[:2], cnpj[2:5], cnpj[5:8], cnpj[8:12], cnpj[12:] - ) + + # Split parts (root, order, DV) + root = clean[:8] + order = clean[8:12] + dv = clean[12:] + + # Compose visually formatted CNPJ + return "{}.{}.{}/{}-{}".format(root[:2], root[2:5], root[5:8], order, dv) + + def format_cnpj(cnpj): # type: (str) -> str """ - Formats a CNPJ (Brazilian Company Registration Number) string for visual - display. + Validates and formats a CNPJ (Brazilian Company Registration Number) + for visual display. 
- This function takes a CNPJ string as input, validates its format, and - formats it with standard visual aid symbols for display purposes. + Supports both numeric and alphanumeric formats (post-2026). Args: - cnpj (str): The CNPJ string to be formatted for display. + cnpj (str): The CNPJ string to be formatted. Returns: - str: The formatted CNPJ with visual aid symbols if it's valid, - None if it's not valid. + str: A formatted version of the CNPJ if valid, or None if invalid. Example: >>> format_cnpj("03560714000142") '03.560.714/0001-42' - >>> format_cnpj("98765432100100") - None + >>> format_cnpj("12ABC34501DE35") + '12.ABC.345/01DE-35' """ - if not is_valid(cnpj): return None - - return "{}.{}.{}/{}-{}".format( - cnpj[:2], cnpj[2:5], cnpj[5:8], cnpj[8:12], cnpj[12:14] - ) + return display(sieve(cnpj)) -# OPERATIONS -############ +# ============================================================================= +# VALIDATION HELPERS +# ============================================================================= -def validate(cnpj): # type: (str) -> bool +def is_alphanumeric_cnpj(cnpj): # type: (str) -> bool """ - Validates a CNPJ (Brazilian Company Registration Number) by comparing its - verifying checksum digits to its base number. - - This function checks the validity of a CNPJ by comparing its verifying - checksum digits to its base number. The input should be a string of digits - with the appropriate length. + Detects whether a given CNPJ contains letters, indicating + the new alphanumeric format valid from 2026 onwards. Args: - cnpj (str): The CNPJ to be validated. + cnpj (str): The CNPJ string to analyze. Returns: - bool: True if the checksum digits match the base number, - False otherwise. + bool: True if the CNPJ includes alphabetic characters. + """ + return any(ch.isalpha() for ch in cnpj) - Example: - >>> validate("03560714000142") - True - >>> validate("00111222000133") - False - .. 
note:: - This method should not be used in new code and is only provided for - backward compatibility. +def ascii_value(ch): # type: (str) -> int + """ + Converts a single alphanumeric character into its numeric value + used in the alphanumeric CNPJ checksum calculation. """ + if not ch.isalnum(): + raise ValueError(f"Invalid CNPJ character: {ch}") + return ord(ch.upper()) - 48 - if not cnpj.isdigit() or len(cnpj) != 14 or len(set(cnpj)) == 1: - return False - return all( - _hashdigit(cnpj, i + 13) == int(v) for i, v in enumerate(cnpj[12:]) - ) -def is_valid(cnpj): # type: (str) -> bool +# ============================================================================= +# CORE VALIDATION AND CHECKSUM LOGIC +# ============================================================================= + + +def validate(cnpj): # type: (str) -> bool """ - Returns whether or not the verifying checksum digits of the given `cnpj` - match its base number. + Validates both traditional numeric CNPJs and the new + alphanumeric format introduced in 2026. - This function does not verify the existence of the CNPJ; it only - validates the format of the string. + The validation rule used depends on whether the CNPJ contains + alphabetic characters. + + After symbol removal the input must be exactly 14 alphanumeric + characters and must not repeat a single character; anything else + is rejected before the checksum is computed. Args: - cnpj (str): The CNPJ to be validated, a 14-digit string + cnpj (str): The CNPJ string (numeric or alphanumeric). Returns: - bool: True if the checksum digits match the base number, - False otherwise. - - Example: - >>> is_valid("03560714000142") - True - >>> is_valid("00111222000133") - False + bool: True if the CNPJ is valid according to its format. 
""" + clean = sieve(cnpj) + # Guard before computing anything: "00000000000000" satisfies the + # checksum but is not a valid CNPJ, and leftover symbols would make + # the checksum helpers raise ValueError instead of returning False. + if len(clean) != 14 or not clean.isalnum() or len(set(clean)) == 1: + return False - return isinstance(cnpj, str) and validate(cnpj) + base, dv = clean[:12], clean[12:] + + # Detect which version of the CNPJ is being used + if is_alphanumeric_cnpj(clean): + expected = _checksum_alphanumeric(base) + else: + expected = _checksum(base) + + return dv == expected -def generate(branch=1): # type: (int) -> str +def is_valid(cnpj): # type: (str) -> bool """ - Generates a random valid CNPJ digit string. An optional branch number - parameter can be given; it defaults to 1. + Checks whether the given CNPJ string (numeric or alphanumeric) + is valid by verifying its checksum digits. Args: - branch (int): An optional branch number to be included in the CNPJ. + cnpj (str): The CNPJ to be validated. Returns: - str: A randomly generated valid CNPJ string. + bool: True if the CNPJ is valid, False otherwise. Example: - >>> generate() - "30180536000105" - >>> generate(1234) - "01745284123455" + >>> is_valid("34665388000161") + True + >>> is_valid("12ABC34501DE35") + True + >>> is_valid("00000000000000") + False """ - branch %= 10000 - branch += int(branch == 0) - branch = str(branch).zfill(4) - base = str(randint(0, 99999999)).zfill(8) + branch + return isinstance(cnpj, str) and validate(cnpj) - return base + _checksum(base) + +# ============================================================================= +# CHECKSUM GENERATION (NUMERIC) +# ============================================================================= def _hashdigit(cnpj, position): # type: (str, int) -> int @@ -220,14 +232,7 @@ def _hashdigit(cnpj, position): # type: (str, int) -> int Returns: int: The calculated checksum digit. 
- - Example: - >>> _hashdigit("12345678901234", 13) - 3 - >>> _hashdigit("98765432100100", 14) - 9 """ - weightgen = chain(range(position - 8, 1, -1), range(9, 1, -1)) val = ( sum(int(digit) * weight for digit, weight in zip(cnpj, weightgen)) % 11 @@ -237,26 +242,95 @@ def _hashdigit(cnpj, position): # type: (str, int) -> int def _checksum(basenum): # type: (str) -> str """ - Calculates the verifying checksum digits for a given CNPJ base number. + Calculates the verifying checksum digits for a given numeric CNPJ base. + + Args: + basenum (str): The 12-digit numeric CNPJ base. + + Returns: + str: The 2 verifying digits. + """ + verifying_digits = str(_hashdigit(basenum, 13)) + verifying_digits += str(_hashdigit(basenum + verifying_digits, 14)) + return verifying_digits + + +# ============================================================================= +# CHECKSUM GENERATION (ALPHANUMERIC) +# ============================================================================= + + +def _hashdigit_alphanumeric(cnpj, position): # type: (str, int) -> int + """ + Calculates the checksum digit at the given `position` for the + alphanumeric CNPJ format (valid from 2026). + + Conversion uses ASCII values minus 48, as defined by RFB. + + Args: + cnpj (str): The alphanumeric CNPJ string. + position (int): The position of the checksum digit. + + Returns: + int: The calculated checksum digit. + """ + weightgen = chain(range(position - 8, 1, -1), range(9, 1, -1)) + converted = [ascii_value(ch) for ch in cnpj] + val = sum(v * w for v, w in zip(converted, weightgen)) % 11 + return 0 if val < 2 else 11 - val - This function computes the verifying checksum digits for a provided CNPJ - base number. The `basenum` should be a digit-string of the appropriate - length. + +def _checksum_alphanumeric(basenum): # type: (str) -> str + """ + Calculates the verifying checksum digits for an alphanumeric + CNPJ base, following the ASCII - 48 rule and Mod 11. 
Args: - basenum (str): The base number of the CNPJ for which verifying checksum - digits are calculated. + basenum (str): The 12-character alphanumeric base. Returns: - str: The verifying checksum digits. + str: The 2 verifying digits. + """ + d1 = _hashdigit_alphanumeric(basenum, 13) + d2 = _hashdigit_alphanumeric(basenum + str(d1), 14) + return f"{d1}{d2}" + + +# ============================================================================= +# GENERATION +# ============================================================================= + + +def generate(branch=1, new_format=False): # type: (int, bool) -> str + """ + Generates a valid CNPJ string. + + If `new_format` is False, a traditional numeric CNPJ will be generated. + If `new_format` is True, an alphanumeric CNPJ will be generated following + the 2026 specification (letters allowed in positions 1–12). + + Args: + branch (int): Optional branch number for numeric CNPJs (default = 1). + new_format (bool): If True, uses the alphanumeric CNPJ rules. + + Returns: + str: A valid CNPJ string (numeric or alphanumeric). 
Example: - >>> _checksum("123456789012") - "30" - >>> _checksum("987654321001") - "41" + >>> generate() + "30180536000105" + >>> generate(new_format=True) + "12AB3C4D0001E5" """ + if new_format: + # Generate alphanumeric root (8 chars) and order (4 chars) + alphabet = string.ascii_uppercase + string.digits + base = "".join(choice(alphabet) for _ in range(12)) + return base + _checksum_alphanumeric(base) - verifying_digits = str(_hashdigit(basenum, 13)) - verifying_digits += str(_hashdigit(basenum + verifying_digits, 14)) - return verifying_digits + # Legacy numeric format (default) + branch %= 10000 + branch += int(branch == 0) + branch = str(branch).zfill(4) + base = str(randint(0, 99999999)).zfill(8) + branch + return base + _checksum(base) diff --git a/tests/test_cnpj.py b/tests/test_cnpj.py index 2ec7261..6d13b39 100644 --- a/tests/test_cnpj.py +++ b/tests/test_cnpj.py @@ -3,11 +3,15 @@ from brutils.cnpj import ( _checksum, + _checksum_alphanumeric, _hashdigit, + _hashdigit_alphanumeric, + ascii_value, display, format_cnpj, generate, is_valid, + is_alphanumeric_cnpj, remove_symbols, sieve, validate, @@ -15,6 +19,10 @@ class TestCNPJ(TestCase): + # ------------------------------------------------------------------------- + # LEGACY TESTS (numeric CNPJ) + # ------------------------------------------------------------------------- + def test_sieve(self): self.assertEqual(sieve("00000000000"), "00000000000") self.assertEqual(sieve("12.345.678/0001-90"), "12345678000190") @@ -25,11 +33,41 @@ def test_sieve(self): ) self.assertEqual(sieve("/...---.../"), "") - def test_display(self): - self.assertEqual(display("00000000000109"), "00.000.000/0001-09") - self.assertIsNone(display("00000000000000")) - self.assertIsNone(display("0000000000000")) - self.assertIsNone(display("0000000000000a")) + def test_display_numeric_valid(self): + # Traditional numeric CNPJ + self.assertEqual(display("12345678000195"), "12.345.678/0001-95") + + def 
test_display_alphanumeric_valid(self): + # Alphanumeric CNPJ (valid from 2026) + formatted = display("12ABC34501DE35") + self.assertEqual(formatted, "12.ABC.345/01DE-35") + + def test_display_invalid_cases(self): + # Too short + self.assertIsNone(display("1234567890123")) + # All same character + self.assertIsNone(display("AAAAAAAAAAAAAA")) + # Empty string + self.assertIsNone(display("")) + + def test_format_cnpj_numeric_valid(self): + # Numeric CNPJ should format properly + self.assertEqual(format_cnpj("03560714000142"), "03.560.714/0001-42") + + def test_format_cnpj_alphanumeric_valid(self): + # Alphanumeric CNPJ with computed DV + base = "12ABC34501DE" + dv = _checksum_alphanumeric(base) + valid_cnpj = base + dv + formatted = format_cnpj(valid_cnpj) + self.assertEqual(formatted, "12.ABC.345/01DE-" + dv) + + def test_format_cnpj_invalid_returns_none(self): + # Invalid numeric + self.assertIsNone(format_cnpj("12345678000100")) + # Invalid alphanumeric (wrong DV) + self.assertIsNone(format_cnpj("12ABC34501DE99")) + def test_validate(self): self.assertIs(validate("34665388000161"), True) @@ -68,9 +106,10 @@ def test_is_valid(self): self.assertIs(is_valid("01838723000127"), True) def test_generate(self): - for _ in range(10_000): - self.assertIs(validate(generate()), True) - self.assertIsNotNone(display(generate())) + for _ in range(1000): + cnpj = generate() + self.assertTrue(validate(cnpj)) + self.assertIsNotNone(display(cnpj)) def test__hashdigit(self): self.assertEqual(_hashdigit("00000000000000", 13), 0) @@ -82,6 +121,64 @@ def test__checksum(self): self.assertEqual(_checksum("00000000000000"), "00") self.assertEqual(_checksum("52513127000299"), "99") + # ------------------------------------------------------------------------- + # NEW TESTS (alphanumeric CNPJ - 2026 format) + # ------------------------------------------------------------------------- + + def test_is_alphanumeric_cnpj(self): + self.assertTrue(is_alphanumeric_cnpj("12AB345C0001D5")) + 
self.assertFalse(is_alphanumeric_cnpj("12345678000195")) + + def test_ascii_value_conversion(self): + # Digits: ASCII(0–9) → 48–57 → minus 48 → 0–9 + self.assertEqual(ascii_value("0"), 0) + self.assertEqual(ascii_value("9"), 9) + # Letters: ASCII(A) = 65 → minus 48 → 17 + self.assertEqual(ascii_value("A"), 17) + self.assertEqual(ascii_value("Z"), 42) + self.assertEqual(ascii_value("b"), 18) # lowercase also works + + def test_hashdigit_alphanumeric(self): + # Simple test to ensure deterministic checksum generation + result = _hashdigit_alphanumeric("12ABC34501DE", 13) + self.assertIsInstance(result, int) + self.assertTrue(0 <= result <= 9) + + def test_checksum_alphanumeric(self): + # Ensure it returns exactly 2 digits as string + base = "12ABC34501DE" + dv = _checksum_alphanumeric(base) + self.assertEqual(len(dv), 2) + self.assertTrue(dv.isdigit()) + + def test_validate_cnpj_alphanumeric(self): + base = "12ABC34501DE" + dv = _checksum_alphanumeric(base) + valid_cnpj = base + dv + + # Should be valid + self.assertTrue(validate(valid_cnpj)) + self.assertTrue(is_valid(valid_cnpj)) + + # Invalid if DV altered + invalid_cnpj = valid_cnpj[:-1] + "9" + self.assertFalse(validate(invalid_cnpj)) + + def test_generate_alphanumeric(self): + # Generates and validates 100 random new-format CNPJs + for _ in range(200): + cnpj = generate(new_format=True) + self.assertEqual(len(cnpj), 14) + self.assertTrue(any(c.isalpha() for c in cnpj[:12])) + self.assertTrue(cnpj[-2:].isdigit()) + self.assertTrue(is_valid(cnpj)) + + def test_generate_branch_numeric_still_valid(self): + # Backward compatible + cnpj = generate(branch=9999) + self.assertTrue(is_valid(cnpj)) + self.assertFalse(any(c.isalpha() for c in cnpj)) + @patch("brutils.cnpj.sieve") class TestRemoveSymbols(TestCase):