Skip to content

Commit 937d7ed

Browse files
committed
Add a new HTMLEntry
1 parent c8eadea commit 937d7ed

File tree

11 files changed

+352
-19
lines changed

11 files changed

+352
-19
lines changed

src/adapter/etl-adapter-parquet/tests/Flow/ETL/Adapter/Parquet/Tests/Unit/RowsNormalizerTest.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ enum_entry,
1212
enum_schema,
1313
float_entry,
1414
float_schema,
15+
html_entry,
16+
html_schema,
1517
int_entry,
1618
int_schema,
1719
json_entry,
@@ -72,6 +74,7 @@ public function test_normalization_nullable_entries() : void
7274
),
7375
enum_entry('enum', null),
7476
xml_entry('xml', null),
77+
html_entry('html', null),
7578
)
7679
);
7780
$schema = schema(
@@ -101,6 +104,7 @@ enum_entry('enum', null),
101104
),
102105
enum_schema('enum', BackedStringEnum::class, true),
103106
xml_schema('xml', true),
107+
html_schema('html', true),
104108
);
105109

106110
self::assertEquals(
@@ -119,6 +123,7 @@ enum_schema('enum', BackedStringEnum::class, true),
119123
'struct' => null,
120124
'enum' => null,
121125
'xml' => null,
126+
'html' => null,
122127
],
123128
],
124129
(new RowsNormalizer())->normalize($rows, $schema)

src/core/etl/src/Flow/ETL/DSL/functions.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@
165165
use Flow\ETL\Retry\RetryStrategy\{AnyThrowable, OnExceptionTypes};
166166
use Flow\ETL\Row\{Entries, EntryFactory, SortOrder};
167167
use Flow\ETL\Row\Entry\{BooleanEntry, DateEntry, DateTimeEntry, EnumEntry, FloatEntry, IntegerEntry, JsonEntry, ListEntry, MapEntry, StringEntry, StructureEntry, TimeEntry, UuidEntry, XMLElementEntry, XMLEntry};
168+
use Flow\ETL\Row\Entry\HTMLEntry;
168169
use Flow\ETL\Row\{Entry, EntryReference, Reference, References};
169170
use Flow\ETL\Row\Formatter\ASCIISchemaFormatter;
170171
use Flow\ETL\Schema\{Definition, Formatter\PHPFormatter\TypeFormatter, Formatter\PHPFormatter\ValueFormatter};
@@ -627,6 +628,15 @@ function xml_element_entry(string $name, \DOMElement|string|null $value, ?Metada
627628
return new XMLElementEntry($name, $value, $metadata);
628629
}
629630

631+
/**
 * Build an HTML row entry with the given name.
 *
 * The value and metadata are forwarded unchanged to the HTMLEntry constructor.
 *
 * @return Entry<?HTMLDocument>
 */
#[DocumentationDSL(module: Module::CORE, type: DSLType::ENTRY)]
function html_entry(string $name, HTMLDocument|string|null $value, ?Metadata $metadata = null) : Entry
{
    $entry = new HTMLEntry($name, $value, $metadata);

    return $entry;
}
639+
630640
/**
631641
* @param Entry<mixed> ...$entries
632642
*/
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Row\Entry;
6+
7+
use function Flow\Types\DSL\{type_equals, type_html, type_optional};
8+
use Flow\ETL\Row\{Entry, Reference};
9+
use Flow\ETL\Schema\{Definition, Metadata};
10+
use Flow\Types\Type;
11+
use Flow\Types\Value\HTMLDocument;
12+
13+
/**
 * Row entry holding an optional HTML document.
 *
 * A string passed to the constructor is converted with HTMLDocument::fromString();
 * a null value is kept as null and makes the entry's definition nullable.
 *
 * @implements Entry<?HTMLDocument>
 */
final class HTMLEntry implements Entry
{
    use EntryRef;

    private Metadata $metadata;

    /**
     * @var Type<HTMLDocument>
     */
    private readonly Type $type;

    private ?HTMLDocument $value;

    /**
     * @param string $name entry name
     * @param null|HTMLDocument|string $value document, raw HTML string to convert, or null
     * @param null|Metadata $metadata optional metadata; Metadata::empty() is used when omitted
     */
    public function __construct(
        private readonly string $name,
        HTMLDocument|string|null $value,
        ?Metadata $metadata = null,
    ) {
        $this->value = \is_string($value) ? HTMLDocument::fromString($value) : $value;

        // Explicit null check: only a missing metadata object falls back to the empty one.
        $this->metadata = $metadata ?? Metadata::empty();
        $this->type = type_html();
    }

    public function __toString() : string
    {
        return $this->toString();
    }

    /**
     * Describe this entry: name, type, nullability (true when the value is null) and metadata.
     */
    public function definition() : Definition
    {
        return new Definition($this->name, $this->type, null === $this->value, $this->metadata);
    }

    /**
     * Create a copy of this entry with a cloned document (or null) and the same metadata.
     */
    public function duplicate() : self
    {
        return new self($this->name, null !== $this->value ? clone $this->value : null, $this->metadata);
    }

    /**
     * Check whether this entry is named like the given reference or string.
     */
    public function is(Reference|string $name) : bool
    {
        if ($name instanceof Reference) {
            return $this->name === $name->name();
        }

        return $this->name === $name;
    }

    /**
     * Two entries are equal when the other is also an HTMLEntry with the same name,
     * an equal type, and the same string representation of the value (or both null).
     */
    public function isEqual(Entry $entry) : bool
    {
        if (!$entry instanceof self || !$this->is($entry->name())) {
            return false;
        }

        if (!type_equals($this->type, $entry->type)) {
            return false;
        }

        return $entry->value()?->toString() === $this->value?->toString();
    }

    /**
     * Create a new entry whose value is the result of calling $mapper with the current value.
     *
     * The mapper result is passed to the constructor, so it has to be an HTMLDocument,
     * a string, or null. Metadata is carried over, matching withValue() and duplicate().
     */
    public function map(callable $mapper) : self
    {
        return new self($this->name, $mapper($this->value), $this->metadata);
    }

    public function name() : string
    {
        return $this->name;
    }

    /**
     * Create a new entry with the same value under a different name.
     *
     * Metadata is carried over so that renaming does not silently discard it.
     */
    public function rename(string $name) : self
    {
        return new self($name, $this->value, $this->metadata);
    }

    /**
     * String form of the document, or an empty string when the value is null.
     */
    public function toString() : string
    {
        if (null === $this->value) {
            return '';
        }

        return $this->value->toString();
    }

    public function type() : Type
    {
        return $this->type;
    }

    public function value() : ?HTMLDocument
    {
        return $this->value;
    }

    /**
     * Create a new entry with the given value after asserting it against the
     * optional variant of this entry's type; name and metadata are kept.
     */
    public function withValue(mixed $value) : self
    {
        return new self($this->name, type_optional($this->type())->assert($value), $this->metadata);
    }
}

src/core/etl/src/Flow/ETL/Row/EntryFactory.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
datetime_entry,
1010
enum_entry,
1111
float_entry,
12+
html_entry,
1213
int_entry,
1314
json_entry,
1415
json_object_entry,
@@ -51,6 +52,7 @@ enum_entry,
5152
UuidType,
5253
XMLElementType,
5354
XMLType};
55+
use Flow\Types\Type\Logical\HTMLType;
5456
use Flow\Types\Type\Native\{
5557
ArrayType,
5658
BooleanType,
@@ -171,6 +173,7 @@ public function createAs(string $entryName, mixed $value, Definition|Type $defin
171173
NullType::class => StringEntry::fromNull($entryName, $metadata),
172174
XMLType::class => xml_entry($entryName, null, $metadata),
173175
XMLElementType::class => xml_element_entry($entryName, null, $metadata),
176+
HTMLType::class => html_entry($entryName, null, $metadata),
174177
default => throw new InvalidArgumentException("Can't convert value into type \"{$type->toString()}\""),
175178
};
176179
}
@@ -234,6 +237,10 @@ public function createAs(string $entryName, mixed $value, Definition|Type $defin
234237
}
235238
}
236239

240+
if ($type instanceof HTMLType) {
241+
return html_entry($entryName, type_optional($type)->cast($value), $metadata);
242+
}
243+
237244
if ($type instanceof XMLType) {
238245
return xml_entry($entryName, type_optional($type)->cast($value), $metadata);
239246
}

src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/DisplayTest.php

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ enum_entry,
1111
float_entry,
1212
from_array,
1313
from_rows,
14+
html_entry,
1415
int_entry,
1516
json_entry,
1617
list_entry,
@@ -75,6 +76,7 @@ public function extract(FlowContext $context) : \Generator
7576
),
7677
enum_entry('enum', BackedStringEnum::three),
7778
xml_entry('xml', '<xml><node id="123">test<foo>bar</foo></node></xml>'),
79+
html_entry('html', '<html lang="en"><body><div><span>bar</span></div></body></html>'),
7880
),
7981
);
8082
}
@@ -84,15 +86,15 @@ enum_entry('enum', BackedStringEnum::three),
8486

8587
self::assertCommandOutputIdentical(
8688
<<<'ASCIITABLE'
87-
+------+------------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+-------+----------------------+
88-
| id | price | 100 | deleted | created-at | phase | array | list | map | items | enum | xml |
89-
+------+------------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+-------+----------------------+
90-
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> |
91-
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> |
92-
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> |
93-
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> |
94-
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> |
95-
+------+------------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+-------+----------------------+
89+
+------+------------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+-------+----------------------+----------------------+
90+
| id | price | 100 | deleted | created-at | phase | array | list | map | items | enum | xml | html |
91+
+------+------------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+-------+----------------------+----------------------+
92+
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> | <html lang="en"><bod |
93+
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> | <html lang="en"><bod |
94+
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> | <html lang="en"><bod |
95+
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> | <html lang="en"><bod |
96+
| 1234 | 123.450000 | 100 | false | 2020-07-13T15:00:00+ | | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | three | <xml><node id="123"> | <html lang="en"><bod |
97+
+------+------------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+-------+----------------------+----------------------+
9698
5 rows
9799

98100
ASCIITABLE,

0 commit comments

Comments
 (0)