Skip to content

Commit 456d697

Browse files
committed
refactor(stub): Merge _FeedParser into XMLParser and HTMLParser
Simplifies maintenance ahead of a potentially bigger upcoming change
1 parent defc7e5 commit 456d697

2 files changed

Lines changed: 120 additions & 55 deletions

File tree

src/lxml-stubs/etree/__init__.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ from ._parser import (
9595
XMLParser as XMLParser,
9696
XMLPullParser as XMLPullParser,
9797
XMLSyntaxError as XMLSyntaxError,
98-
_FeedParser as _FeedParser,
98+
# _FeedParser as _FeedParser, # removed
9999
get_default_parser as get_default_parser,
100100
set_default_parser as set_default_parser,
101101
)

src/lxml-stubs/etree/_parser.pyi

Lines changed: 119 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -56,58 +56,6 @@ class ParseError(LxmlSyntaxError):
5656
class XMLSyntaxError(ParseError): ...
5757
class ParserError(LxmlError): ...
5858

59-
# Includes most stuff in _BaseParser
60-
class _FeedParser(Generic[_ET_co]):
61-
@property
62-
def error_log(self) -> _ListErrorLog: ...
63-
@property
64-
def resolvers(self) -> _ResolverRegistry: ...
65-
@property
66-
def version(self) -> LiteralString: ...
67-
def copy(self) -> Self: ...
68-
makeelement: _ElementFactory[_ET_co]
69-
# In terms of annotation, what setting class_lookup does
70-
# is change _ET_co (type specialization), which can't be
71-
# done automatically with current python typing system.
72-
# One has to change it manually during type checking.
73-
# Very few people would do so, if any at all.
74-
def set_element_class_lookup(
75-
self, lookup: ElementClassLookup | None = None
76-
) -> None:
77-
"""
78-
Notes
79-
-----
80-
When calling this method, it is advised to also change typing
81-
specialization of the concerned parser, because the current Python
82-
typing system can't change it automatically.
83-
84-
Example
85-
-------
86-
The following code demonstrates how to create ``lxml.html.HTMLParser``
87-
manually from ``lxml.etree.HTMLParser``::
88-
89-
```python
90-
parser = etree.HTMLParser()
91-
reveal_type(parser) # HTMLParser[_Element]
92-
if TYPE_CHECKING:
93-
parser = cast('etree.HTMLParser[HtmlElement]', parser)
94-
else:
95-
parser.set_element_class_lookup(
96-
html.HtmlElementClassLookup())
97-
result = etree.fromstring(data, parser=parser)
98-
reveal_type(result) # HtmlElement
99-
```
100-
"""
101-
...
102-
103-
@deprecated("Removed since 5.0; renamed to set_element_class_lookup()")
104-
def setElementClassLookup(
105-
self, lookup: ElementClassLookup | None = None
106-
) -> None: ...
107-
@property
108-
def feed_error_log(self) -> _ListErrorLog: ...
109-
def feed(self, data: _AnyStr) -> None: ...
110-
11159
# Custom parser target support is abandoned,
11260
# see comment in XMLParser
11361
class _ParserTargetMixin(Generic[_T]):
@@ -125,7 +73,14 @@ class _PullParserMixin:
12573
# integration of custom target annotation (the 'target' parameter).
12674
# So far all attempts would cause usage of annotation unnecessarily
12775
# complex and convoluted, yet still can't get everything right.
128-
class XMLParser(_ParserTargetMixin[Any], _FeedParser[_ET_co]):
76+
class XMLParser(_ParserTargetMixin[Any], Generic[_ET_co]):
77+
"""The XML Parser. Parsers can be supplied as additional argument
78+
to various parse functions of the lxml API.
79+
80+
See Also
81+
--------
82+
- [API Documentation](https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLParser)
83+
"""
12984
def __init__(
13085
self,
13186
*,
@@ -147,6 +102,57 @@ class XMLParser(_ParserTargetMixin[Any], _FeedParser[_ET_co]):
147102
target: ParserTarget[Any] | None = None,
148103
compact: bool = True,
149104
) -> None: ...
105+
@property
106+
def error_log(self) -> _ListErrorLog:
107+
"""The error log of the last parser run."""
108+
@property
109+
def resolvers(self) -> _ResolverRegistry:
110+
"""The custom resolver registry of this parser."""
111+
@property
112+
def version(self) -> LiteralString:
113+
"""The version of the underlying XML parser."""
114+
def copy(self) -> Self:
115+
"""Create a new parser with the same configuration."""
116+
makeelement: _ElementFactory[_ET_co]
117+
"""Creates a new element associated with this parser."""
118+
def set_element_class_lookup(
119+
self, lookup: ElementClassLookup | None = None
120+
) -> None:
121+
"""Set a lookup scheme for element classes generated from this parser.
122+
123+
Annotation
124+
----------
125+
When calling this method, users will want to
126+
[change typing specialization](https://github.com/abelcheung/types-lxml/wiki/Using-specialised-class-directly#no-automatic-change-of-subscript)
127+
of the concerned parser manually, because the current Python
128+
typing system can't change it automatically.
129+
The link above contains an example of how to do it.
130+
131+
See Also
132+
--------
133+
- [API Documentation](https://lxml.de/apidoc/lxml.etree.html#lxml.etree._FeedParser.set_element_class_lookup)
134+
"""
135+
@deprecated("Removed since 5.0; renamed to set_element_class_lookup()")
136+
def setElementClassLookup(
137+
self, lookup: ElementClassLookup | None = None
138+
) -> None: ...
139+
@property
140+
def feed_error_log(self) -> _ListErrorLog:
141+
"""The error log of the last (or current) run of the feed parser.
142+
143+
See Also
144+
--------
145+
- [API Documentation](https://lxml.de/apidoc/lxml.etree.html#lxml.etree._FeedParser.feed_error_log)
146+
"""
147+
def feed(self, data: _AnyStr) -> None:
148+
"""Feeds data to the parser. The argument should be an 8-bit string
149+
buffer containing encoded data, although Unicode is supported as long
150+
as both string types are not mixed.
151+
152+
See Also
153+
--------
154+
- [API Documentation](https://lxml.de/apidoc/lxml.etree.html#lxml.etree._FeedParser.feed)
155+
"""
150156

151157
class XMLPullParser(_PullParserMixin, XMLParser[_ET_co]):
152158
def __init__(
@@ -201,7 +207,15 @@ class ETCompatXMLParser(XMLParser[_ET_co]):
201207
def set_default_parser(parser: _DefEtreeParsers[Any] | None) -> None: ...
202208
def get_default_parser() -> _DefEtreeParsers[Any]: ...
203209

204-
class HTMLParser(_ParserTargetMixin[Any], _FeedParser[_ET_co]):
210+
class HTMLParser(_ParserTargetMixin[Any], Generic[_ET_co]):
211+
"""This parser allows reading HTML into a normal XML tree. By default, it
212+
can read broken (non well-formed) HTML, depending on the capabilities of
213+
libxml2.
214+
215+
See Also
216+
--------
217+
- [API Documentation](https://lxml.de/apidoc/lxml.etree.html#lxml.etree.HTMLParser)
218+
"""
205219
@overload
206220
def __init__(
207221
self,
@@ -227,6 +241,57 @@ class HTMLParser(_ParserTargetMixin[Any], _FeedParser[_ET_co]):
227241
strip_cdata: bool,
228242
**__kw: Any,
229243
) -> None: ...
244+
@property
245+
def error_log(self) -> _ListErrorLog:
246+
"""The error log of the last parser run."""
247+
@property
248+
def resolvers(self) -> _ResolverRegistry:
249+
"""The custom resolver registry of this parser."""
250+
@property
251+
def version(self) -> LiteralString:
252+
"""The version of the underlying XML parser."""
253+
def copy(self) -> Self:
254+
"""Create a new parser with the same configuration."""
255+
makeelement: _ElementFactory[_ET_co]
256+
"""Creates a new element associated with this parser."""
257+
def set_element_class_lookup(
258+
self, lookup: ElementClassLookup | None = None
259+
) -> None:
260+
"""Set a lookup scheme for element classes generated from this parser.
261+
262+
Annotation
263+
----------
264+
When calling this method, users will want to
265+
[change typing specialization](https://github.com/abelcheung/types-lxml/wiki/Using-specialised-class-directly#no-automatic-change-of-subscript)
266+
of the concerned parser manually, because the current Python
267+
typing system can't change it automatically.
268+
The link above contains an example of how to do it.
269+
270+
See Also
271+
--------
272+
- [API Documentation](https://lxml.de/apidoc/lxml.etree.html#lxml.etree._FeedParser.set_element_class_lookup)
273+
"""
274+
@deprecated("Removed since 5.0; renamed to set_element_class_lookup()")
275+
def setElementClassLookup(
276+
self, lookup: ElementClassLookup | None = None
277+
) -> None: ...
278+
@property
279+
def feed_error_log(self) -> _ListErrorLog:
280+
"""The error log of the last (or current) run of the feed parser.
281+
282+
See Also
283+
--------
284+
- [API Documentation](https://lxml.de/apidoc/lxml.etree.html#lxml.etree._FeedParser.feed_error_log)
285+
"""
286+
def feed(self, data: _AnyStr) -> None:
287+
"""Feeds data to the parser. The argument should be an 8-bit string
288+
buffer containing encoded data, although Unicode is supported as long
289+
as both string types are not mixed.
290+
291+
See Also
292+
--------
293+
- [API Documentation](https://lxml.de/apidoc/lxml.etree.html#lxml.etree._FeedParser.feed)
294+
"""
230295

231296
class HTMLPullParser(_PullParserMixin, HTMLParser[_ET_co]):
232297
@overload

0 commit comments

Comments
 (0)