Skip to content

Commit

Permalink
refactor: use email.message.EmailMessage (#142)
Browse files Browse the repository at this point in the history
Signed-off-by: Henry Schreiner <[email protected]>
  • Loading branch information
henryiii authored Sep 13, 2024
1 parent c2b77b2 commit 5966114
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 51 deletions.
94 changes: 44 additions & 50 deletions pyproject_metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import copy
import dataclasses
import email.message
import email.policy
import email.utils
import os
import os.path
Expand Down Expand Up @@ -113,46 +115,36 @@ class ConfigurationWarning(UserWarning):
"""Warnings about backend metadata."""


class RFC822Message:
"""Python-flavored RFC 822 message implementation."""

__slots__ = ('_headers', '_body')

def __init__(self) -> None:
self._headers: list[tuple[str, str]] = []
self._body: str | None = None
@dataclasses.dataclass
class _SmartMessageSetter:
"""
This provides a nice internal API for setting values in an RFC822Message to
reduce boilerplate.
def items(self) -> list[tuple[str, str]]:
return self._headers.copy()
If a value is None, do nothing.
If a value contains a newline, indent it (may produce a warning in the future).
"""

def get_all(self, name: str) -> None | list[str]:
return [v for k, v in self.items() if k == name]
message: email.message.EmailMessage

def __setitem__(self, name: str, value: str | None) -> None:
if not value:
return
self._headers.append((name, value))
if '\n' in value:
value = value.replace('\n', '\n ')
self.message[name] = value

def __str__(self) -> str:
text = ''
for name, entry in self.items():
lines = entry.strip('\n').split('\n')
text += f'{name}: {lines[0]}\n'
for line in lines[1:]:
text += ' ' * 8 + line + '\n'
text += '\n'
if self._body:
text += self._body
return text

def __bytes__(self) -> bytes:
return str(self).encode()
class RFC822Message(email.message.EmailMessage):
"""Python-flavored RFC 822 message implementation."""

def get_payload(self) -> str | None:
return self._body
__slots__ = ()

def set_payload(self, body: str) -> None:
self._body = body
def __init__(self) -> None:
super().__init__(email.policy.compat32)

def __str__(self) -> str:
return bytes(self).decode('utf-8')

This comment has been minimized.

Copy link
@dnicolodi

dnicolodi Sep 13, 2024

Contributor

Why do we need this? email.message.EmailMessage has a __str__ method that should be pretty much equivalent.

This comment has been minimized.

Copy link
@henryiii

henryiii Sep 13, 2024

Author Collaborator

Easy enough to check:

/usr/local/Cellar/[email protected]/3.12.6/Frameworks/Python.framework/Versions/3.12/lib/python3.12/email/message.py:1003: in __str__
    return self.as_string(policy=self.policy.clone(utf8=True))
        self       = <pyproject_metadata.RFC822Message object at 0x10692e600>
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = Compat32(), kw = {'utf8': True}, newpolicy = Compat32(), attr = 'utf8', value = True

    def clone(self, **kw):
        """Return a new instance with specified attributes changed.

        The new instance has the same attribute values as the current object,
        except for the changes passed in as keyword arguments.

        """
        newpolicy = self.__class__.__new__(self.__class__)
        for attr, value in self.__dict__.items():
            object.__setattr__(newpolicy, attr, value)
        for attr, value in kw.items():
            if not hasattr(self, attr):
>               raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        attr, self.__class__.__name__))
E               TypeError: 'utf8' is an invalid keyword argument for Compat32

However, I've noticed some people using EmailMessage with a custom policy that supports utf8, so let me check that.

This comment has been minimized.

This comment has been minimized.

Copy link
@henryiii

henryiii Sep 13, 2024

Author Collaborator

Which also might fix #149 if we are lucky...

This comment has been minimized.

Copy link
@dnicolodi

dnicolodi Sep 13, 2024

Contributor

Oh, indeed. I now remember bumping into this already. And I think it is a Python bug. But the email.message is enough of a inheritance mess that I gave up trying to understand where things break, why, and if they are even supposed to work.

This comment has been minimized.

Copy link
@dnicolodi

dnicolodi Sep 13, 2024

Contributor

This failure and #149 definitely seem related. What does the metadata specification say about encodings? I remember it saying that the metadata need to be parseable using the compat32 policy, but I don't know what that means in terms of encoding of non-ascii strings.

It would probably be nice to have some round-trip tests that serialize some funny corner cases and see if the data can be read back correctly with packaging and with email.parser.Parser(policy=email.policy.compat32).

This comment has been minimized.

Copy link
@henryiii

henryiii Sep 13, 2024

Author Collaborator

I thought we had utf-8 tests, but maybe not. I think I have an idea, though. Have to get to work and a meeting, so will try it in a couple of hours.

This comment has been minimized.

Copy link
@dnicolodi

dnicolodi Sep 13, 2024

Contributor

The specification https://packaging.python.org/en/latest/specifications/core-metadata/#core-metadata actually says that the encoding needs to be utf8.



class DataFetcher:
Expand Down Expand Up @@ -567,58 +559,60 @@ def as_rfc822(self) -> RFC822Message:
self.write_to_rfc822(message)
return message

def write_to_rfc822(self, message: RFC822Message) -> None: # noqa: C901
def write_to_rfc822(self, message: email.message.EmailMessage) -> None: # noqa: C901

This comment has been minimized.

Copy link
@dnicolodi

dnicolodi Sep 13, 2024

Contributor

I don't understand why this method is required.

In the optic o making the code as simple as possible, if we are doing other compatibility breaking changes, I would get rid of it entirely and just keep to_rcf822(). This would also allow to remove the RFC822Message subclass from above and have to_rfc822() return a standard email.message.EmailMessage instance. The RFC822Message main job is to have an email.message.EmailMessage with compat32 policy by default. If we get rid of this method, we can create the EmailMessage instance with the right policy from the start, removing the need for the subclass.

This comment has been minimized.

Copy link
@henryiii

henryiii Sep 13, 2024

Author Collaborator

The goal is to not have any breaking changes; all the stuff removed doesn't seem to be used by anyone. Lots of people use __str__, though. This exists entirely for __str__. We can get rid of it in #140.

This comment has been minimized.

Copy link
@dnicolodi

dnicolodi Sep 13, 2024

Contributor

I don't think anyone is using write_to_rfc822() either. That's why I was proposing to get rid of it as part of the refactoring and cleanup you have been doing (thank you!)

self.validate()

message['Metadata-Version'] = self.metadata_version
message['Name'] = self.name
smart_message = _SmartMessageSetter(message)

smart_message['Metadata-Version'] = self.metadata_version
smart_message['Name'] = self.name
if not self.version:
msg = 'Missing version field'
raise ConfigurationError(msg)
message['Version'] = str(self.version)
smart_message['Version'] = str(self.version)
# skip 'Platform'
# skip 'Supported-Platform'
if self.description:
message['Summary'] = self.description
message['Keywords'] = ','.join(self.keywords)
smart_message['Summary'] = self.description
smart_message['Keywords'] = ','.join(self.keywords)
if 'homepage' in self.urls:
message['Home-page'] = self.urls['homepage']
smart_message['Home-page'] = self.urls['homepage']
# skip 'Download-URL'
message['Author'] = self._name_list(self.authors)
message['Author-Email'] = self._email_list(self.authors)
message['Maintainer'] = self._name_list(self.maintainers)
message['Maintainer-Email'] = self._email_list(self.maintainers)
smart_message['Author'] = self._name_list(self.authors)
smart_message['Author-Email'] = self._email_list(self.authors)
smart_message['Maintainer'] = self._name_list(self.maintainers)
smart_message['Maintainer-Email'] = self._email_list(self.maintainers)
if self.license:
message['License'] = self.license.text
smart_message['License'] = self.license.text
for classifier in self.classifiers:
message['Classifier'] = classifier
smart_message['Classifier'] = classifier
# skip 'Provides-Dist'
# skip 'Obsoletes-Dist'
# skip 'Requires-External'
for name, url in self.urls.items():
message['Project-URL'] = f'{name.capitalize()}, {url}'
smart_message['Project-URL'] = f'{name.capitalize()}, {url}'
if self.requires_python:
message['Requires-Python'] = str(self.requires_python)
smart_message['Requires-Python'] = str(self.requires_python)
for dep in self.dependencies:
message['Requires-Dist'] = str(dep)
smart_message['Requires-Dist'] = str(dep)
for extra, requirements in self.optional_dependencies.items():
norm_extra = extra.replace('.', '-').replace('_', '-').lower()
message['Provides-Extra'] = norm_extra
smart_message['Provides-Extra'] = norm_extra
for requirement in requirements:
message['Requires-Dist'] = str(
smart_message['Requires-Dist'] = str(
self._build_extra_req(norm_extra, requirement)
)
if self.readme:
if self.readme.content_type:
message['Description-Content-Type'] = self.readme.content_type
smart_message['Description-Content-Type'] = self.readme.content_type
message.set_payload(self.readme.text)
# Core Metadata 2.2
if self.metadata_version != '2.1':
for field in self.dynamic:
if field in ('name', 'version'):
msg = f'Field cannot be dynamic: {field}'
raise ConfigurationError(msg)
message['Dynamic'] = field
smart_message['Dynamic'] = field

def _name_list(self, people: list[tuple[str, str | None]]) -> str:
return ', '.join(name for name, email_ in people if not email_)
Expand Down
3 changes: 2 additions & 1 deletion tests/test_rfc822.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@
)
def test_headers(items: list[tuple[str, str]], data: str) -> None:
message = pyproject_metadata.RFC822Message()
smart_message = pyproject_metadata._SmartMessageSetter(message)

for name, value in items:
message[name] = value
smart_message[name] = value

data = textwrap.dedent(data) + '\n'
assert str(message) == data
Expand Down

0 comments on commit 5966114

Please sign in to comment.