-
Notifications
You must be signed in to change notification settings - Fork 106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Render Python DocTest correctly #327
Comments
I have found a quick solution for myself for this particular problem. This is clearly a google style format, so I checked the contrib/processors/google.py file. def _process(self, node: docspec.ApiObject):
if not node.docstring:
return
lines = []
current_lines: t.List[str] = []
in_codeblock = False
keyword = None
def _commit():
if keyword:
generate_sections_markdown(lines, {keyword: current_lines})
else:
lines.extend(current_lines)
current_lines.clear()
for line in node.docstring.content.split("\n"):
if line.lstrip().startswith("```"):
in_codeblock = not in_codeblock
current_lines.append(line)
continue
if in_codeblock:
current_lines.append(line)
continue
line = line.strip()
if line in self._keywords_map:
_commit()
keyword = self._keywords_map[line]
# new start
if keyword == 'Examples':
current_lines.append('```python')
# new end
continue
if keyword is None:
lines.append(line)
continue
for param_re in self._param_res:
param_match = param_re.match(line)
if param_match:
if "type" in param_match.groupdict():
current_lines.append("- `{param}` _{type}_ - {desc}".format(**param_match.groupdict()))
else:
current_lines.append("- `{param}` - {desc}".format(**param_match.groupdict()))
break
if not param_match:
current_lines.append(" {line}".format(line=line))
# new start
if keyword == 'Examples':
current_lines.append('```')
# new end
_commit()
node.docstring.content = "\n".join(lines) I hope this helps in finding a solution that fits not only my problem |
Thanks for your suggestion! |
Sorry to be responding so late; it has been a busy time at work. I will try to run pydoc-markdown on your supplied function. |
Sure! taxon.py# -*- coding: utf-8 -*-
"""
* @Date: 2023-03-26 09:46:05
* @LastEditors: hwrn [email protected]
* @LastEditTime: 2024-05-07 21:30:27
* @FilePath: /meer-omics/src/taxon.py
* @Description:
"""
# """
import os
from typing import Iterable
from collections import Counter
from typing import Final, Generator, Iterable, Union
# Single-letter rank codes; same order as the full names in `levels_full`.
levels: Final = "d", "p", "c", "o", "f", "g", "s"
# Full rank names, from Domain down to Species.
levels_full: Final = "Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"
# Rank code returned by `last_annots` when no rank carries an annotation.
root_level: Final = "r"
# Empty placeholder per rank ("d__", "p__", ...), used to pad incomplete lineages.
default_prefixes: Final = {i: f"{i}__" for i in levels}
def __fix_domain_prefix(string: str):
    """Rewrite a leading ``k__`` rank prefix as ``d__``.

    Strings that do not start with ``k__`` are returned unchanged.
    """
    legacy_prefix = "k__"
    if not string.startswith(legacy_prefix):
        return string
    return "d__" + string[len(legacy_prefix):]
def fill_lineage(substring: str):
    """
    Expand a partial lineage into a full lineage with one entry per rank.

    Every non-empty ``;``-separated entry of *substring* must look like
    ``<rank>__<name>`` where ``<rank>`` is one of ``levels``; a leading
    ``k__`` is read as ``d__``. Ranks missing from the input are emitted as
    their empty placeholder (for example ``p__``). If a rank appears more than
    once, the last entry wins.

    Raises ``AssertionError`` when an entry does not match that shape.

    >>> fill_lineage("k__Bacteria")
    'd__Bacteria;p__;c__;o__;f__;g__;s__'
    >>> fill_lineage("")
    'd__;p__;c__;o__;f__;g__;s__'
    """
    # Start from the empty placeholders and overwrite the ranks that are given.
    by_rank: dict[str, str] = dict(default_prefixes)
    for entry in __fix_domain_prefix(substring).split(";"):
        if entry == "":
            continue
        # Format check: known rank letter followed by a double underscore.
        assert entry[0] in levels and entry[1:3] == "__"
        by_rank[entry[0]] = entry
    return ";".join(by_rank[rank] for rank in levels)
# Input type of `lca` and `lca_vote`: a generator or any other iterable of strings.
GenOrIter = Generator[str, None, None] | Iterable[str]
def lca(taxons: GenOrIter, fill=False, sep=";"):
    """
    Return the longest lineage prefix shared by every entry of *taxons*.

    A leading ``k__`` in an entry is read as ``d__``. Only whole
    *sep*-delimited entries count as shared; a rank name that matches only
    partially is cut off. With ``fill=True`` the result is passed through
    ``fill_lineage`` so that every rank is present.

    >>> lca(
    ...     [
    ...         "k__Bacteria;p__Proteobacteria;c__Acidithiobaci...",
    ...         "k__Bacteria;p__Proteobacteria;c__Gammaproteoba...",
    ...     ]
    ... )
    'd__Bacteria;p__Proteobacteria'
    >>> lca(
    ...     [
    ...         "k__Bacteria;p__Proteobacteria;c__Acidithiobaci...",
    ...         "k__Bacteria;p__Proteobacteria;c__Gammaproteoba...",
    ...     ],
    ...     fill=True
    ... )
    'd__Bacteria;p__Proteobacteria;c__;o__;f__;g__;s__'
    """
    # Terminate every lineage with the separator so that identical lineages
    # survive the truncation below in full.
    terminated = [__fix_domain_prefix(taxon) + sep for taxon in taxons]
    shared = os.path.commonprefix(terminated)
    # commonprefix compares character by character; drop whatever follows the
    # last complete separator.
    common_lineage = shared.rsplit(sep, 1)[0]
    if fill:
        return fill_lineage(common_lineage)
    return common_lineage
def iter_rsplit(s: str, sep=";"):
    """
    Yield *s* itself, then each shorter prefix obtained by cutting at *sep*.

    Prefixes are produced from longest to shortest; the separator at the cut
    position is not part of the yielded prefix. A string without *sep* yields
    only itself.

    Raises:
        ValueError: if *sep* is empty. Because this is a generator, the error
            is raised when the first item is requested.

    >>> list(iter_rsplit("a;b;c;d;e;f;g", ";"))
    ['a;b;c;d;e;f;g', 'a;b;c;d;e;f', 'a;b;c;d;e', 'a;b;c;d', 'a;b;c', 'a;b', 'a']
    """
    if not sep:
        # rfind("") always matches at the end of the search window, so the
        # cut position would never move and the loop would never terminate.
        raise ValueError("empty separator")
    yield s
    cut = len(s)
    # Each search is limited to the text before the previous cut.
    while (cut := s.rfind(sep, 0, cut)) != -1:
        yield s[:cut]
def lca_vote(taxons: GenOrIter, part=1 / 2, majority=1, sep=";"):
    """
    Pick the most specific lineage prefix that enough of *taxons* agree on.

    All taxons must contain the same number of *sep* occurrences. Starting
    from the full strings and dropping one trailing *sep*-delimited entry per
    step, the prefixes at each step are counted. A prefix wins when its count
    is strictly greater than ``len(taxons) * part`` or at least
    ``len(taxons) * majority``, provided it is non-empty and does not end
    with ``"__"``.

    Returns a tuple ``(step, prefix, counts)``:

    * ``step`` is the number of trailing entries dropped (0 = full string),
    * ``prefix`` is the winning prefix,
    * ``counts`` is ``Counter.most_common()`` of all prefixes at that step.

    Empty input returns ``(None, "", [])``. If no prefix wins at any step,
    the result is ``(None, "", counts)`` with the counts of the last step.

    Raises ``AssertionError`` when the taxons differ in their number of
    separators.

    >>> from workflow.contigs.collect_annots import lca_vote
    >>> lca_vote(["a;b;c;d;e;f;g", "a;b;c;d;e1;f;g"])
    (3, 'a;b;c;d', [('a;b;c;d', 2)])
    >>> lca_vote(["a;b;c;d;e;f;g", "a;b;c;d;e1;f;g"], part=1/3)
    (0, 'a;b;c;d;e;f;g', [('a;b;c;d;e;f;g', 1), ('a;b;c;d;e1;f;g', 1)])
    >>> lca_vote(["a;b;c;d;e;f;g", "a;b;c;d;e1;f;g"], majority=1/2)
    (0, 'a;b;c;d;e;f;g', [('a;b;c;d;e;f;g', 1), ('a;b;c;d;e1;f;g', 1)])
    >>> lca_vote(["a;b;c;d;e;f;g", "a;b;c;d;e1;f;g"], sep="e")
    (1, 'a;b;c;d;', [('a;b;c;d;', 2)])
    """
    # Materialise once: the input may be a generator and is traversed several times.
    _taxons = list(taxons)
    if not _taxons:
        return None, "", []
    assert len({i.count(sep) for i in _taxons}) == 1, "taxons not same level"
    # Vote thresholds: win with `n >= win_ge` or with `n > win_gt`.
    win_ge = len(_taxons) * majority
    win_gt = len(_taxons) * part
    taxons_i: Iterable[str]
    # zip() advances every taxon's prefix generator in lockstep, so each
    # iteration holds the prefixes of all taxons at the same step.
    for taxon_level_i, taxons_i in enumerate(
        zip(*(iter_rsplit(i, sep) for i in _taxons))
    ):
        for common_taxon_i, n in Counter(taxons_i).most_common():
            if n > win_gt or n >= win_ge:
                # Skip winners that are empty or end with "__".
                if common_taxon_i and not common_taxon_i.endswith("__"):
                    return (
                        taxon_level_i,
                        common_taxon_i,
                        Counter(taxons_i).most_common(),
                    )
            # NOTE(review): this snippet reached review without indentation;
            # the `else` is attached to the threshold check here (counts are
            # in descending order, so later candidates cannot win either).
            # Confirm against the original file.
            else:
                break
    return None, "", Counter(taxons_i).most_common()
def last_annots(lineage_str: str):
    """
    Return the most specific rank of *lineage_str* that carries a name.

    The lineage is split on ``;`` and each entry is keyed by the text before
    its first ``__``; a leading ``k__`` is read as ``d__``. Ranks are checked
    from the last of ``levels`` to the first, and the first entry that is not
    exactly three characters long (i.e. not a bare ``x__`` placeholder) is
    returned as ``(rank, entry)``. If none qualifies, ``(root_level, "")`` is
    returned.

    >>> last_annots("k__Bacteria;p__Proteobacteria;c__;o__;f__;g__;s__")
    ('p', 'p__Proteobacteria')
    """
    by_rank: dict[str, str] = {}
    for entry in __fix_domain_prefix(lineage_str).split(";"):
        by_rank[entry.split("__", 1)[0]] = entry
    for rank in reversed(levels):
        entry = by_rank.get(rank)
        if entry is not None and len(entry) != 3:
            return rank, entry
    return root_level, ""
def taxon_split(
    lineage_str: str, start: Union[str, int] = "d", end: Union[str, int] = "o"
):
    """
    Return the ranks of *lineage_str* from *start* through *end*, inclusive.

    *start* and *end* are either 1-based integer positions (1..7) or rank
    letters from ``levels``. The lineage is first completed with
    ``fill_lineage``, so ranks missing from the input appear as empty
    placeholders.

    >>> taxon_split("k__Bacteria;p__Proteobacteria;c__;o__;f__;g__;s__", 1, 4)
    'd__Bacteria;p__Proteobacteria;c__;o__'
    """
    # Translate both bounds into a half-open slice over the rank list.
    if isinstance(start, int):
        first = start - 1
    else:
        first = levels.index(start)
    if isinstance(end, int):
        stop = end
    else:
        stop = levels.index(end) + 1
    ranks = fill_lineage(lineage_str).split(";")
    return ";".join(ranks[first:stop])
Thanks for this useful software!
Is your feature request related to a problem? Please describe.
I'm using pydoc-markdown to generate Markdown files. However, everything in a docstring is written to the Markdown output almost as-is, so when there is a function like this:
It will translate to:
and render as:
iter_rsplit
However, this is not a clear way to view it.
Describe the solution you'd like
I would like pydoc-markdown to find
>>>
in the docstring and render it like this:
Additional context
All backslashes preceding backquotes within code blocks solely serve to prevent Markdown formatting issues.
The text was updated successfully, but these errors were encountered: