I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,6 @@
import sys
from fontTools.subset import main
# When this module is executed as a script, run the subsetter's main() and
# use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())

View File

@ -0,0 +1,184 @@
from fontTools.misc import psCharStrings
from fontTools import ttLib
from fontTools.pens.basePen import NullPen
from fontTools.misc.roundTools import otRound
from fontTools.misc.loggingTools import deprecateFunction
from fontTools.subset.util import _add_method, _uniq_sort
class _ClosureGlyphsT2Decompiler(psCharStrings.SimpleT2Decompiler):
    """T2 decompiler that collects glyphs referenced by seac-style ``endchar``.

    Every base/accent glyph name found while executing a charstring is added
    to the ``components`` collection supplied by the caller.
    """

    def __init__(self, components, localSubrs, globalSubrs):
        super().__init__(localSubrs, globalSubrs)
        # Caller-owned collection that receives the referenced glyph names.
        self.components = components

    def op_endchar(self, index):
        """Record the base and accent glyphs of a four-operand ``endchar``."""
        operands = self.popall()
        if len(operands) < 4:
            # Plain endchar: no accent composition, nothing to record.
            return

        from fontTools.encodings.StandardEncoding import StandardEncoding

        # endchar can do seac accent building; the T2 spec says it's deprecated,
        # but recent software that shall remain nameless does output it.
        _adx, _ady, base_code, accent_code = operands[-4:]
        # Resolve both codes before touching the collection, so a failed
        # lookup leaves it unchanged.
        base_name = StandardEncoding[base_code]
        accent_name = StandardEncoding[accent_code]
        self.components.add(base_name)
        self.components.add(accent_name)
@_add_method(ttLib.getTableClass("CFF "))
def closure_glyphs(self, s):
    """Grow ``s.glyphs`` with glyphs referenced through seac-style ``endchar``.

    Charstrings are executed with ``_ClosureGlyphsT2Decompiler``; any newly
    discovered glyph names are added to ``s.glyphs`` and then scanned in turn,
    until a pass discovers nothing new.
    """
    cff = self.cff
    assert len(cff) == 1
    top_dict = cff[cff.keys()[0]]
    charstrings = top_dict.CharStrings

    pending = s.glyphs
    while pending:
        found = set()
        for name in pending:
            if name in charstrings:
                charstring = charstrings[name]
                local_subrs = getattr(charstring.private, "Subrs", [])
                scanner = _ClosureGlyphsT2Decompiler(
                    found, local_subrs, charstring.globalSubrs
                )
                scanner.execute(charstring)
        # Only glyphs not already in the subset need another pass.
        found -= s.glyphs
        s.glyphs.update(found)
        pending = found
def _empty_charstring(font, glyphName, isCFF2, ignoreWidth=False):
    """Replace the outline of ``glyphName`` in ``font`` with an empty program.

    For CFF2, or when ``ignoreWidth`` is true, the program is reset without
    looking at the advance width. Otherwise the charstring is drawn once to
    obtain its width, which is kept in the new program when it differs from
    the Private dict's ``defaultWidthX``.
    """
    charstring, fd_index = font.CharStrings.getItemAndSelector(glyphName)

    if isCFF2 or ignoreWidth:
        # CFF2 charstrings have no widths nor 'endchar' operators
        charstring.setProgram([] if isCFF2 else ["endchar"])
        return

    has_fd_array = hasattr(font, "FDArray") and font.FDArray is not None
    private = font.FDArray[fd_index].Private if has_fd_array else font.Private
    default_width = private.defaultWidthX
    nominal_width = private.nominalWidthX

    # Drawing into a NullPen sets the charstring's width as a side effect.
    charstring.draw(NullPen())

    if charstring.width != default_width:
        charstring.program = [charstring.width - nominal_width, "endchar"]
    else:
        charstring.program = ["endchar"]
@_add_method(ttLib.getTableClass("CFF "))
def prune_pre_subset(self, font, options):
    """Prepare the CFF table for subsetting; always returns True.

    Keeps only the first font name, optionally blanks the ``.notdef`` outline
    (when ``options.notdef_glyph`` is set and ``options.notdef_outline`` is
    not), and resets each font's Encoding to ``"StandardEncoding"``.
    """
    cff = self.cff
    # CFF table must have one font only
    cff.fontNames = cff.fontNames[:1]

    blank_notdef = options.notdef_glyph and not options.notdef_outline
    if blank_notdef:
        is_cff2 = cff.major > 1
        for name in cff.keys():
            _empty_charstring(cff[name], ".notdef", isCFF2=is_cff2)

    # Clear useless Encoding
    # https://github.com/fonttools/fonttools/issues/620
    for name in cff.keys():
        cff[name].Encoding = "StandardEncoding"

    return True  # bool(cff.fontNames)
@_add_method(ttLib.getTableClass("CFF "))
def subset_glyphs(self, s):
    """Reduce every font in the CFF table to the subsetter's glyph set.

    The kept glyphs are the union of ``s.glyphs`` and ``s.glyphs_emptied``.
    CharStrings, FDSelect (when present), ``charset`` and ``numGlyphs`` are
    all filtered to that set. When ``s.options.retain_gids`` is set, the
    glyphs in ``s.glyphs_emptied`` are then replaced by empty charstrings.

    Always returns True.
    """
    cff = self.cff
    for fontname in cff.keys():
        font = cff[fontname]
        cs = font.CharStrings
        glyphs = s.glyphs.union(s.glyphs_emptied)
        # Load all glyphs
        # (the returned values are discarded; presumably the call forces each
        # retained charstring to be loaded before the index is rewritten below
        # -- TODO confirm)
        for g in font.charset:
            if g not in glyphs:
                continue
            c, _ = cs.getItemAndSelector(g)
        if cs.charStringsAreIndexed:
            # Positions within the original charset of the glyphs being kept.
            indices = [i for i, g in enumerate(font.charset) if g in glyphs]
            csi = cs.charStringsIndex
            csi.items = [csi.items[i] for i in indices]
            del csi.file, csi.offsets
            if hasattr(font, "FDSelect"):
                sel = font.FDSelect
                sel.format = None
                sel.gidArray = [sel.gidArray[i] for i in indices]
            # Rebuild the name -> index mapping against the filtered index.
            newCharStrings = {}
            for indicesIdx, charsetIdx in enumerate(indices):
                g = font.charset[charsetIdx]
                if g in cs.charStrings:
                    newCharStrings[g] = indicesIdx
            cs.charStrings = newCharStrings
        else:
            # Non-indexed charstrings: simply filter the dict by glyph name.
            cs.charStrings = {g: v for g, v in cs.charStrings.items() if g in glyphs}
        font.charset = [g for g in font.charset if g in glyphs]
        font.numGlyphs = len(font.charset)
        if s.options.retain_gids:
            isCFF2 = cff.major > 1
            for g in s.glyphs_emptied:
                _empty_charstring(font, g, isCFF2=isCFF2, ignoreWidth=True)
    return True  # any(cff[fontname].numGlyphs for fontname in cff.keys())
@_add_method(ttLib.getTableClass("CFF "))
def prune_post_subset(self, ttfFont, options):
    """Clean up the CFF table after glyph subsetting; always returns True.

    * Drops Font DICTs that FDSelect no longer references and renumbers the
      FDSelect entries accordingly.
    * Desubroutinizes when ``options.desubroutinize`` is set.
    * Removes hints when ``options.hinting`` is false; otherwise, if no
      desubroutinization was requested, removes unused subroutines.

    ``ttfFont`` is accepted for interface compatibility and is not used here.
    """
    cff = self.cff
    for fontname in cff.keys():
        font = cff[fontname]

        # Drop unused FontDictionaries
        if hasattr(font, "FDSelect"):
            sel = font.FDSelect
            indices = _uniq_sort(sel.gidArray)
            # Old FD index -> new compact index. A dict lookup replaces the
            # per-glyph linear ``indices.index(...)`` scan.
            new_index = {old: new for new, old in enumerate(indices)}
            sel.gidArray = [new_index[fd] for fd in sel.gidArray]
            arr = font.FDArray
            arr.items = [arr[i] for i in indices]
            del arr.file, arr.offsets

    # Desubroutinize if asked for
    if options.desubroutinize:
        cff.desubroutinize()

    # Drop hints if not needed. The font set's methods are called directly
    # (as for desubroutinize above) rather than through the deprecated
    # table-level shims, which merely delegate to them.
    if not options.hinting:
        cff.remove_hints()
    elif not options.desubroutinize:
        cff.remove_unused_subroutines()
    return True
# NOTE(review): decorators apply bottom-up, so `_add_method` runs first and its
# inner wrapper returns None; `deprecateFunction` therefore appears to receive
# None rather than this function -- confirm the warning is actually emitted.
@deprecateFunction(
    "use 'CFFFontSet.desubroutinize()' instead", category=DeprecationWarning
)
@_add_method(ttLib.getTableClass("CFF "))
def desubroutinize(self):
    """Deprecated table-level shim; delegates to ``self.cff.desubroutinize()``."""
    self.cff.desubroutinize()
# NOTE(review): decorators apply bottom-up, so `_add_method` runs first and its
# inner wrapper returns None; `deprecateFunction` therefore appears to receive
# None rather than this function -- confirm the warning is actually emitted.
@deprecateFunction(
    "use 'CFFFontSet.remove_hints()' instead", category=DeprecationWarning
)
@_add_method(ttLib.getTableClass("CFF "))
def remove_hints(self):
    """Deprecated table-level shim; delegates to ``self.cff.remove_hints()``."""
    self.cff.remove_hints()
# NOTE(review): decorators apply bottom-up, so `_add_method` runs first and its
# inner wrapper returns None; `deprecateFunction` therefore appears to receive
# None rather than this function -- confirm the warning is actually emitted.
@deprecateFunction(
    "use 'CFFFontSet.remove_unused_subroutines' instead", category=DeprecationWarning
)
@_add_method(ttLib.getTableClass("CFF "))
def remove_unused_subroutines(self):
    """Deprecated table-level shim; delegates to ``self.cff.remove_unused_subroutines()``."""
    self.cff.remove_unused_subroutines()

View File

@ -0,0 +1,253 @@
from __future__ import annotations
import re
from functools import lru_cache
from itertools import chain, count
from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple
try:
from lxml import etree
except ImportError:
# lxml is required for subsetting SVG, but we prefer to delay the import error
# until subset_glyphs() is called (i.e. if font to subset has an 'SVG ' table)
etree = None
from fontTools import ttLib
from fontTools.subset.util import _add_method
from fontTools.ttLib.tables.S_V_G_ import SVGDocument
# Names exported by this module.
__all__ = ["subset_glyphs"]

# Matches element ids of the exact form "glyph<digits>"; group 1 captures the digits.
GID_RE = re.compile(r"^glyph(\d+)$")

# Prefix -> namespace URI map handed to the XPath expressions compiled by xpath().
NAMESPACES = {
    "svg": "http://www.w3.org/2000/svg",
    "xlink": "http://www.w3.org/1999/xlink",
}

# The xlink "href" attribute name in "{namespace-uri}href" form, used as a key
# into element attribute mappings.
XLINK_HREF = f'{{{NAMESPACES["xlink"]}}}href'
# TODO(anthrotype): Replace with functools.cache once we are 3.9+
@lru_cache(maxsize=None)
def xpath(path):
    """Return a compiled XPath for ``path`` bound to the module's NAMESPACES.

    Results are memoized per path string, so the same expression can be
    reused on many elements without being recompiled.
    """
    compiled = etree.XPath(path, namespaces=NAMESPACES)
    return compiled
def group_elements_by_id(tree: etree.Element) -> Dict[str, etree.Element]:
    """Map each ``id`` attribute value to the svg element carrying it.

    All svg-namespace elements with an ``id`` are selected no matter where
    they are, including the root element itself:
    https://github.com/fonttools/fonttools/issues/2548
    """
    by_id: Dict[str, etree.Element] = {}
    for node in xpath("//svg:*[@id]")(tree):
        by_id[node.attrib["id"]] = node
    return by_id
def parse_css_declarations(style_attr: str) -> Dict[str, str]:
    """Parse a ``style`` attribute value into a ``{property: value}`` dict.

    Declarations are separated by ``;`` and must contain exactly one ``:``.
    Blank declarations are skipped; any other malformed declaration raises
    ``ValueError``.

    https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/style
    https://developer.mozilla.org/en-US/docs/Web/CSS/Syntax#css_declarations
    """
    declarations: Dict[str, str] = {}
    for chunk in style_attr.split(";"):
        if chunk.count(":") != 1:
            if chunk.strip():
                raise ValueError(f"Invalid CSS declaration syntax: {chunk}")
            # Whitespace-only piece (e.g. after a trailing ';'): ignore.
            continue
        name, _, value = chunk.partition(":")
        declarations[name.strip()] = value.strip()
    return declarations
def iter_referenced_ids(tree: etree.Element) -> Iterator[str]:
    """Yield every element id referenced from ``tree`` or its descendants.

    Supported references are ``xlink:href="#..."`` (as used by <use> and
    gradient templates) and local ``url(#...)`` values found in ``fill`` or
    ``clip-path``, either as attributes or inside a ``style`` attribute.
    """
    # TODO(anthrotype): Check we aren't missing other supported kinds of reference
    select_referencing_elements = xpath(
        ".//svg:*[ "
        "starts-with(@xlink:href, '#') "
        "or starts-with(@fill, 'url(#') "
        "or starts-with(@clip-path, 'url(#') "
        "or contains(@style, ':url(#') "
        "]",
    )
    # The root itself is inspected as well as the matching descendants.
    candidates = chain([tree], select_referencing_elements(tree))
    for node in candidates:
        target = href_local_target(node)
        if target is not None:
            yield target

        properties = node.attrib
        if "style" in properties:
            # Inline style declarations override same-named attributes.
            properties = {
                **dict(properties),
                **parse_css_declarations(node.attrib["style"]),
            }
        for key in ("fill", "clip-path"):
            if key not in properties:
                continue
            value = properties[key]
            if value.startswith("url(#") and value.endswith(")"):
                target = value[5:-1]
                assert target
                yield target
def closure_element_ids(
    elements: Dict[str, etree.Element], element_ids: Set[str]
) -> None:
    """Extend ``element_ids`` in place with every id reachable via references.

    Starting from the initial ids, the elements they name are scanned for
    references; newly found ids are added and scanned in turn until no new
    id turns up. Ids without a matching element are kept but not followed.
    """
    frontier = element_ids
    while frontier:
        discovered: Set[str] = set()
        for ident in frontier:
            # A dangling reference is ignored; validating the svg is not our job.
            if ident in elements:
                discovered.update(iter_referenced_ids(elements[ident]))
        discovered -= element_ids
        element_ids.update(discovered)
        frontier = discovered
def subset_elements(el: etree.Element, retained_ids: Set[str]) -> bool:
    """Prune ``el``'s subtree down to ``retained_ids``; return whether ``el`` stays.

    An element is kept when its own id is retained (the whole subtree is then
    kept untouched) or when at least one child is kept. Otherwise it is
    removed from its parent, if it has one, and False is returned.
    """
    if el.attrib.get("id") in retained_ids:
        # Retained id: keep the entire subtree without recursing.
        return True

    # Build the full list before calling any(): every child must be visited,
    # because the recursive call has the side effect of dropping empty elements.
    child_kept = [subset_elements(child, retained_ids) for child in el]
    if any(child_kept):
        return True

    assert len(el) == 0
    container = el.getparent()
    if container is not None:
        container.remove(el)
    return False
def remap_glyph_ids(
    svg: etree.Element, glyph_index_map: Dict[int, int]
) -> Dict[str, str]:
    """Rename ``id="glyph{gid}"`` elements per ``glyph_index_map``.

    ``glyph_index_map`` is an ``{old_gid: new_gid}`` mapping. Elements whose
    old gid is absent from it are renamed to a dot-prefixed id that does not
    collide with any id originally present. Returns the ``{old_id: new_id}``
    mapping of the ids that were actually changed.
    """
    by_id = group_elements_by_id(svg)
    renamed: Dict[str, str] = {}
    for current_id, node in by_id.items():
        match = GID_RE.match(current_id)
        if match is None:
            continue
        old_gid = int(match.group(1))
        new_gid = glyph_index_map.get(old_gid)
        if new_gid is None:
            # If the old index is missing, the element corresponds to a glyph
            # that was excluded from the font's subset. We rename it to avoid
            # clashes with the new GIDs or other element ids.
            replacement = f".{current_id}"
            suffix = count(1)
            while replacement in by_id:
                replacement = f"{replacement}.{next(suffix)}"
        elif old_gid == new_gid:
            # Index unchanged: nothing to rename.
            continue
        else:
            replacement = f"glyph{new_gid}"

        renamed[current_id] = replacement
        node.attrib["id"] = replacement
    return renamed
def href_local_target(el: etree.Element) -> Optional[str]:
    """Return the id targeted by ``el``'s local ``xlink:href``, else None.

    Only hrefs of the form ``#<non-empty id>`` count as local targets.
    """
    href = el.attrib.get(XLINK_HREF)
    if href is None:
        return None
    if not href.startswith("#") or len(href) <= 1:
        return None
    return href[1:]  # drop the leading #
def update_glyph_href_links(svg: etree.Element, id_map: Dict[str, str]) -> None:
    """Repoint ``xlink:href="#glyph..."`` attributes at renamed glyph ids.

    ``id_map`` maps old element ids to new ones; links whose target is not
    in the map are left unchanged.
    """
    select_glyph_links = xpath(".//svg:*[starts-with(@xlink:href, '#glyph')]")
    for node in select_glyph_links(svg):
        target = href_local_target(node)
        assert target is not None
        if target not in id_map:
            continue
        replacement = id_map[target]
        node.attrib[XLINK_HREF] = f"#{replacement}"
def ranges(ints: Iterable[int]) -> Iterator[Tuple[int, int]]:
    """Yield sorted, non-overlapping ``(min, max)`` runs of consecutive integers.

    Duplicates are ignored; an empty input yields nothing.
    """
    ordered = sorted(set(ints))
    if not ordered:
        return
    start = end = ordered[0]
    for value in ordered[1:]:
        if value - 1 == end:
            # Still consecutive: extend the current run.
            end = value
            continue
        yield (start, end)
        start = end = value
    yield (start, end)
@_add_method(ttLib.getTableClass("SVG "))
def subset_glyphs(self, s) -> bool:
    """Subset the SVG table's documents to the glyphs in ``s.glyphs``.

    For each document whose glyph range intersects the subset, the SVG is
    parsed, reduced to the ``glyph{gid}`` elements of the retained glyphs
    plus everything they reference, and (unless ``s.options.retain_gids``)
    its glyph ids and ``xlink:href`` links are renumbered to the new glyph
    indices. One record is emitted per run of consecutive new glyph ids, all
    sharing the same serialized document.

    Returns True if at least one document remains.

    Raises:
        ImportError: if lxml is not installed.
    """
    if etree is None:
        raise ImportError("No module named 'lxml', required to subset SVG")
    # glyph names (before subsetting)
    glyph_order: List[str] = s.orig_glyph_order
    # map from glyph names to original glyph indices
    rev_orig_glyph_map: Dict[str, int] = s.reverseOrigGlyphMap
    # map from original to new glyph indices (after subsetting)
    glyph_index_map: Dict[int, int] = s.glyph_index_map
    new_docs: List[SVGDocument] = []
    for doc in self.docList:
        # names of the glyphs covered by this record that are being kept
        glyphs = {
            glyph_order[i] for i in range(doc.startGlyphID, doc.endGlyphID + 1)
        }.intersection(s.glyphs)
        if not glyphs:
            # no intersection: we can drop the whole record
            continue
        svg = etree.fromstring(
            # encode because fromstring dislikes xml encoding decl if input is str.
            # SVG xml encoding must be utf-8 as per OT spec.
            doc.data.encode("utf-8"),
            parser=etree.XMLParser(
                # Disable libxml2 security restrictions to support very deep trees.
                # Without this we would get an error like this:
                # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup`
                # when parsing big fonts e.g. noto-emoji-picosvg.ttf.
                huge_tree=True,
                # ignore blank text as it's not meaningful in OT-SVG; it also prevents
                # dangling tail text after removing an element when pretty_print=True
                remove_blank_text=True,
                # don't replace entities; we don't expect any in OT-SVG and they may
                # be abused for XXE attacks
                resolve_entities=False,
            ),
        )
        elements = group_elements_by_id(svg)
        # original glyph indices of the kept glyphs, and their element ids
        gids = {rev_orig_glyph_map[g] for g in glyphs}
        element_ids = {f"glyph{i}" for i in gids}
        # add every id reachable from the kept glyph elements via references
        closure_element_ids(elements, element_ids)
        if not subset_elements(svg, element_ids):
            # nothing left in the document after pruning: drop the record
            continue
        if not s.options.retain_gids:
            id_map = remap_glyph_ids(svg, glyph_index_map)
            update_glyph_href_links(svg, id_map)
        new_doc = etree.tostring(svg, pretty_print=s.options.pretty_svg).decode("utf-8")
        new_gids = (glyph_index_map[i] for i in gids)
        # one record per run of consecutive new glyph ids
        for start, end in ranges(new_gids):
            new_docs.append(SVGDocument(new_doc, start, end, doc.compressed))
    self.docList = new_docs
    return bool(self.docList)

View File

@ -0,0 +1,25 @@
"""Private utility methods used by the subset modules"""
def _add_method(*clazzes):
    """Build a decorator that attaches the decorated function to ``clazzes``.

    The function is set as an attribute, under its own ``__name__``, on each
    distinct class given. The decorator itself returns None, so the
    decorated name is bound to None in the defining namespace.
    """

    def wrapper(method):
        handled = []
        for klass in clazzes:
            # The same class may be passed more than once (multiple names of
            # a clazz); attach only on its first occurrence.
            if klass not in handled:
                handled.append(klass)
                assert klass.__name__ != "DefaultTable", "Oops, table class not found."
                already_defined = hasattr(klass, method.__name__)
                assert not already_defined, "Oops, class '%s' has method '%s'." % (
                    klass.__name__,
                    method.__name__,
                )
                setattr(klass, method.__name__, method)
        return None

    return wrapper
def _uniq_sort(l):
    """Return the distinct items of ``l`` as a new list in ascending order."""
    distinct = list(set(l))
    distinct.sort()
    return distinct