"""
Directed multi-graph I/O module.
This module provides format handlers for reading and writing directed multi-graphs
to/from files. Format handlers are registered with FormatRegistry for use with
the IOMixin system.
The following format handlers are defined and registered:
- **dot**: DOT format (Graphviz) (extensions: .dot, .gv)
- Writer: `to_dot()` - Converts DirectedMultiGraph to DOT string
- Reader: `from_dot()` - Parses DOT string to DirectedMultiGraph
- **edgelist**: Edge-list format (extensions: .el)
- Writer: `to_edgelist()` - Converts DirectedMultiGraph to edge-list string
- Reader: `from_edgelist()` - Parses edge-list string to DirectedMultiGraph
These handlers are automatically registered when this module is imported.
DirectedMultiGraph inherits from IOMixin, so you can use:
- `graph.save('file.dot')` - Save to file (auto-detects format)
- `graph.load('file.dot')` - Load from file (auto-detects format)
- `graph.to_string(format='dot')` - Convert to string
- `graph.from_string(string, format='edgelist')` - Parse from string
- `DirectedMultiGraph.convert('in.dot', 'out.el')` - Convert between formats
Notes
-----
DOT format supports:
- Node attributes (label, shape, color, etc.)
- Edge attributes (label, weight, color, etc.)
- Graph attributes
- Parallel edges (multigraph support)
Edge-list format:
- Simple text format: one edge per line
- Format: `u v` or `u v key` or `u v key attr1=value1 attr2=value2`
- Uses node_id as the label/name
"""
from __future__ import annotations
import re
from typing import Any
from phylozoo.utils.exceptions import PhyloZooParseError
from phylozoo.utils.io import FormatRegistry
from .base import DirectedMultiGraph
def _escape_dot_string(s: str) -> str:
"""
Escape a string for use in DOT format.
Parameters
----------
s : str
String to escape.
Returns
-------
str
Escaped string.
"""
# If string contains special characters or spaces, wrap in quotes
if any(c in s for c in [" ", "\t", "\n", '"', "\\", "[", "]", "{", "}", "-", ">"]):
# Escape backslashes and quotes
s = s.replace("\\", "\\\\")
s = s.replace('"', '\\"')
return f'"{s}"'
return s
def _format_dot_attributes(attrs: dict[str, Any]) -> str:
"""
Format attributes for DOT format.
Parameters
----------
attrs : dict[str, Any]
Dictionary of attributes.
Returns
-------
str
Formatted attribute string like '[key1=value1, key2=value2]'.
"""
if not attrs:
return ""
parts = []
for key, value in attrs.items():
# Convert value to string and escape if needed
if isinstance(value, str):
value_str = _escape_dot_string(value)
elif isinstance(value, (int, float)):
value_str = str(value)
elif isinstance(value, bool):
value_str = "true" if value else "false"
else:
value_str = _escape_dot_string(str(value))
parts.append(f"{key}={value_str}")
return "[" + ", ".join(parts) + "]"
[docs]
def to_dot(graph: DirectedMultiGraph, **kwargs: Any) -> str:
    """
    Convert a DirectedMultiGraph to a DOT format string.

    Parameters
    ----------
    graph : DirectedMultiGraph
        The directed multi-graph to convert.
    **kwargs
        Optional settings. Only ``graph_name`` is read: when it is a
        non-empty string it is written as the name of the ``digraph``.
        Any other keyword is ignored.

    Returns
    -------
    str
        The DOT format string representation of the graph.

    Examples
    --------
    >>> from phylozoo.core.primitives.d_multigraph import DirectedMultiGraph
    >>> from phylozoo.core.primitives.d_multigraph.io import to_dot
    >>>
    >>> G = DirectedMultiGraph()
    >>> G.add_edge(1, 2, weight=1.0)
    0
    >>> G.add_edge(2, 3, weight=2.0)
    0
    >>> dot_str = to_dot(G)
    >>> 'digraph' in dot_str
    True
    >>> '1 -> 2' in dot_str
    True

    Notes
    -----
    The DOT format includes:
    - digraph declaration
    - Graph attributes (if any), one ``key=value;`` statement each
    - Node declarations with attributes; a node without a ``label``
      attribute gets its node id as label
    - Edge declarations with attributes
    - Support for parallel edges (multigraph): when more than one edge joins
      the same ordered pair, each of them carries its key as a ``key``
      attribute
    """
    graph_name = kwargs.get("graph_name", "")
    if graph_name:
        lines = [f"digraph {_escape_dot_string(graph_name)} {{"]
    else:
        lines = ["digraph {"]

    # Graph attributes
    if hasattr(graph, "_graph") and hasattr(graph._graph, "graph"):
        graph_attrs = graph._graph.graph
        if graph_attrs:
            for attr_name, attr_value in graph_attrs.items():
                # Same value rules as node/edge attributes: booleans in DOT
                # spelling, numbers verbatim, anything else escaped so that
                # spaces, brackets or separators cannot break the statement.
                if isinstance(attr_value, bool):
                    value_str = "true" if attr_value else "false"
                elif isinstance(attr_value, (int, float)):
                    value_str = str(attr_value)
                else:
                    value_str = _escape_dot_string(str(attr_value))
                lines.append(f" {attr_name}={value_str};")

    # Node declarations with attributes
    for node in graph.nodes():
        node_attrs = {}
        if hasattr(graph, "_graph") and node in graph._graph:
            node_data = graph._graph.nodes[node]
            if node_data:
                node_attrs = dict(node_data)
        # Use node_id as label if no label attribute is stored on the node.
        if "label" not in node_attrs:
            node_attrs["label"] = str(node)
        node_id_str = _escape_dot_string(str(node))
        # node_attrs always holds at least "label", so the list is never empty.
        lines.append(f" {node_id_str} {_format_dot_attributes(node_attrs)};")

    # Edge declarations with attributes
    for u, v, key, data in graph.edges_iter(keys=True, data=True):
        u_str = _escape_dot_string(str(u))
        v_str = _escape_dot_string(str(v))
        edge_attrs = dict(data) if data else {}
        # The key is only needed to tell parallel edges apart.
        if graph._graph.number_of_edges(u, v) > 1:
            edge_attrs["key"] = key
        attrs_str = _format_dot_attributes(edge_attrs)
        if attrs_str:
            lines.append(f" {u_str} -> {v_str} {attrs_str};")
        else:
            lines.append(f" {u_str} -> {v_str};")

    lines.append("}")
    return "\n".join(lines) + "\n"
[docs]
def from_dot(dot_string: str, **kwargs: Any) -> DirectedMultiGraph:
    """
    Parse a DOT format string and create a DirectedMultiGraph.

    Parameters
    ----------
    dot_string : str
        DOT format string containing graph data.
    **kwargs
        Additional arguments (currently unused, for compatibility).

    Returns
    -------
    DirectedMultiGraph
        Parsed directed multi-graph.

    Raises
    ------
    PhyloZooParseError
        If no ``digraph`` declaration is found or its braces are unbalanced.

    Examples
    --------
    >>> from phylozoo.core.primitives.d_multigraph.io import from_dot
    >>>
    >>> dot_str = '''digraph {
    ... 1 [label="Node1"];
    ... 2 [label="Node2"];
    ... 1 -> 2 [weight=1.0];
    ... 2 -> 3 [weight=2.0];
    ... }'''
    >>>
    >>> G = from_dot(dot_str)
    >>> G.number_of_nodes()
    3
    >>> G.number_of_edges()
    2

    Notes
    -----
    This parser expects:
    - digraph declaration
    - Node declarations (optional attributes)
    - Edge declarations (optional attributes)
    - Graph attributes (optional)
    - Support for parallel edges

    The body is read line by line and only the first statement on a line is
    considered; statements must end with ``;``. Lines that match none of the
    three statement forms are skipped. ``//`` and ``/* */`` comments and
    lines starting with ``#`` are ignored, except inside double-quoted
    strings.
    """
    content = _strip_dot_comments(dot_string)

    # The graph name is optional; when present it is separated from the keyword.
    digraph_match = re.search(
        r'digraph(?:\s+(?:\w+|"[^"]+"))?\s*\{', content, re.IGNORECASE
    )
    if not digraph_match:
        raise PhyloZooParseError("Could not find digraph declaration in DOT string")

    # The match ends on the opening brace of the declaration itself.
    start_idx = digraph_match.end() - 1
    end_idx = _find_closing_brace(content, start_idx)
    if end_idx < 0:
        raise PhyloZooParseError("Unmatched braces in DOT string")
    body = content[start_idx + 1 : end_idx]

    # Parse graph attributes, nodes, and edges
    graph_attrs = {}
    nodes_data: dict[Any, dict[str, Any]] = {}
    edges_data: list[tuple[Any, Any, int | None, dict[str, Any]]] = []

    # An ID is a bare word or a double-quoted string that may contain
    # backslash escapes (so an escaped quote does not end the string).
    id_token = r'(\w+|"(?:[^"\\]|\\.)+")'
    # Optional [ ... ] list; a "]" inside a quoted value does not close it.
    attr_list = r'(?:\[((?:[^\]"]|"(?:[^"\\]|\\.)*")*)\])?'
    # Node pattern: node_id [attributes];
    node_re = re.compile(id_token + r"\s*" + attr_list + r"\s*;")
    # Edge pattern: u -> v [attributes];
    edge_re = re.compile(
        id_token + r"\s*->\s*" + id_token + r"\s*" + attr_list + r"\s*;"
    )
    # Graph attribute pattern: key=value; (standalone, not in brackets)
    graph_attr_re = re.compile(r"^(\w+)\s*=\s*([^;]+);$")

    for raw_line in body.split("\n"):
        line = raw_line.strip()
        if not line or line.startswith("//") or line.startswith("#"):
            continue

        # Try to match graph attribute first (standalone key=value;)
        graph_attr_match = graph_attr_re.match(line)
        if graph_attr_match:
            attr_name = graph_attr_match.group(1).strip()
            graph_attrs[attr_name] = graph_attr_match.group(2).strip().strip("\"'")
            continue

        # Edges are tried before nodes because an edge line also contains
        # text that looks like a node statement.
        edge_match = edge_re.search(line)
        if edge_match:
            edge_attrs = _parse_dot_attributes(edge_match.group(3) or "")
            # "key" is always removed from the attributes: it is passed to
            # add_edge() as its own keyword below and must not appear twice.
            # A value that is not an integer is discarded (key stays None).
            key = None
            if "key" in edge_attrs:
                try:
                    key = int(edge_attrs.pop("key"))
                except (ValueError, TypeError):
                    pass
            u = _convert_node_id(_unquote_dot_id(edge_match.group(1)))
            v = _convert_node_id(_unquote_dot_id(edge_match.group(2)))
            edges_data.append((u, v, key, edge_attrs))
            continue

        node_match = node_re.search(line)
        if node_match:
            node_attrs = _parse_dot_attributes(node_match.group(2) or "")
            node_id = _convert_node_id(_unquote_dot_id(node_match.group(1)))
            nodes_data[node_id] = node_attrs
            continue

    # Create graph
    graph: Any = DirectedMultiGraph(attributes=graph_attrs if graph_attrs else None)

    # Add nodes with attributes
    for node_id, attrs in nodes_data.items():
        graph.add_node(node_id, **attrs)

    # Add edges with attributes
    for u, v, key, attrs in edges_data:
        # Ensure nodes exist
        if u not in graph:
            graph.add_node(u)
        if v not in graph:
            graph.add_node(v)
        graph.add_edge(u, v, key=key, **attrs)
    return graph  # type: ignore[no-any-return]


def _strip_dot_comments(text: str) -> str:
    """Blank out ``#`` lines and ``//`` / ``/* */`` comments outside double quotes.

    The number of lines is preserved. A quoted string is assumed to end on
    the line where it starts, so a stray quote cannot hide later comments.
    """
    cleaned = []
    in_block = False  # inside a /* ... */ comment that may span lines
    for raw in text.split("\n"):
        if not in_block and raw.lstrip().startswith("#"):
            cleaned.append("")
            continue
        kept = []
        in_quotes = False
        i = 0
        length = len(raw)
        while i < length:
            if in_block:
                close = raw.find("*/", i)
                if close == -1:
                    break
                in_block = False
                kept.append(" ")  # keep the tokens around the comment apart
                i = close + 2
                continue
            char = raw[i]
            if in_quotes:
                kept.append(char)
                if char == "\\" and i + 1 < length:
                    # Copy the escaped character so \" does not end the string.
                    kept.append(raw[i + 1])
                    i += 2
                    continue
                if char == '"':
                    in_quotes = False
                i += 1
            elif char == '"':
                in_quotes = True
                kept.append(char)
                i += 1
            elif raw.startswith("//", i):
                break
            elif raw.startswith("/*", i):
                in_block = True
                i += 2
            else:
                kept.append(char)
                i += 1
        cleaned.append("".join(kept))
    return "\n".join(cleaned)


def _find_closing_brace(content: str, open_idx: int) -> int:
    """Return the index of the brace closing the one at ``open_idx``, or -1.

    Braces inside double-quoted strings are not counted.
    """
    depth = 0
    in_quotes = False
    skip_next = False
    for i in range(open_idx, len(content)):
        char = content[i]
        if skip_next:
            skip_next = False
            continue
        if in_quotes:
            if char == "\\":
                skip_next = True
            elif char == '"' or char == "\n":
                in_quotes = False
            continue
        if char == '"':
            in_quotes = True
        elif char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return i
    return -1


def _unquote_dot_id(token: str) -> str:
    """Strip the surrounding double quotes of an ID and undo ``\\"`` / ``\\\\``."""
    if len(token) >= 2 and token.startswith('"') and token.endswith('"'):
        return re.sub(r'\\(["\\])', r"\1", token[1:-1])
    return token
def _parse_dot_attributes(attrs_str: str) -> dict[str, Any]:
"""
Parse DOT attribute string like 'key1=value1, key2=value2'.
Parameters
----------
attrs_str : str
Attribute string.
Returns
-------
dict[str, Any]
Dictionary of attributes.
"""
attrs: dict[Any, Any] = {}
if not attrs_str.strip():
return attrs
# Split by comma, but respect quoted strings
parts = []
current = ""
in_quotes = False
escape_next = False
for char in attrs_str:
if escape_next:
current += char
escape_next = False
continue
if char == "\\":
escape_next = True
current += char
continue
if char == '"' or char == "'":
in_quotes = not in_quotes
current += char
continue
if char == "," and not in_quotes:
parts.append(current.strip())
current = ""
else:
current += char
if current.strip():
parts.append(current.strip())
# Parse each key=value pair
for part in parts:
if "=" not in part:
continue
key, value = part.split("=", 1)
key = key.strip()
value = value.strip().strip("\"'")
# Try to convert to appropriate type
if value.lower() == "true":
attrs[key] = True
elif value.lower() == "false":
attrs[key] = False
else:
# Try numeric conversion
try:
if "." in value:
attrs[key] = float(value)
else:
attrs[key] = int(value)
except ValueError:
attrs[key] = value
return attrs
def _convert_node_id(node_str: str) -> Any:
"""
Convert node string to appropriate type (int, float, or str).
Parameters
----------
node_str : str
Node string.
Returns
-------
Any
Converted node ID.
"""
# Try int first
try:
return int(node_str)
except ValueError:
pass
# Try float
try:
return float(node_str)
except ValueError:
pass
# Keep as string
return node_str
[docs]
def to_edgelist(graph: DirectedMultiGraph, **kwargs: Any) -> str:
    """
    Serialise a DirectedMultiGraph as edge-list text.

    Parameters
    ----------
    graph : DirectedMultiGraph
        The directed multi-graph to write out.
    **kwargs
        Accepted for interface compatibility; not read.

    Returns
    -------
    str
        One line per edge, joined with newlines and terminated by a newline.

    Examples
    --------
    >>> from phylozoo.core.primitives.d_multigraph import DirectedMultiGraph
    >>> from phylozoo.core.primitives.d_multigraph.io import to_edgelist
    >>>
    >>> G = DirectedMultiGraph()
    >>> G.add_edge(1, 2, weight=1.0)
    0
    >>> G.add_edge(2, 3, weight=2.0)
    0
    >>> el_str = to_edgelist(G)
    >>> '1 2' in el_str
    True
    >>> '2 3' in el_str
    True

    Notes
    -----
    Each line has the shape `u v`, `u v key` or
    `u v key attr1=value1 attr2=value2`:
    - `u` and `v` are the ``str()`` of the node ids
    - the key is written only when several edges join the same ordered pair
    - every edge attribute follows as ``name=value``; string values that
      contain a space are wrapped in double quotes
    """
    rendered = []
    for u, v, key, data in graph.edges_iter(keys=True, data=True):
        fields = [str(u), str(v)]

        # The key column is only emitted to disambiguate parallel edges.
        has_parallel = graph._graph.number_of_edges(u, v) > 1
        if has_parallel:
            fields.append(str(key))

        for name, value in (data or {}).items():
            if isinstance(value, str) and " " in value:
                value = f'"{value}"'
            fields.append(f"{name}={value}")

        rendered.append(" ".join(fields))
    return "\n".join(rendered) + "\n"
[docs]
def from_edgelist(edgelist_string: str, **kwargs: Any) -> DirectedMultiGraph:
    """
    Parse an edge-list format string and create a DirectedMultiGraph.

    Parameters
    ----------
    edgelist_string : str
        Edge-list format string containing graph data.
    **kwargs
        Additional arguments (currently unused, for compatibility).

    Returns
    -------
    DirectedMultiGraph
        Parsed directed multi-graph.

    Raises
    ------
    PhyloZooParseError
        If a non-comment line holds fewer than two values.

    Examples
    --------
    >>> from phylozoo.core.primitives.d_multigraph.io import from_edgelist
    >>>
    >>> el_str = '''1 2
    ... 2 3 weight=2.0
    ... 3 4 0 key1=value1'''
    >>>
    >>> G = from_edgelist(el_str)
    >>> G.number_of_nodes()
    4
    >>> G.number_of_edges()
    3

    Notes
    -----
    This parser expects:
    - One edge per line; blank lines and lines starting with ``#`` are skipped
    - Format: `u v` or `u v key` or `u v key attr1=value1 attr2=value2`
    - Uses node_id as the label/name
    - A double-quoted attribute value may contain spaces (``label="a b"``)
    - Attribute values are converted to ``bool`` (``true``/``false``, any
      case), ``int`` or ``float`` when they look like one
    - An attribute named ``key`` is taken as the edge key when the key
      column is absent, and is never stored as an edge attribute
    """
    graph: Any = DirectedMultiGraph()

    # Whitespace-separated fields; a "..." segment stays inside its field so
    # quoted values with spaces survive. The trailing bare-quote alternative
    # keeps an unbalanced quote inside the field it appears in.
    token_re = re.compile(r'(?:[^\s"]|"[^"]*"|")+')

    for raw_line in edgelist_string.strip().split("\n"):
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        parts = token_re.findall(line)
        if len(parts) < 2:
            raise PhyloZooParseError(f"Invalid edge line (need at least 2 values): {line}")

        # Convert node strings to appropriate types
        u = _convert_node_id(parts[0])
        v = _convert_node_id(parts[1])

        # The third field is the key only if it is an integer without "=".
        key = None
        first_attr = 2
        if len(parts) > 2 and "=" not in parts[2]:
            try:
                key = int(parts[2])
            except ValueError:
                pass
            else:
                first_attr = 3

        # Parse attributes; fields without "=" are ignored.
        attrs: dict[str, Any] = {}
        for part in parts[first_attr:]:
            if "=" not in part:
                continue
            attr_key, attr_value = part.split("=", 1)
            attr_value = attr_value.strip("\"'")
            lowered = attr_value.lower()
            if lowered == "true":
                attrs[attr_key] = True
            elif lowered == "false":
                attrs[attr_key] = False
            else:
                try:
                    if "." in attr_value:
                        attrs[attr_key] = float(attr_value)
                    else:
                        attrs[attr_key] = int(attr_value)
                except ValueError:
                    attrs[attr_key] = attr_value

        # "key" is passed to add_edge() as its own keyword below; leaving it in
        # attrs as well would raise a duplicate-keyword TypeError.
        if "key" in attrs:
            named_key = attrs.pop("key")
            if key is None:
                try:
                    key = int(named_key)
                except (ValueError, TypeError):
                    pass

        # Add edge
        graph.add_edge(u, v, key=key, **attrs)
    return graph  # type: ignore[no-any-return]
# Import-time side effect: attach both formats to DirectedMultiGraph.
# DOT is registered first and is the only one flagged as the default.
FormatRegistry.register(
    DirectedMultiGraph,
    "dot",
    reader=from_dot,
    writer=to_dot,
    extensions=[".dot", ".gv"],
    default=True,
)
# Edge-list format; no ``default`` argument is passed for it.
FormatRegistry.register(
    DirectedMultiGraph,
    "edgelist",
    reader=from_edgelist,
    writer=to_edgelist,
    extensions=[".el"],
)