from pathlib import Path
from typing import Union
import pandas as pd
from pandas_genomics.scalars import Region
[docs]def from_bed(filename: Union[str, Path]):
"""
Yields genomic regions from a bed file as Region scalars
Parameters
----------
filename: str or Path
bed file
Returns
-------
List[Region]
"""
with open(filename, "r") as input:
for idx, line in enumerate(input):
if (
line.startswith("browser")
or line.startswith("track")
or line.startswith("#")
):
continue
line = line.split("\t")
if len(line) < 3:
raise ValueError(
f"Expected at least 3 tab-delimited fields, found {len(line)} in '{line}'."
)
# Note that positions need to be incremented by 1 to go from 0-based to 1-based.
yield Region(line[0], int(line[1]) + 1, int(line[2]) + 1, line[3])