Skip to content

Filetypes

The following filetypes are available to use:

  • AlignmentInput
  • CramOrBamPath
  • BamPath
  • CramPath
  • GvcfPath
  • FastqPair
  • FastqPairs

You can import them from the cpg_flow package:

from cpg_flow import AlignmentInput, CramOrBamPath, BamPath, CramPath, GvcfPath, FastqPair, FastqPairs

cpg_flow.filetypes.AlignmentInput

Bases: ABC

Data that works as input for alignment or realignment.

exists abstractmethod

exists()

Check if all files exist.

Source code in src/cpg_flow/filetypes.py
21
22
23
24
25
@abstractmethod
def exists(self) -> bool:
    """
    Check if all files exist.

    This is an abstract method: the body holds nothing but this docstring,
    so every concrete subclass has to provide its own implementation.

    Returns:
        A bool (per the annotation); presumably True only when every
        underlying file is present - confirm against the subclasses.
    """

cpg_flow.filetypes.CramOrBamPath

CramOrBamPath(
    path, index_path=None, reference_assembly=None
)

Bases: AlignmentInput, ABC

Represents a path to a CRAM or a BAM file, optionally with corresponding index.

Source code in src/cpg_flow/filetypes.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(
    self,
    path: str | Path,
    index_path: str | Path | None = None,
    reference_assembly: str | Path | None = None,
):
    """
    Record the alignment file location plus its optional index and reference.

    Args:
        path: location of the CRAM/BAM file.
        index_path: optional location of the index file. Its final suffix
            must equal `.{self.index_ext}`.
        reference_assembly: optional location of the reference assembly.

    Raises:
        AssertionError: if `index_path` is given but does not end with the
            expected index extension.
    """
    self.path = to_path(path)
    self.index_path: Path | None = None
    self.full_index_suffix: str | None = None
    if index_path:
        self.index_path = to_path(index_path)
        assert self.index_path.suffix == f".{self.index_ext}", (
            f"Index path {self.index_path} must end with .{self.index_ext}"
        )
        # Strip the extension-less data path from the FRONT of the index path
        # only. A blanket str.replace() would also delete every later
        # occurrence of that stem, e.g. "a.cram" + "a.cram.crai" would yield
        # ".crm.cri" instead of ".cram.crai".
        stem = str(self.path.with_suffix(""))
        self.full_index_suffix = str(self.index_path).removeprefix(stem)
        # NOTE(review): the stored suffix keeps its leading "." (e.g.
        # ".cram.crai") - confirm that consumers of full_index_suffix
        # expect that form.
    self.reference_assembly = None
    if reference_assembly:
        self.reference_assembly = to_path(reference_assembly)

ext abstractmethod property

ext

The canonical extension for the file type, without a '.' at the start.

index_ext abstractmethod property

index_ext

The canonical index file extension, without a '.' at the start.

exists

exists()

Check whether the CRAM or BAM file exists.

Source code in src/cpg_flow/filetypes.py
76
77
78
79
80
def exists(self) -> bool:
    """
    Report whether the alignment file at `self.path` exists.

    Only the data file itself is checked; the index path is not consulted.
    """
    alignment_path = self.path
    return exists(alignment_path)

resource_group

resource_group(b)

Create a Hail Batch resource group

Source code in src/cpg_flow/filetypes.py
82
83
84
85
86
87
88
89
90
91
92
def resource_group(self, b: Batch) -> ResourceGroup:
    """
    Read the alignment file into the batch as an input resource group.

    The data file is keyed by `self.ext`. When `self.full_index_suffix` is
    set, the index path is added to the group under that key as well.
    """
    inputs = {self.ext: str(self.path)}
    index_key = self.full_index_suffix
    if index_key:
        inputs[index_key] = str(self.index_path)
    return b.read_input_group(**inputs)

cpg_flow.filetypes.BamPath

BamPath(path, index_path=None)

Bases: CramOrBamPath

Represents a path to a BAM file, optionally with corresponding index.

Source code in src/cpg_flow/filetypes.py
103
104
105
106
107
108
def __init__(
    self,
    path: str | Path,
    index_path: str | Path | None = None,
):
    """
    Set up a BAM path by delegating to the parent initialiser.

    Only the file path and the optional index path are forwarded; no
    reference assembly is passed on.
    """
    super().__init__(path, index_path=index_path)

exists

exists()

Check whether the CRAM or BAM file exists.

Source code in src/cpg_flow/filetypes.py
76
77
78
79
80
def exists(self) -> bool:
    """
    Report whether the alignment file at `self.path` exists.

    Only the data file itself is checked; the index path is not consulted.
    """
    alignment_path = self.path
    return exists(alignment_path)

resource_group

resource_group(b)

Create a Hail Batch resource group

Source code in src/cpg_flow/filetypes.py
82
83
84
85
86
87
88
89
90
91
92
def resource_group(self, b: Batch) -> ResourceGroup:
    """
    Read the alignment file into the batch as an input resource group.

    The data file is keyed by `self.ext`. When `self.full_index_suffix` is
    set, the index path is added to the group under that key as well.
    """
    inputs = {self.ext: str(self.path)}
    index_key = self.full_index_suffix
    if index_key:
        inputs[index_key] = str(self.index_path)
    return b.read_input_group(**inputs)

cpg_flow.filetypes.CramPath

CramPath(path, index_path=None, reference_assembly=None)

Bases: CramOrBamPath

Represents a path to a CRAM file, optionally with corresponding index.

Source code in src/cpg_flow/filetypes.py
127
128
129
130
131
132
133
134
def __init__(
    self,
    path: str | Path,
    index_path: str | Path | None = None,
    reference_assembly: str | Path | None = None,
):
    """
    Set up a CRAM path and derive its `.somalier` sibling path.

    All three arguments are handed to the parent initialiser unchanged.
    """
    super().__init__(
        path,
        index_path=index_path,
        reference_assembly=reference_assembly,
    )
    # Sibling path: the CRAM path with ".somalier" appended.
    self.somalier_path = to_path(f"{self.path}.somalier")

somalier_path instance-attribute

somalier_path = to_path(f'{path}.somalier')

exists

exists()

Check whether the CRAM or BAM file exists.

Source code in src/cpg_flow/filetypes.py
76
77
78
79
80
def exists(self) -> bool:
    """
    Report whether the alignment file at `self.path` exists.

    Only the data file itself is checked; the index path is not consulted.
    """
    alignment_path = self.path
    return exists(alignment_path)

resource_group

resource_group(b)

Create a Hail Batch resource group

Source code in src/cpg_flow/filetypes.py
82
83
84
85
86
87
88
89
90
91
92
def resource_group(self, b: Batch) -> ResourceGroup:
    """
    Read the alignment file into the batch as an input resource group.

    The data file is keyed by `self.ext`. When `self.full_index_suffix` is
    set, the index path is added to the group under that key as well.
    """
    inputs = {self.ext: str(self.path)}
    index_key = self.full_index_suffix
    if index_key:
        inputs[index_key] = str(self.index_path)
    return b.read_input_group(**inputs)

cpg_flow.filetypes.GvcfPath

GvcfPath(path)

Represents GVCF data on a bucket within the workflow. Includes a path to a GVCF file along with a corresponding TBI index, and a corresponding fingerprint path.

Source code in src/cpg_flow/filetypes.py
152
153
154
def __init__(self, path: Path | str):
    """
    Store the GVCF location and derive its `.somalier` sibling path.
    """
    gvcf = to_path(path)
    self.path = gvcf
    # Sibling path: the GVCF path with ".somalier" appended.
    self.somalier_path = to_path(f"{gvcf}.somalier")

somalier_path instance-attribute

somalier_path = to_path(f'{path}.somalier')

tbi_path property

tbi_path

Path to the corresponding index

exists

exists()

GVCF file exists.

Source code in src/cpg_flow/filetypes.py
162
163
164
165
166
def exists(self) -> bool:
    """
    Report whether the GVCF file at `self.path` exists.

    The check is delegated to the path object's own `exists()` method.
    """
    gvcf = self.path
    return gvcf.exists()

resource_group

resource_group(b)

Create a Hail Batch resource group

Source code in src/cpg_flow/filetypes.py
175
176
177
178
179
180
181
182
183
184
def resource_group(self, b: Batch) -> ResourceGroup:
    """
    Read the GVCF and its index into the batch as an input resource group.

    The group has two entries: the GVCF under "g.vcf.gz" and
    `self.tbi_path` under "g.vcf.gz.tbi".
    """
    inputs = {
        "g.vcf.gz": str(self.path),
        "g.vcf.gz.tbi": str(self.tbi_path),
    }
    return b.read_input_group(**inputs)

cpg_flow.filetypes.FastqPair dataclass

FastqPair(r1, r2)

Bases: AlignmentInput

Pair of FASTQ files

as_resources

as_resources(b)

Makes a pair of ResourceFile objects for r1 and r2.

Source code in src/cpg_flow/filetypes.py
203
204
205
206
207
208
209
210
211
212
213
214
215
216
def as_resources(self, b) -> "FastqPair":
    """
    Build a new FastqPair whose two reads are ResourceFile objects.

    A read that already is a ResourceFile is reused as-is; anything else is
    converted to a string and passed to `b.read_input`.
    """

    def _as_resource(read):
        # Leave existing resources alone; localise everything else.
        if isinstance(read, ResourceFile):
            return read
        return b.read_input(str(read))

    return FastqPair(_as_resource(self[0]), _as_resource(self[1]))

exists

exists()

Check if each FASTQ file in the pair exists.

Source code in src/cpg_flow/filetypes.py
218
219
220
221
222
def exists(self) -> bool:
    """
    Report whether both FASTQ files of the pair exist.

    `r2` is only checked when `r1` was found.
    """
    r1_found = exists(self.r1)
    return r1_found and exists(self.r2)

cpg_flow.filetypes.FastqPairs

Bases: list[FastqPair], AlignmentInput

Multiple FASTQ file pairs belonging to the same sequencing_group (e.g. multiple lanes or top-ups).

exists

exists()

Check if each FASTQ file in each pair exists.

Source code in src/cpg_flow/filetypes.py
243
244
245
246
247
def exists(self) -> bool:
    """
    Report whether every FASTQ pair in this collection exists.

    An empty collection yields True; checking stops at the first pair that
    reports missing files.
    """
    pair_checks = (fastq_pair.exists() for fastq_pair in self)
    return all(pair_checks)