Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
346f794
fix(fpkm): update imports for zFPKM calculation improvements
JoshLoecker Feb 9, 2026
985c6f2
fix(fpkm): use Salmon quantification instead of STAR quantification
JoshLoecker Feb 9, 2026
d350063
chore: ruff formatting
JoshLoecker Feb 9, 2026
7482250
chore: fill with integers for faster processing
JoshLoecker Feb 9, 2026
155c822
chore: remove unnecessary async function usage
JoshLoecker Feb 9, 2026
f7b3a06
fix: remove nonexistent genes from conversion
JoshLoecker Feb 9, 2026
0e4a2c3
refactor: use more explicit (albeit longer) code to create gene_info …
JoshLoecker Feb 9, 2026
ab66599
chore: import required modules
JoshLoecker Feb 9, 2026
95654b3
refactor: optional argument for fragment data
JoshLoecker Feb 9, 2026
dec37b0
refactor: improve handling for single cell data
JoshLoecker Feb 9, 2026
fc1d45f
chore: generalize data type input
JoshLoecker Feb 9, 2026
e1505d1
chore: ruff formatting
JoshLoecker Feb 9, 2026
849ba2e
chore: simplify FPKM/RPKM calculations; properly compute per-gene FPK…
JoshLoecker Feb 9, 2026
3234413
refactor: move zfpkm calculation to external package
JoshLoecker Feb 9, 2026
f90c388
chore: use np.bool for boolean array
JoshLoecker Feb 9, 2026
8253a7d
chore: ruff formatting
JoshLoecker Feb 9, 2026
c52d2e8
feat: allow setting negative zFPKM results to 0
JoshLoecker Feb 9, 2026
e2e6350
feat: simplification to use external zfpkm package
JoshLoecker Feb 9, 2026
2ad9887
feat: allow providing the fragment size filepath (from rnaseq preproc…
JoshLoecker Feb 9, 2026
6af3990
chore(ruff): reduce max line length
JoshLoecker Feb 9, 2026
479fce2
chore(ruff): mark unsorted imports as fixable
JoshLoecker Feb 9, 2026
d83e974
chore(uv): lock pyproject file
JoshLoecker Feb 9, 2026
5afa6f3
fix: rename count to quant in testing files
JoshLoecker Feb 9, 2026
351e93c
feat: add single cell normalization using scanpy defaults
JoshLoecker Feb 9, 2026
2fd9249
fix: test new quant information
JoshLoecker Feb 9, 2026
4f07c14
fix: test new quant information
JoshLoecker Feb 9, 2026
710f3ea
chore: use quant files instead of strand files
JoshLoecker Feb 9, 2026
12b0425
chore: use quant files instead of strand files
JoshLoecker Feb 9, 2026
102edb0
chore: updated COMO_input files for naiveB to use updated FastqToGene…
JoshLoecker Feb 10, 2026
415f25f
feat: added Salmon quantification data for naive B
JoshLoecker Feb 10, 2026
07bb51d
chore: use `_read_file` function to read data
JoshLoecker Feb 10, 2026
d280fed
fix(tests): remove 1 from expected gene names to fix header
JoshLoecker Feb 10, 2026
8f7b215
fix(tests): use `endswith` instead of `is in`
JoshLoecker Feb 10, 2026
18e453d
fix(tests): Use missing file appropriately
JoshLoecker Feb 10, 2026
dc13818
chore(uv): Use dependency groups
JoshLoecker Feb 10, 2026
d463ec7
revert: use synchronous programming for more deterministic usage
JoshLoecker Feb 10, 2026
ae317d3
Merge branch 'refs/heads/fix-fpkm-calculation' into feat-scapy-normal…
JoshLoecker Feb 10, 2026
a360b7b
chore(type): fix pyrefly type errors
JoshLoecker Feb 10, 2026
d6f087b
chore(type): fix ruff & pyrefly issues
JoshLoecker Feb 10, 2026
e8f22b1
Merge branch 'develop' into typing-rnaseq-preprocess
JoshLoecker Feb 10, 2026
128841c
refactor: rename `_log_and_raise_error` to `log_and_raise_error`
JoshLoecker Feb 10, 2026
00bf915
refactor: rename `_read_file` to `read_file`
JoshLoecker Feb 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions main/como/cluster_rnaseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np

from como.data_types import LogLevel
from como.utils import _log_and_raise_error, stringlist_to_list
from como.utils import log_and_raise_error, stringlist_to_list


@dataclass
Expand All @@ -35,35 +35,35 @@ def __post_init__(self): # noqa: C901, ignore too complex
self.seed = np.random.randint(0, 100_000)

if (isdigit(self.min_active_count) and int(self.min_active_count) < 0) or self.min_active_count != "default":
_log_and_raise_error(
log_and_raise_error(
"min_active_count must be either 'default' or an integer > 0",
error=ValueError,
level=LogLevel.ERROR,
)

if (isdigit(self.quantile) and 0 > int(self.quantile) > 100) or self.quantile != "default":
_log_and_raise_error(
log_and_raise_error(
"quantile must be either 'default' or an integer between 0 and 100",
error=ValueError,
level=LogLevel.ERROR,
)

if (isdigit(self.replicate_ratio) and 0 > self.replicate_ratio > 1.0) or self.replicate_ratio != "default":
_log_and_raise_error(
log_and_raise_error(
"--rep-ratio must be either 'default' or a float between 0 and 1",
error=ValueError,
level=LogLevel.ERROR,
)

if (isdigit(self.batch_ratio) and 0 > self.batch_ratio > 1.0) or self.batch_ratio != "default":
_log_and_raise_error(
log_and_raise_error(
"--batch-ratio must be either 'default' or a float between 0 and 1",
error=ValueError,
level=LogLevel.ERROR,
)

if self.filtering_technique.lower() not in {"quantile", "tpm", "cpm", "zfpkm"}:
_log_and_raise_error(
log_and_raise_error(
"--technique must be either 'quantile', 'tpm', 'cpm', 'zfpkm'",
error=ValueError,
level=LogLevel.ERROR,
Expand All @@ -73,35 +73,35 @@ def __post_init__(self): # noqa: C901, ignore too complex
self.filtering_technique = "quantile"

if self.cluster_algorithm.lower() not in {"mca", "umap"}:
_log_and_raise_error(
log_and_raise_error(
"--clust_algo must be either 'mca', 'umap'",
error=ValueError,
level=LogLevel.ERROR,
)

if 0 > self.min_distance > 1.0:
_log_and_raise_error(
log_and_raise_error(
"--min_dist must be a float between 0 and 1",
error=ValueError,
level=LogLevel.ERROR,
)

if (isdigit(self.num_replicate_neighbors) and self.num_replicate_neighbors < 1) or self.num_replicate_neighbors != "default":
_log_and_raise_error(
log_and_raise_error(
"--n-neighbors-rep must be either 'default' or an integer > 1",
error=ValueError,
level=LogLevel.ERROR,
)

if (isdigit(self.num_batch_neighbors) and self.num_batch_neighbors < 1) or self.num_batch_neighbors != "default":
_log_and_raise_error(
log_and_raise_error(
"--n-neighbors-batch must be either 'default' or an integer > 1",
error=ValueError,
level=LogLevel.ERROR,
)

if (isdigit(self.num_context_neighbors) and self.num_context_neighbors < 1) or self.num_context_neighbors != "default":
_log_and_raise_error(
log_and_raise_error(
"--n-neighbors-context must be either 'default' or an integer > 1",
error=ValueError,
level=LogLevel.ERROR,
Expand Down
6 changes: 3 additions & 3 deletions main/como/combine_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
_OutputCombinedSourceFilepath,
_SourceWeights,
)
from como.utils import LogLevel, _log_and_raise_error, _num_columns
from como.utils import LogLevel, log_and_raise_error, _num_columns, get_missing_gene_data


def _combine_z_distribution_for_batch(
Expand Down Expand Up @@ -186,7 +186,7 @@ def _combine_z_distribution_for_context(
for res in zscore_results:
matrix = res.z_score_matrix.copy()
if len(matrix.columns) > 1:
_log_and_raise_error(
log_and_raise_error(
f"Expected a single column for combined z-score dataframe for data '{res.type.value.lower()}'. Got '{len(matrix.columns)}' columns",
error=ValueError,
level=LogLevel.ERROR,
Expand Down Expand Up @@ -302,7 +302,7 @@ async def _begin_combining_distributions(
else ""
)
if not index_name:
_log_and_raise_error(
log_and_raise_error(
f"Unable to find common gene identifier across batches for source '{source.value}' in context '{context_name}'",
error=ValueError,
level=LogLevel.ERROR,
Expand Down
Loading