Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
346f794
fix(fpkm): update imports for zFPKM calculation improvements
JoshLoecker Feb 9, 2026
985c6f2
fix(fpkm): use Salmon quantification instead of STAR quantification
JoshLoecker Feb 9, 2026
d350063
chore: ruff formatting
JoshLoecker Feb 9, 2026
7482250
chore: fill with integers for faster processing
JoshLoecker Feb 9, 2026
155c822
chore: remove unnecessary async function usage
JoshLoecker Feb 9, 2026
f7b3a06
fix: remove nonexistent genes from conversion
JoshLoecker Feb 9, 2026
0e4a2c3
refactor: use more explicit (albeit longer) code to create gene_info …
JoshLoecker Feb 9, 2026
ab66599
chore: import required modules
JoshLoecker Feb 9, 2026
95654b3
refactor: optional argument for fragment data
JoshLoecker Feb 9, 2026
dec37b0
refactor: improve handling for single cell data
JoshLoecker Feb 9, 2026
fc1d45f
chore: generalize data type input
JoshLoecker Feb 9, 2026
e1505d1
chore: ruff formatting
JoshLoecker Feb 9, 2026
849ba2e
chore: simplify FPKM/RPKM calculations; properly compute per-gene FPK…
JoshLoecker Feb 9, 2026
3234413
refactor: move zfpkm calculation to external package
JoshLoecker Feb 9, 2026
f90c388
chore: use np.bool for boolean array
JoshLoecker Feb 9, 2026
8253a7d
chore: ruff formatting
JoshLoecker Feb 9, 2026
c52d2e8
feat: allow setting negative zFPKM results to 0
JoshLoecker Feb 9, 2026
e2e6350
feat: simplification to use external zfpkm package
JoshLoecker Feb 9, 2026
2ad9887
feat: allow providing the fragment size filepath (from rnaseq preproc…
JoshLoecker Feb 9, 2026
6af3990
chore(ruff): reduce max line length
JoshLoecker Feb 9, 2026
479fce2
chore(ruff): mark unsorted imports as fixable
JoshLoecker Feb 9, 2026
d83e974
chore(uv): lock pyproject file
JoshLoecker Feb 9, 2026
5afa6f3
fix: rename count to quant in testing files
JoshLoecker Feb 9, 2026
4f07c14
fix: test new quant information
JoshLoecker Feb 9, 2026
710f3ea
chore: use quant files instead of strand files
JoshLoecker Feb 9, 2026
102edb0
chore: updated COMO_input files for naiveB to use updated FastqToGene…
JoshLoecker Feb 10, 2026
415f25f
feat: added Salmon quantification data for naive B
JoshLoecker Feb 10, 2026
07bb51d
chore: use `_read_file` function to read data
JoshLoecker Feb 10, 2026
d280fed
fix(tests): remove 1 from expected gene names to fix header
JoshLoecker Feb 10, 2026
8f7b215
fix(tests): use `endswith` instead of `is in`
JoshLoecker Feb 10, 2026
18e453d
fix(tests): Use missing file appropriately
JoshLoecker Feb 10, 2026
dc13818
chore(uv): Use dependency groups
JoshLoecker Feb 10, 2026
d463ec7
revert: use synchronous programming for more deterministic usage
JoshLoecker Feb 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
617 changes: 269 additions & 348 deletions main/como/rnaseq_gen.py

Large diffs are not rendered by default.

565 changes: 313 additions & 252 deletions main/como/rnaseq_preprocess.py

Large diffs are not rendered by default.

31 changes: 17 additions & 14 deletions main/como/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,35 +188,39 @@ async def get_missing_gene_data(values: list[str] | pd.DataFrame, taxon_id: int


@overload
async def _read_file(path: None, h5ad_as_df: Literal[True] | Literal[False], **kwargs) -> None: ...
def _read_file(path: None, h5ad_as_df: bool = True, **kwargs: Any) -> None: ...


@overload
async def _read_file(path: pd.DataFrame, h5ad_as_df: Literal[True] | Literal[False], **kwargs) -> pd.DataFrame: ...
def _read_file(path: pd.DataFrame, h5ad_as_df: bool = True, **kwargs: Any) -> pd.DataFrame: ...


@overload
async def _read_file(path: sc.AnnData, h5ad_as_df: Literal[False] = False, **kwargs) -> sc.AnnData: ...
def _read_file(path: io.StringIO, h5ad_as_df: bool = True, **kwargs: Any) -> pd.DataFrame: ...


@overload
async def _read_file(path: sc.AnnData, h5ad_as_df: Literal[True] = True, **kwargs) -> pd.DataFrame: ...
def _read_file(path: sc.AnnData, h5ad_as_df: Literal[False], **kwargs: Any) -> sc.AnnData: ...


@overload
async def _read_file(path: Path, h5ad_as_df: Literal[False] = False, **kwargs) -> pd.DataFrame | sc.AnnData: ...
def _read_file(path: sc.AnnData, h5ad_as_df: Literal[True] = True, **kwargs: Any) -> pd.DataFrame: ...


@overload
async def _read_file(path: Path, h5ad_as_df: Literal[True] = True, **kwargs) -> pd.DataFrame: ...
def _read_file(path: Path, h5ad_as_df: Literal[False], **kwargs: Any) -> pd.DataFrame | sc.AnnData: ...


async def _read_file(
@overload
def _read_file(path: Path, h5ad_as_df: Literal[True] = True, **kwargs: Any) -> pd.DataFrame: ...


def _read_file(
path: Path | io.StringIO | pd.DataFrame | sc.AnnData | None,
h5ad_as_df: bool = True,
**kwargs,
**kwargs: Any,
) -> pd.DataFrame | sc.AnnData | None:
"""Asynchronously read a filepath and return a pandas DataFrame.
"""Read a filepath and return pandas.DataFrame or scanpy.AnnData.

If the provided path is None, None will also be returned.
None may be provided to this function so that `asyncio.gather` can safely be used on all sources
Expand Down Expand Up @@ -244,18 +248,17 @@ async def _read_file(
_log_and_raise_error(f"File {path} does not exist", error=FileNotFoundError, level=LogLevel.CRITICAL)

match path.suffix:
case ".csv" | ".tsv" | ".txt" | ".tab":
case ".csv" | ".tsv" | ".txt" | ".tab" | ".sf":
kwargs.setdefault("sep", "," if path.suffix == ".csv" else "\t") # set sep if not defined
async with aiofiles.open(path) as i_stream:
content = await i_stream.read()
return pd.read_csv(io.StringIO(content), **kwargs)
return pd.read_csv(path, **kwargs)
case ".xlsx" | ".xls":
return pd.read_excel(path, **kwargs)
case ".h5ad":
adata: sc.AnnData = sc.read_h5ad(path, **kwargs)
if h5ad_as_df:
df = adata.to_df().T
df.index.name = "gene_symbol"
if not df.index.name:
df.index.name = "gene_symbol"
df.reset_index(inplace=True)
return df
return adata
Expand Down
336,507 changes: 78,362 additions & 258,145 deletions main/data/COMO_input/naiveB/fragmentSizes/S1/naiveB_S1R1_fragment_size.txt

Large diffs are not rendered by default.

Loading