Skip to content

Commit 6f82fc3

Browse files
committed
code cleanup & small changes in stats calc
1 parent 060ff80 commit 6f82fc3

File tree

14 files changed

+411
-75
lines changed

14 files changed

+411
-75
lines changed

.github/workflows/docker.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ jobs:
3232
- name: extract version
3333
id: extract_version
3434
run: |
35-
VERSION=$(grep 'ARG VERSION=' Dockerfile | cut -d'=' -f2)
35+
VERSION=${GITHUB_REF#refs/tags/}
3636
echo "::set-output name=VERSION::$VERSION"
3737
3838
- name: build and push

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
# [0.2.2]
9+
10+
## Features
11+
12+
- add test data
13+
14+
## Fix
15+
16+
- Code cleanup
17+
- Fix writing to stats.txt, which caused overwriting across different subcalls
18+
19+
820
# [0.2.1]
921

1022
## Features

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
cmake_minimum_required(VERSION 3.22.1)
2-
project(RNAnue VERSION 0.2.1)
2+
project(RNAnue VERSION 0.2.2)
33
set(CMAKE_CXX_STANDARD 20)
44
set(CMAKE_CXX_STANDARD_REQUIRED True)
55
set(CMAKE_CXX_FLAGS -fopenmp)

README.md

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[![docker-release](https://github.com/Ibvt/RNAnue/actions/workflows/docker.yml/badge.svg)](https://github.com/Ibvt/RNAnue/actions/workflows/docker.yml)
22

3-
# RNAnue - 0.2.1
3+
# RNAnue - 0.2.2
44

55
## About
66
RNAnue is a comprehensive analysis to detect RNA-RNA interactions from Direct-Duplex-Detection (DDD) data.
@@ -126,9 +126,8 @@ columns are defined in the following:
126126

127127
### Interaction table
128128

129-
The `analysis` procedure generates `_interactions` files for each library in
130-
which each line represents an annotated split read that is mapped to a
131-
transcript interaction. The fields are defined as follows:
129+
The `analysis` procedure generates `_interactions` files for each library in which each line represents an annotated
130+
split read that is mapped to a transcript interaction. The fields are defined as follows:
132131

133132
| Field | Description |
134133
| ----- | ----------- |
@@ -157,11 +156,9 @@ transcript interaction. The fields are defined as follows:
157156
| mfe | Hybridisation energy of the interaction |
158157
| mfe_struc | Minimum free energy (MFE) structure of interaction in dot-bracket notation |
159158

160-
The main result of an RNAnue analysis are transcript interactions.
161-
They are stored in the file `allints.txt` in the same directory.
162-
Its entries are structured as described in the following where
163-
columns with prefix <sample> are given for each sample specified in
164-
the analysis (within the same file).
159+
The main results of an RNAnue analysis are transcript interactions. They are stored in the file `allints.txt` in the
160+
same directory. Its entries are structured as described in the following where columns with prefix <sample> are given
161+
for each sample specified in the analysis (within the same file).
165162

166163
| Field | Description |
167164
|-----------------------| ----------- |
@@ -182,10 +179,12 @@ in JSON graph format. Finally, –stats set to 1 creates a `stats.txt` file that
182179
each step of the analysis.
183180

184181
### Docker
185-
In additon, we provide a ready-to-use Docker container that has RNAnue preconfigured.
186-
https://hub.docker.com/repository/docker/cobirna/rnanue
182+
In addition, we provide a ready-to-use [Docker container](https://hub.docker.com/repository/docker/cobirna/rnanue) that
183+
has RNAnue preconfigured.
187184

188185
### Testing
189186

187+
We provide a test dataset in the [test](./test/data/) folder that can be used to test the installation.
188+
190189
# Troubleshooting
191190
contact [email protected] or create an issue

include/Analysis.hpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,27 @@
1313
#include <boost/program_options.hpp>
1414
#include <boost/filesystem.hpp>
1515
#include <boost/property_tree/ptree.hpp>
16+
#include <boost/property_tree/json_parser.hpp>
1617
#include <boost/accumulators/accumulators.hpp>
1718
#include <boost/accumulators/statistics.hpp>
1819
#include <boost/math/distributions/binomial.hpp>
19-
20+
#include <boost/math/distributions/chi_squared.hpp>
2021

2122
// SeqAn3
2223
#include <seqan3/io/sam_file/all.hpp>
2324
#include <seqan3/core/debug_stream.hpp>
2425

2526
// Class
2627
#include "IBPTree.hpp"
28+
#include "Stats.hpp"
2729

2830
// define tags
2931
using seqan3::operator""_tag;
3032

3133
namespace po = boost::program_options;
3234
namespace fs = boost::filesystem;
3335
namespace pt = boost::property_tree;
36+
namespace jp = boost::property_tree::json_parser;
3437
namespace ma = boost::math;
3538

3639
class Analysis {
@@ -44,10 +47,12 @@ class Analysis {
4447
void normalize(); // normalize the frequencies to 1
4548

4649
// write output files (of the analysis)
50+
void writeStats();
4751
void writeInteractionsHeader(std::ofstream& fout);
48-
void writeAllIntsHeader(std::ofstream& fout);
49-
void addToAllIntsHeader(std::ofstream& fout, std::string key);
52+
void writeAllIntsHeader(std::vector<int> condLastFlag, std::ofstream& fout);
5053
void writeAllInts();
54+
void writeAllIntsCounts();
55+
void writeAllIntsJGF();
5156

5257
// other operations
5358
void addToFreqMap(std::pair<std::string,std::string> key, double value);
@@ -59,19 +64,27 @@ class Analysis {
5964
double calcGCS(std::vector<double>& complementarities);
6065
double calcGHS(std::vector<double>& hybenergies);
6166
double calcStat(dtp::IntKey key, int x);
67+
double calcAdjusted(std::vector<double>& values);
6268

6369
private:
6470
po::variables_map params;
6571
IBPTree features;
6672
std::map<std::pair<std::string,std::string>,double> freq; // strand, name
6773
std::string condition; // buffers the current condition
68-
// maps for storing filters and suppreads
74+
std::vector<std::string> conditions; // buffers all conditions
75+
76+
// maps for storing filters and suppreads (and other information)
6977
std::map<dtp::IntKey, std::vector<double>> suppreads;
7078
std::map<dtp::IntKey, std::vector<std::vector<double>>> complementarities;
7179
std::map<dtp::IntKey, std::vector<std::vector<double>>> hybenergies;
7280

81+
// Stats
82+
std::shared_ptr<Stats> stats;
83+
7384
int repcount; // counter for current replicate
7485
int readcount; // total number of reads
86+
std::vector<int> repcountCond; // number of replicates per condition
87+
7588
};
7689

7790
#endif //RNANUE_ANALYSIS_HPP

include/DataTypes.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,13 @@ namespace dtp {
7575
int alignedCount;
7676
int splitsCount;
7777
int multSplitsCount;
78-
int nSurvivedCount;
78+
int interactionsCount;
79+
StatsFields() : readsCount(0), alignedCount(0), splitsCount(0), multSplitsCount(0), interactionsCount(0) {}
80+
StatsFields(int readsCount, int alignedCount, int splitsCount, int multSplitsCount) :
81+
readsCount(readsCount), alignedCount(alignedCount), splitsCount(splitsCount),
82+
multSplitsCount(multSplitsCount), interactionsCount(0) {} // constructor for analysis class
7983
};
80-
using StatsMap = std::map<std::string, StatsFields>;
84+
using StatsMap = std::map<std::string, std::vector<StatsFields>>;
8185
using SpliceJunctions = std::map<std::string, std::vector<std::pair<size_t,size_t>>>;
8286

8387
// Analysis

include/SplitReadCalling.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,14 @@ class SplitReadCalling {
111111
void addComplementarityToSamRecord(SAMrecord &rec1, SAMrecord &rec2, TracebackResult &res);
112112
void addHybEnergyToSamRecord(SAMrecord &rec1, SAMrecord &rec2, double &hyb);
113113
void writeSAMrecordToBAM(auto& bamfile, std::vector<std::pair<SAMrecord, SAMrecord>>& records);
114+
void writeStats();
114115

115116

116117
private:
117118
po::variables_map params;
118119
IBPTree features;
119-
//Stats stats;
120120
std::shared_ptr<Stats> stats;
121+
int replPerCond; // number of replicates per condition
121122
std::string condition; // stores the current condition
122123
std::deque<std::string> refIds; // stores the reference ids
123124
FilterScores filterScores;

include/Stats.hpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ namespace fs = boost::filesystem;
1818
class Stats {
1919
public:
2020
Stats();
21+
Stats(std::string statsFile);
2122

2223
// Move constructor / not needed because it's unique
2324
Stats(Stats&& other) noexcept : stats(other.stats) {}
@@ -36,13 +37,16 @@ class Stats {
3637
Stats& operator=(const Stats&) = delete;
3738

3839
// getter & setter
39-
void setReadsCount(std::string condition, int increment);
40-
void setAlignedCount(std::string condition, int increment);
41-
void setSplitsCount(std::string condition, int increment);
42-
void setMultSplitsCount(std::string condition, int increment);
40+
void setReadsCount(std::string condition, int repl, int increment);
41+
void setAlignedCount(std::string condition, int repl, int increment);
42+
void setSplitsCount(std::string condition, int repl, int increment);
43+
void setMultSplitsCount(std::string condition, int repl, int increment);
44+
void setInteractionsCount(std::string condition, int repl, int increment);
45+
46+
void reserveStats(std::string condition, int repl); // creates new entry for replicate
4347

4448
// write stats back to file
45-
void writeStats(fs::path outdir);
49+
void writeStats(fs::path outdir, std::string subcall);
4650

4751
private:
4852
dtp::StatsMap stats;

include/Utility.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
#include <iostream>
66
#include <iomanip>
77
#include <chrono>
8+
#include <random>
9+
810

911
// Boost
1012
#include <boost/filesystem.hpp>
@@ -42,10 +44,13 @@ namespace helper {
4244
bool withinRange(int a, int b, int range);
4345
std::string removeNonPrintable(const std::string str);
4446
std::string getTime(); // reports the current time
47+
48+
std::vector<int> lastOccFlag(std::vector<std::string>& vec);
4549
}
4650

4751
namespace stats {
4852
double median(std::vector<double>& values);
53+
double randNum(double min, double max);
4954
}
5055

5156
// sequence input/output

0 commit comments

Comments
 (0)