# Philosopher pipeline configuration file
# Version 3.6.0
#
# The pipeline mode automates the processing done by Philosopher and other tools. First, check
# the steps you want to execute in the Steps section and change them to
# 'yes'. For each selected step, go to the section of the same name and adjust
# the parameters according to your analysis.
#
# If you want to include MSFragger and TMT-Integrator into your analysis, you will
# have to download them separately, and then add their location in their 
# configuration section.
#
# Usage:
# philosopher pipeline --config <this_configuration_file> [list_of_data_set_folders]

# Top-level settings: usage analytics and optional Slack messaging (a token plus either a channel name or a user ID).
analytics: true							# reports when a workspace is created for usage statistics
slackToken: 							# specify the Slack API token (how to generate a token: https://api.slack.com/legacy/custom-integrations/legacy-tokens)
slackChannel: 							# specify the channel name, or
slackUserID: 							# specify a user ID for a direct message

# Pipeline steps: set a step to 'yes' to run it, then adjust the section of the same name further down.
# NOTE(review): unquoted yes/no are read as booleans by YAML 1.1 loaders and as strings by YAML 1.2 loaders;
# confirm which form Philosopher's loader expects before quoting these values.
Steps:
  Database Search: yes							# peptide to spectrum matching with Comet or MSFragger
  Peptide Validation: yes							# peptide assignment validation with PeptideProphet
  PTM Localization: no							# PTM site localization with PTMProphet
  Protein Inference: no							# protein identification validation with ProteinProphet
  Label-Free Quantification: yes							# precursor label-free quantification inspired by moFF
  Isobaric Quantification: yes							# isobaric labeling-based relative quantification for TMT and iTRAQ
  Bio Cluster Quantification: no							# protein report based on Uniprot protein clusters
  FDR Filtering: yes							# statistical filtering, validation and false discovery rate assessment
  Individual Reports: yes							# multi-level reporting for both narrow-searches and open-searches
  Integrated Reports: yes							# combined analysis of LC-MS/MS results inspired by Abacus
  Integrated Isobaric Quantification: yes							# integrates channel abundances from multiple isobaric-tagged samples with TMT-Integrator

# Settings for the 'Database Search' step: the protein database, decoy/contaminant tags and the search engine to run.
Database Search: 							# MSFragger 3.2 & Comet
  protein_database: '[database directory]'							# path to the target-decoy protein database; replace this placeholder (quoted so it is read as a string rather than a YAML flow sequence)
  decoy_tag: rev_							# prefix tag added to decoy sequences
  contaminant_tag: false							# contaminant tag setting -- NOTE(review): the previous comment was a copy of the decoy_tag description; confirm the intended meaning and value type
  search_engine: msfragger							# search engine options include "comet" and "msfragger"
  # Comet options (NOTE(review): presumably only read when search_engine is "comet"; confirm).
  comet: 							# Comet v2019011
    noindex: true							# skip mzML file indexing
    param: 							# comet parameter file (default "comet.params.txt")
    extension: mzML							# format of the spectra file
  # MSFragger options. The file header notes that MSFragger is downloaded separately and its location is set here (see path).
  msfragger: 							# MSFragger v3.0
    path: bin/MSFragger-3.0.jar							# path to MSFragger jar
    memory: 32							# how much memory in GB to use
    param: 							# MSFragger parameter file
    extension: mzML							# spectra format
    data_type: 0							# 0 for DDA, 1 for DIA, 2 for DIA-narrow-window
    num_threads: 32							# 0=poll CPU to set num threads; else specify num threads directly (max 64)
    precursor_mass_lower: -20							# lower bound of the precursor mass window
    precursor_mass_upper: 20							# upper bound of the precursor mass window
    precursor_mass_units: 1							# 0=Daltons, 1=ppm
    precursor_true_tolerance: 20							# true precursor mass tolerance (window is +/- this value)
    precursor_true_units: 1							# 0=Daltons, 1=ppm
    fragment_mass_tolerance: 20							# fragment mass tolerance (window is +/- this value)
    fragment_mass_units: 1							# fragment mass tolerance units (0 for Da, 1 for ppm)
    calibrate_mass: 2							# 0=Off, 1=On, 2=On and find optimal parameters
    deneutralloss: 0							# Perform deneutrallossing or not (0=no, 1=yes)
    deisotope: 1							# activates deisotoping.
    isotope_error: -1/0/1/2/3							# isotope errors to consider, written as a '/'-separated list; 0=off, 0/1/2 (standard C13 error)
    mass_offsets: 0							# allow for additional precursor mass window shifts. Multiplexed with isotope_error. mass_offsets = 0/79.966 can be used as a restricted ‘open’ search that looks for unmodified and phosphorylated peptides (on any residue)
    precursor_mass_mode: selected							# selected or isolated
    localize_delta_mass: 0							# this allows shifted fragment ions - fragment ions with mass increased by the calculated mass difference, to be included in scoring
    delta_mass_exclude_ranges: (-1.5,3.5)							# exclude mass range for shifted ions searching
    fragment_ion_series: b,y							# ion series used in search
    ion_series_definitions: 							# User defined ion series. (Example: b* N -17.026548;b0 N -18.010565)
    # Enzyme digestion rules and variable modifications.
    search_enzyme_name: Trypsin							# name of enzyme to be written to the pepXML file
    search_enzyme_cutafter: KR							# residues after which the enzyme cuts
    search_enzyme_butnotafter: P							# residues that the enzyme will not cut before
    num_enzyme_termini: 2							# 2 for enzymatic, 1 for semi-enzymatic, 0 for nonspecific digestion
    allowed_missed_cleavage: 2							# maximum value is 5
    clip_nTerm_M: 1							# specifies the trimming of a protein N-terminal methionine as a variable modification (0 or 1)
    variable_mod_01: 15.99490 M 3							# variable modification (NOTE(review): fields look like <mass shift> <target site(s)> <max count>; confirm against the MSFragger parameter docs)
    variable_mod_02: 42.01060 [^ 1							# variable modification
    variable_mod_03: 229.162932 n^ 1							# variable modification
    variable_mod_04: 229.162932 S 1							# variable modification
    variable_mod_05: 							# variable modification (empty slot)
    variable_mod_06: 							# variable modification (empty slot)
    variable_mod_07: 							# variable modification (empty slot)
    allow_multiple_variable_mods_on_residue: 0							# static mods are not considered
    max_variable_mods_per_peptide: 3							# maximum of 5
    max_variable_mods_combinations: 5000							# maximum of 65534, limits number of modified peptides generated from sequence
    # Output/reporting options, in-silico digestion limits and spectrum pre-processing.
    output_file_extension: pepXML							# file extension of output files
    output_format: pepXML							# file format of output files (pepXML or tsv)
    write_calibrated_mgf: 0							# Write calibrated MS2 scan to a MGF file (0 for No, 1 for Yes)
    output_report_topN: 1							# reports top N PSMs per input spectrum
    output_max_expect: 50							# suppresses reporting of PSM if top hit has expectation greater than this threshold
    report_alternative_proteins: 0							# 0=no, 1=yes
    precursor_charge: 1 6							# assume range of potential precursor charge states. Only relevant when override_charge is set to 1
    override_charge: 0							# 0=no, 1=yes to override existing precursor charge states with precursor_charge parameter
    digest_min_length: 7							# minimum length of peptides to be generated during in-silico digestion
    digest_max_length: 50							# maximum length of peptides to be generated during in-silico digestion
    digest_mass_range: 500.0 5000.0							# mass range of peptides to be generated during in-silico digestion in Daltons
    max_fragment_charge: 2							# maximum charge state for theoretical fragments to match (1-4)
    track_zero_topN: 0							# in addition to topN results, keep track of top results in zero bin
    zero_bin_accept_expect: 0							# boost top zero bin entry to top if it has expect under 0.01 - set to 0 to disable
    zero_bin_mult_expect: 1							# disabled if above passes - multiply expect of zero bin for ordering purposes (does not affect reported expect)
    add_topN_complementary: 0							# inserts complementary ions corresponding to the top N most intense fragments in each experimental spectrum
    minimum_peaks: 15							# required minimum number of peaks in spectrum to search (default 10)
    use_topN_peaks: 300							# pre-process experimental spectrum to only use top N peaks
    min_fragments_modelling: 2							# minimum number of matched peaks in PSM for inclusion in statistical modeling
    min_matched_fragments: 4							# minimum number of matched peaks for PSM to be reported
    minimum_ratio: 0.01							# filters out all peaks in experimental spectrum less intense than this multiple of the base peak intensity
    clear_mz_range: 125.5 131.5							# for iTRAQ/TMT type data; will clear out all peaks in the specified m/z range
    remove_precursor_peak: 0							# remove precursor peaks from tandem mass spectra. 0=not remove; 1=remove the peak with precursor charge; 2=remove the peaks with all charge states.
    remove_precursor_range: -1.5,1.5							# m/z range in removing precursor peaks. Unit: Da.
    intensity_transform: 0							# transform peak intensities with square root. 0=not transform; 1=transform using square root.
    mass_diff_to_variable_mod: 0							# Put mass diff as a variable modification. 0 for no; 1 for yes and change the original mass diff and the calculated mass accordingly; 2 for yes but do not change the original mass diff.
    # Labile / glyco search options. Per their own comments, the last three keys only apply in nglycan/labile search mode.
    labile_search_mode: 'off'							# type of search (nglycan, labile, or off). Off means non-labile/typical search. Quoted so that YAML 1.1 loaders read the string "off" instead of boolean false.
    restrict_deltamass_to: all							# Specify amino acids on which delta masses (mass offsets or search modifications) can occur. Allowed values are single letter codes (e.g. ACD), must be capitalized (NOTE(review): the original sentence was cut off after "must"; completed from the MSFragger parameter docs -- confirm)
    diagnostic_intensity_filter: 0							# [nglycan/labile search_mode only]. Minimum relative intensity for SUM of all detected oxonium ions to achieve for spectrum to contain diagnostic fragment evidence. Calculated relative to spectrum base peak. 0 <= value.
    Y_type_masses: 0							# [nglycan/labile search_mode only]. Specify fragments of labile mods that are commonly retained on intact peptides (e.g. Y ions for glycans). Only used if 'Y' is included in fragment_ion_series.
    diagnostic_fragments: 0							# [nglycan/labile search_mode only]. Specify diagnostic fragments of labile mods that appear in the low m/z region. Only used if diagnostic_intensity_filter > 0.
    # Fixed modifications: one mass value per terminus and per amino acid residue.
    add_Cterm_peptide: 0.000000							# c-term peptide fixed modifications
    add_Cterm_protein: 0.000000							# c-term protein fixed modifications
    add_Nterm_peptide: 0.000000							# n-term peptide fixed modifications
    add_Nterm_protein: 0.000000							# n-term protein fixed modifications
    add_A_alanine: 0.000000							# alanine fixed modifications
    add_C_cysteine: 57.021464							# cysteine fixed modifications
    add_D_aspartic_acid: 0.000000							# aspartic acid fixed modifications
    add_E_glutamic_acid: 0.000000							# glutamic acid fixed modifications
    add_F_phenylalanine: 0.000000							# phenylalanine fixed modifications
    add_G_glycine: 0.000000							# glycine fixed modifications
    add_H_histidine: 0.000000							# histidine fixed modifications
    add_I_isoleucine: 0.000000							# isoleucine fixed modifications
    add_K_lysine: 229.162932							# lysine fixed modifications
    add_L_leucine: 0.000000							# leucine fixed modifications
    add_M_methionine: 0.000000							# methionine fixed modifications
    add_N_asparagine: 0.000000							# asparagine fixed modifications
    add_P_proline: 0.000000							# proline fixed modifications
    add_Q_glutamine: 0.000000							# glutamine fixed modifications
    add_R_arginine: 0.000000							# arginine fixed modifications
    add_S_serine: 0.000000							# serine fixed modifications
    add_T_threonine: 0.000000							# threonine fixed modifications
    add_V_valine: 0.000000							# valine fixed modifications
    add_W_tryptophan: 0.000000							# tryptophan fixed modifications
    add_Y_tyrosine: 0.000000							# tyrosine fixed modifications
  
# Settings for the 'Peptide Validation' step (peptide assignment validation with PeptideProphet).
Peptide Validation: 							# PeptideProphet v5.2
  concurrent: false							# Concurrent execution of multiple instances
  extension: pepXML							# pepXML file extension
  clevel: 0							# set Conservative Level in neg_stdev from the neg_mean, low numbers are less conservative, high numbers are more conservative
  accmass: true							# use Accurate Mass model binning
  decoyprobs: true							# compute possible non-zero probabilities for Decoy entries on the last iteration
  enzyme: trypsin							# enzyme used in sample (optional)
  exclude: false							# exclude deltaCn*, Mascot*, and Comet* results from results (default Penalize * results)
  expectscore: true							# use expectation value as the only contributor to the f-value for modeling
  forcedistr: false							# bypass quality control checks, report model despite bad modeling
  glyc: false							# enable peptide Glyco motif model
  icat: false							# apply ICAT model (default Autodetect ICAT)
  instrwarn: false							# warn and continue if combined data was generated by different instrument models
  leave: false							# leave alone deltaCn*, Mascot*, and Comet* results from results (default Penalize * results)
  maldi: false							# enable MALDI mode
  masswidth: 5							# model mass width (default 5)
  minpeplen: 7							# minimum peptide length not rejected (default 7)
  minpintt: 2							# minimum number of NTT in a peptide used for positive pI model (default 2)
  minpiprob: 0.9							# minimum probability after first pass of a peptide used for positive pI model (default 0.9)
  minprob: 0.05							# report results with minimum probability (default 0.05)
  minrtntt: 2							# minimum number of NTT in a peptide used for positive RT model (default 2)
  minrtprob: 0.9							# minimum probability after first pass of a peptide used for positive RT model (default 0.9)
  neggamma: false							# use Gamma distribution to model the negative hits
  noicat: false							# do not apply ICAT model (default Autodetect ICAT)
  nomass: false							# disable mass model
  nonmc: false							# disable NMC missed cleavage model
  nonparam: true							# use semi-parametric modeling, must be used in conjunction with --decoy option
  nontt: false							# disable NTT enzymatic termini model
  optimizefval: false							# (SpectraST only) optimize f-value function f(dot,delta) using PCA
  phospho: false							# enable peptide Phospho motif model
  pi: false							# enable peptide pI model
  ppm: true							# use PPM mass error instead of Dalton for mass modeling
  zero: false							# report results with minimum probability 0

# Settings for the 'PTM Localization' step (PTM site localization with PTMProphet).
PTM Localization: 							# PTMProphet v6.0
  autodirect: false							# use direct evidence when the lability is high, use in combination with LABILITY
  cions: 							# use specified C-term ions, separate multiple ions by commas (default: y for CID, z for ETD)
  direct: false							# use only direct evidence for evaluating PTM site probabilities
  em: 2							# set EM models to 0 (no EM), 1 (Intensity EM Model Applied) or 2 (Intensity and Matched Peaks EM Models Applied)
  static: false							# use static fragppmtol for all PSMs instead of dynamically estimated offsets and tolerances
  fragppmtol: 15							# when computing PSM-specific mass_offset and mass_tolerance, use specified default +/- MS2 mz tolerance on fragment ions
  ifrags: false							# use internal fragments for localization
  keepold: false							# retain old PTMProphet results in the pepXML file
  lability: false							# compute Lability of PTMs
  massdiffmode: false							# use the Mass Difference and localize
  excludemassdiffmin: 0							# Minimum mass difference excluded for MASSDIFFMODE analysis (default=0)
  excludemassdiffmax: 0							# Maximum mass difference excluded for MASSDIFFMODE analysis (default=0)
  massoffset: 0							# adjust the massdiff by offset (0 = use default)
  maxfragz: 0							# limit maximum fragment charge (default: 0=precursor charge, negative values subtract from precursor charge)
  maxthreads: 4							# use specified number of threads for processing
  mino: 0							# use specified number of pseudo-counts when computing Oscore (0 = use default)
  minprob: 0							# use specified minimum probability to evaluate peptides
  mods: 							# specify modifications
  nions: 							# use specified N-term ions, separate multiple ions by commas (default: a,b for CID, c for ETD)
  nominofactor: false							# disable MINO factor correction when MINO= is set greater than 0 (default: apply MINO factor correction)
  ppmtol: 1							# use specified +/- MS1 ppm tolerance on peptides which may have a slight offset depending on search parameters
  verbose: false							# produce Warnings to help troubleshoot potential PTM shuffling or mass difference issues

# Settings for the 'Protein Inference' step (protein identification validation with ProteinProphet).
Protein Inference: 							# ProteinProphet v5.2
  accuracy: false							# equivalent to --minprob 0
  allpeps: false							# consider all possible peptides in the database in the confidence model
  confem: false							# use the EM to compute probability given the confidence
  delude: false							# do NOT use peptide degeneracy information when assessing proteins
  excludezeros: false							# exclude zero prob entries
  fpkm: false							# model protein FPKM values
  glyc: false							# highlight peptide N-glycosylation motif
  icat: false							# highlight peptide cysteines
  instances: false							# use Expected Number of Ion Instances to adjust the peptide probabilities prior to NSP adjustment
  iprophet: false							# input is from iProphet
  logprobs: false							# use the log of the probabilities in the Confidence calculations
  maxppmdiff: 20							# maximum peptide mass difference in PPM (default 20)
  minprob: 0.05							# PeptideProphet probability threshold (default 0.05)
  mufactor: 1							# fudge factor to scale MU calculation (default 1)
  nogroupwts: false							# check peptide's Protein weight against the threshold (default: check peptide's Protein Group weight against threshold)
  nonsp: false							# do not use NSP model
  nooccam: false							# non-conservative maximum protein list
  noprotlen: false							# do not report protein length
  normprotlen: false							# normalize NSP using Protein Length
  protmw: false							# get protein mol weights
  softoccam: false							# peptide weights are apportioned equally among proteins within each Protein Group (less conservative protein count estimate)
  unmapped: false							# report results for UNMAPPED proteins

# Settings for the 'Label-Free Quantification' step (precursor label-free quantification).
Label-Free Quantification: 							# Freequant
  peakTimeWindow: 0.4							# specify the time window for the peak, in minutes (default 0.4)
  retentionTimeWindow: 3							# specify the retention time window for the XIC, in minutes (default 3)
  tolerance: 10							# m/z tolerance in ppm (default 10)
  raw: false							# read raw files instead of converted mzML, or mzXML
  faims: false							# use FAIMS information for the quantification

# Settings for the 'Isobaric Quantification' step (isobaric labeling-based relative quantification for TMT and iTRAQ).
Isobaric Quantification: 							# Labelquant
  bestPSM: true							# select the best PSMs for protein quantification
  level: 2							# ms level for the quantification
  minProb: 0.7							# only use PSMs with a minimum probability score
  plex: 11							# number of channels
  purity: 0.5							# ion purity threshold (default 0.5)
  removeLow: 0.05							# ignore the lowest fraction of PSMs based on their summed abundances (NOTE(review): the previous comment said "3%" while the value is 0.05, presumably 5%; confirm)
  tolerance: 20							# m/z tolerance in ppm (default 20)
  uniqueOnly: false							# report quantification based on only unique peptides
  brand: tmt							# isobaric labeling brand (tmt, itraq)
  raw: false							# read raw files instead of converted mzML, or mzXML

# Settings for the 'Bio Cluster Quantification' step (protein report based on UniProt protein clusters).
Bio Cluster Quantification: 							# BioQuant
  organismUniProtID: 							# UniProt proteome ID
  level: 0.9							# cluster identity level (default 0.9)

# Settings for the 'FDR Filtering' step: FDR levels, probability thresholds and filtering algorithm switches.
FDR Filtering: 							# Filter
  psmFDR: 0.01							# PSM FDR level (default 0.01)
  peptideFDR: 0.01							# peptide FDR level (default 0.01)
  ionFDR: 0.01							# peptide ion FDR level (default 0.01)
  proteinFDR: 0.01							# protein FDR level (default 0.01)
  peptideProbability: 0.7							# top peptide probability threshold for the FDR filtering (default 0.7)
  proteinProbability: 0.5							# protein probability threshold for the FDR filtering (not used with the razor algorithm) (default 0.5)
  peptideWeight: 1							# threshold for defining peptide uniqueness (default 1)
  razor: true							# use razor peptides for protein FDR scoring
  picked: true							# apply the picked FDR algorithm before the protein scoring
  mapMods: true							# map modifications acquired by an open search
  models: true							# print model distribution
  sequential: true							# alternative algorithm that estimates FDR using both filtered PSM and Protein lists

# Settings for the 'Individual Reports' step: optional extra outputs and decoy reporting.
Individual Reports: 							# Report
  msstats: false							# create an output compatible with MSstats
  withDecoys: false							# add decoy observations to reports
  mzID: false							# create an mzID output
            
# Settings for the 'Integrated Reports' step (combined analysis of LC-MS/MS results).
Integrated Reports: 							# Abacus
  protein: true							# global level protein report
  peptide: true							# global level peptide report
  proteinProbability: 0.9							# minimum protein probability (default 0.9)
  peptideProbability: 0.5							# minimum peptide probability (default 0.5)
  uniqueOnly: false							# report TMT quantification based on only unique peptides
  reprint: false							# create abacus reports using the Reprint format

# Settings for the 'Integrated Isobaric Quantification' step. The file header notes that TMT-Integrator is
# downloaded separately and its location is set here (see path).
Integrated Isobaric Quantification: 							# TMT-Integrator v1.1.10
  path: '[TMTIntegrator directory]'							# path to TMT-Integrator jar; replace this placeholder (quoted so it is read as a string rather than a YAML flow sequence)
  memory: 32							# memory allocation, in Gb
  output: '[TMTIntegrator output file]'							# the location of output files; replace this placeholder (quoted for the same reason as path)
  channel_num: 11							# number of channels in the multiplex (e.g. 10, 11)
  ref_tag: Pool							# unique tag for identifying the reference channel (Bridge sample added to each multiplex)
  groupby: -1							# level of data summarization (0: PSM aggregation to the gene level; 1: protein; 2: peptide sequence; 3: PTM site; -1: generate reports at all levels)
  psm_norm: false							# perform additional retention time-based normalization at the PSM level
  outlier_removal: true							# perform outlier removal
  prot_norm: -1							# normalization (0: None; 1: MD (median centering); 2: GN (median centering + variance scaling); -1: generate reports with all normalization options)
  min_pep_prob: 0.9							# minimum PSM probability threshold (in addition to FDR-based filtering by Philosopher)
  min_purity: 0.5							# ion purity score threshold
  min_percent: 0.05							# remove low intensity PSMs (e.g. value of 0.05 indicates removal of PSMs with the summed TMT reporter ions intensity in the lowest 5% of all PSMs)
  unique_pep: false							# allow PSMs with unique peptides only (if true) or unique plus razor peptides (if false), as classified by Philosopher and defined in PSM.tsv files
  unique_gene: 0							# additional, gene-level uniqueness filter (0: allow all PSMs; 1: remove PSMs mapping to more than one GENE with evidence of expression in the dataset; 2: remove all PSMs mapping to more than one GENE in the fasta file)
  best_psm: true							# keep the best PSM only (highest summed TMT intensity) among all redundant PSMs within the same LC-MS run
  prot_exclude: none							# exclude proteins with specified tags at the beginning of the accession number (e.g. none: no exclusion; sp|,tr| : exclude protein with sp| or tr|)
  allow_overlabel: true							# allow PSMs with TMT on S (when overlabeling on S was allowed in the database search)
  allow_unlabeled: true							# allow PSMs without TMT tag or acetylation on the peptide n-terminus
  mod_tag: none							# PTM info for generation of PTM-specific reports (none: for Global data; S[167],T[181],Y[243]: for Phospho; K[170]: for K-Acetyl)
  min_site_prob: -1							# site localization confidence threshold (-1: for Global; 0: as determined by the search engine; above 0 (e.g. 0.75): PTMProphet probability, to be used with phosphorylation only)
  ms1_int: true							# use MS1 precursor ion intensity (if true) or MS2 summed TMT reporter ion intensity (if false) as part of the reference sample abundance estimation
  top3_pep: true							# use top 3 most intense peptide ions as part of the reference sample abundance estimation
  print_RefInt: false							# print individual reference sample abundance estimates for each multiplex in the final reports (in addition to the combined reference sample abundance estimate)
  add_Ref: -1							# add an artificial reference channel if there is no reference channel (-1: don't add the reference; 0: use summation as the reference; 1: use average as the reference; 2: use median as the reference)
  max_pep_prob_thres: 0							# the threshold for maximum peptide probability
  min_ntt: 0							# minimum allowed number of enzymatic termini