Giter Site home page Giter Site logo

Comments (4)

pavel-shliaha avatar pavel-shliaha commented on September 3, 2024

my current code

#' Add N- and C-terminal overhangs to a proteotypic peptide
#'
#' Locates `pep_seq` in `proteins`, extracts up to 20 flanking residues on
#' each side and derives the overhangs to synthesise around the peptide so
#' that the total length does not exceed `maxLength`.
#'
#' @param pep_seq Peptide sequence; used as a regular expression by `grep()`
#'   and `regexpr()`.
#' @param proteins Character vector of protein sequences to search.
#' @param maxLength Maximum allowed length of peptide plus overhangs.
#' @param preferN If only one overhang fits, add it on the N-terminus
#'   (needs room for at least 4 residues).
#' @param preferC If only one overhang fits, add it on the C-terminus
#'   (needs room for at least 3 residues). Wins over `preferN` if both are
#'   set and both fit.
#' @return A list with elements `AA_before_20`, `AA_after_20` (flanking
#'   residues, `NA` if the peptide is not found or matches several proteins),
#'   `spikeTide` (overhangs and peptide joined by "."), and `result`, one of
#'   "not_found", "non_proteotypic", "complete_overhangs",
#'   "N_overhang_shortened", "C_overhang_shortened",
#'   "both_overhangs_shortened", "N_overhang_only",
#'   "N_overhang_only_shortened", "C_overhang_only",
#'   "C_overhang_only_shortened" or "no_overhangs".
addOverhangs <- function (pep_seq, proteins, maxLength,
                          preferN = FALSE, preferC = FALSE){

  proteinSeq <- grep(pep_seq, proteins, value = TRUE)

  # No protein contains the peptide: report it instead of failing on an
  # out-of-bounds subscript further down.
  if (length(proteinSeq) == 0) {
    return(list("AA_before_20" = NA, "AA_after_20" = NA,
                "spikeTide" = pep_seq, "result" = "not_found"))
  }

  # The peptide occurs in more than one protein.
  if (length(proteinSeq) > 1) {
    return(list("AA_before_20" = NA, "AA_after_20" = NA,
                "spikeTide" = pep_seq, "result" = "non_proteotypic"))
  }

  proteinSeq  <- proteinSeq[[1]]
  pepPosition <- as.integer(regexpr(pep_seq, proteinSeq))
  pepLength   <- nchar(pep_seq)

  #############################################################################
  # up to 20 aa before / after the peptide
  # substr() clamps at the sequence ends and returns "" for an empty range,
  # so a peptide at the very start or end of the protein has an empty flank.

  firstAfter   <- pepPosition + pepLength
  AA_before_20 <- substr(proteinSeq, max(1, pepPosition - 20), pepPosition - 1)
  AA_after_20  <- substr(proteinSeq, firstAfter, firstAfter + 19)

  #############################################################################
  # 1) overhang from the preceding AA
  # Default is the last 4 residues. With several K/R present, start 3
  # residues upstream of the last K/R that lies more than 3 positions after
  # the previous K/R (or after the window start).

  aaBefore      <- strsplit(AA_before_20, split = "")[[1]]
  aaBasic       <- which(aaBefore %in% c("K", "R"))
  aaToAddBefore <- tail(aaBefore, 4)

  if (length(aaBasic) > 1) {
    previousBasic <- c(0, aaBasic[-length(aaBasic)])
    goodAA <- which(aaBasic - previousBasic > 3)
    if (length(goodAA) > 0) {
      firstGoodAA   <- aaBasic[max(goodAA)]
      aaToAddBefore <- aaBefore[(firstGoodAA - 3):length(aaBefore)]
    }
  }

  overhang_before <- paste(aaToAddBefore, collapse = "")

  #############################################################################
  # 2) overhang from the following AA
  # Default is the first 3 residues. Position 0 acts as a virtual K/R; extend
  # to 3 residues past the first K/R whose distance to the next K/R (or to
  # the window end) is more than 2.

  aaAfter      <- strsplit(AA_after_20, split = "")[[1]]
  aaBasic      <- c(0, which(aaAfter %in% c("K", "R")))
  aaToAddAfter <- head(aaAfter, 3)

  if (length(aaBasic) > 1) {
    nextBasic <- c(aaBasic[-1], length(aaAfter))
    goodAA <- which(nextBasic - aaBasic > 2)
    if (length(goodAA) > 0) {
      firstGoodAA  <- aaBasic[min(goodAA)]
      aaToAddAfter <- aaAfter[seq_len(firstGoodAA + 3)]
    }
  }

  overhang_after <- paste(aaToAddAfter, collapse = "")

  #############################################################################
  # add overhangs

  nBefore <- nchar(overhang_before)
  nAfter  <- nchar(overhang_after)
  length_with_overhangs <- nBefore + pepLength + nAfter

  tooLong  <- length_with_overhangs > maxLength
  # room for the minimal overhangs on both sides (4 N-terminal + 3 C-terminal)
  fitsBoth <- pepLength + 7 <= maxLength

  # fallback when no overhang can be added at all, so the return value is
  # always defined
  spikeTide <- pep_seq
  result    <- "no_overhangs"

  # option 1: adding full overhangs
  if (!tooLong) {
    spikeTide <- paste(overhang_before, pep_seq, overhang_after, sep = ".")
    result    <- "complete_overhangs"
  }

  if (tooLong && fitsBoth) {
    if (nBefore > 4 && nAfter < 4) {
      # option 2: shorten the preceding overhang (succeeding one is <= 3 aa)
      aaAllowedBefore <- maxLength - pepLength - nAfter
      new_overhang_before <- paste(tail(aaToAddBefore, aaAllowedBefore),
                                   collapse = "")
      spikeTide <- paste(new_overhang_before, pep_seq, overhang_after,
                         sep = ".")
      result    <- "N_overhang_shortened"
    } else if (nBefore < 5 && nAfter > 3) {
      # option 3: shorten the succeeding overhang (preceding one is <= 4 aa)
      aaAllowedAfter <- maxLength - pepLength - nBefore
      new_overhang_after <- paste(head(aaToAddAfter, aaAllowedAfter),
                                  collapse = "")
      spikeTide <- paste(overhang_before, pep_seq, new_overhang_after,
                         sep = ".")
      result    <- "C_overhang_shortened"
    } else if (nBefore > 4 && nAfter > 3) {
      # option 4: shorten both overhangs to their minimal lengths
      new_overhang_before <- paste(tail(aaToAddBefore, 4), collapse = "")
      new_overhang_after  <- paste(head(aaToAddAfter, 3), collapse = "")
      spikeTide <- paste(new_overhang_before, pep_seq, new_overhang_after,
                         sep = ".")
      result    <- "both_overhangs_shortened"
    }
  }

  # option 5: add a single overhang
  # important: do not add less than 4 amino acids on the N-terminus and less
  # than 3 amino acids on the C-terminus. This deliberately overrides
  # option 1, as in the original implementation.
  if (!fitsBoth) {

    numAAToAdd <- maxLength - pepLength

    # if user wants overhang on N-terminus
    if (preferN && numAAToAdd >= 4) {
      if (nBefore <= numAAToAdd) {
        spikeTide <- paste(overhang_before, pep_seq, sep = ".")
        result    <- "N_overhang_only"
      } else {
        new_overhang_before <- paste(tail(aaToAddBefore, numAAToAdd),
                                     collapse = "")
        spikeTide <- paste(new_overhang_before, pep_seq, sep = ".")
        result    <- "N_overhang_only_shortened"
      }
    }

    # if user wants overhang on C-terminus (or exactly 3 residues are left,
    # which can only hold a C-terminal overhang)
    if ((preferC && numAAToAdd >= 3) || numAAToAdd == 3) {
      if (nAfter <= numAAToAdd) {
        spikeTide <- paste(pep_seq, overhang_after, sep = ".")
        result    <- "C_overhang_only"
      } else {
        new_overhang_after <- paste(head(aaToAddAfter, numAAToAdd),
                                    collapse = "")
        spikeTide <- paste(pep_seq, new_overhang_after, sep = ".")
        result    <- "C_overhang_only_shortened"
      }
    }
  }

  # return the results
  list("AA_before_20" = AA_before_20,
       "AA_after_20"  = AA_after_20,
       "spikeTide"    = spikeTide,
       "result"       = result)
}

from cleaver.

pavel-shliaha avatar pavel-shliaha commented on September 3, 2024

A couple more comments:

  1. the output: I believe the user might want the following output:
  • the new sequence: YDSKVNQADNLIEVGKGPEK
  • the new sequence where the cleavage sites are shown as dots YDSK.VNQADNLIEVGK.GPEK
  • the complete overhang
  • the suggested overhang (might not be the same as complete if shortened)
  • result spelled out: e.g. "complete_overhangs" or "C_overhang_only"
  • 20 amino acids before and 20 amino acids after for user to be able to examine how overhangs were created
  2. an example table (with peptide sequences and the corresponding output) is in:

"data:\RAW\pvs22_QTOF_DATA_data3\data_for_synapter_2.0\cleaver_overhangs"

from cleaver.

pavel-shliaha avatar pavel-shliaha commented on September 3, 2024
  1. Sometimes a company will require the synthesised peptide to end with a certain amino acid (JPT enforces K|R on the C-terminus). There should be an argument for this, e.g. end = "K". Note that this enforced AA is part of the peptide being ordered, so it must be counted towards the maximum allowed peptide length.

from cleaver.

sgibb avatar sgibb commented on September 3, 2024

Closed via lgatto/Pbase#6.

from cleaver.

Related Issues (7)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.