Executes an R script from an Azure Machine Learning experiment

`# This R module generates the templates for face and house stimulus, and the standard deviation of signals, for each signal channel and each patient.

# The templates and standard deviations will be used later to generate features for the training, validation, and testing data.

dataset1 <- maml.mapInputPort(1) # class: data.frame
srate <- 1000

ncols <- ncol(dataset1) # get the number of columns of the training data. Training data has 67 columns. Col1: PatientID, Cols2-65, 64 channels, Col 66: Stimulus Type, Col 67: Stimulus Presentation Cycle ID
unique_patients <- unique(dataset1[,1]) # Get the list of unique patients
num_patients <- length(unique_patients) # Get the number of unique patients
templates <- as.data.frame(matrix(NA, nrow=1201_num_patients, ncol=ncols-3)) # every patient will have 1201 rows, where the first row is the standard deviation of signals, which will be used to normalize the signal
# rows 2-601 will be the house template, and rows 602-1201 will be the face template. A template is defined as the average of the signal between 200 ms before the onset of stimulus,
# and 399 ms after the onset of stimulus. So totally 600 points for a house template, or a face template.
PatientID_Col <- matrix(rep('',1201_num_patients), nrow=1201*num_patients,ncol=1) # A column of patientID, which is going to be cbind with templates

# Function fh_get_events() converts the stim column into event onset times and the midpoints between each event onset and the next. See the previous two R script modules for data splitting for a more detailed description.

fh_get_events = function(stim) {
nrows <- nrow(stim)
if (stim[nrows] != 0 & stim[nrows] != 101){
stim <- rbind(stim,0)
}
tmp=c(0,stim[1:((length(stim)-1))])
b <- which((stim-tmp)!=0 )
c <- floor(diff(b)/2)
b<-b[1:length(b)-1]
d <- b + c
evs=matrix(data=NA, nrow=length(b),ncol=3)

evs[ ,1] <- b
evs[ ,2] <- d
evs[ ,3] <- stim[d]
evs<-evs[which( evs[ ,3] != 0 ), ]
evs[which( evs[ ,3] < 51 ),3] <- 1
evs[which( evs[ ,3] == 101 ),3] <- 0
evs[which( evs[ ,3] > 50 ),3] <- 2
rm(b,c,d)
return(evs)
}

# Start building templates for each patient, channel, and stimulus class (house or face)

for (j in 1:num_patients){
patient_id <- unique_patients[j] # Get the current patient ID
PatientID_Col[((j-1)_1201+1):(j_1201),1] <- patient_id # Assign the same patient ID to the patientID column
data_j <- dataset1[dataset1[,1]==patient_id,] # get the data of this specific patient
ncols_j <- sum(data_j[1,] != -999999) # Determine how many valid columns this patient has (column -999999 means that this patient does not have that signal channel)
signal_train <- as.matrix(data_j[,2:(ncols_j-2)]) # get the signal for this patient, excluding those -999999 channels
signal_train <- apply(signal_train, 2, as.numeric) # convert the signal data to numeric, in case they might be treated as string features

stim_train <- as.matrix(data_j[,ncols-1]) # get the column of stim for this patient
events_train=fh_get_events(stim_train); # get the event matrix
events_train= events_train[which(events_train[,3]!=0),] # Only keep the stimulus onset time in the trainign data
events_train=events_train[,-2] # exclude the midway of events column
train_t=c(1:nrow(stim_train)); # train_t is the row index of training data
train_e=events_train; # make a copy of the stimulus onset time data
num_chans=ncol(signal_train); # get the number of channels this patient has.
tlims=c(-200,399);# times to start and end erps, this is the time window we are going to add to the stimulus onset time later. The [stimulus onset time -200, stimulus onset time+399] is the time window we used to construct the templates
erp_baseline=c(-200,49);# times to calcualte erp based upon(must be within tlims) # We take the [stimulus onset time -200, stimulus onset time+49] as the baseline for each stimulus presentation cycle, assuming that the brain has not reponded to the
# visual stimulus within 50 milliseconds after the stimulus onset
train_chans_sd <- rep(0,num_chans) # initialite a variable to hold the standard deviation of signals for each channel. It will be used to normalize the signals.
for (k in 1:num_chans){
train_chans_sd[k] <- sd(signal_train[,k]) # get the standard deviation of each channel in the training data

signal_train[,k] <- signal_train[,k]/sd(signal_train[,k]);# Normalize the scale of each signal by dividing by its own standard deviation

}

# This function generates the templates for each signal, for house and face stimuli separately.
# A template is just the average of each signal in the 600 ms window (200 ms before to 399 ms after stimulus onset), for each stimulus type,
# over all stimulus presentation cycles in the training data.

fh_sta = function(inputdata,events,fh_class,tlims) {
cls_times= events[which(events[,2]==fh_class),1] # get the stimulus onset time of a specified class (this is the onset time of a stimulus, not the ending time of the previous ISI)
sta=matrix(data=0, nrow=(tlims[2]-tlims[1]+1),ncol=ncol(inputdata))
for (k in 1:length(cls_times)){
sta=sta+inputdata[cls_times[k]+c(tlims[1]:tlims[2]),]; #accumulating the signals after realigning all stimulus presentation cycles along the stimulus onset time
}
sta=sta/k; # calculate the average
return(sta) # output the average as the template
}

#get sta templates

sta_h=fh_sta(signal_train,train_e,1,tlims);# templates of house stimulus
sta_f=fh_sta(signal_train,train_e,2,tlims);# templates of face stimulus

recenter stas w.r.t. baseline

for (k in 1:num_chans) {
sta_h[,k]=sta_h[,k]-mean(sta_h[c(erp_baseline[1]:erp_baseline[2])-tlims[1]+1,k]); #remove the baseline (the average between observation 1 to 250 of the template) from the template. These are the final templates.
sta_f[,k]=sta_f[,k]-mean(sta_f[c(erp_baseline[1]:erp_baseline[2])-tlims[1]+1,k]);
}

train_chans_sd <- matrix(train_chans_sd,nrow = 1,ncol = num_chans)
templates[1201_(j-1)+1,1:num_chans] <- train_chans_sd #indgest the calculated templates into the templates variable
templates[(1201_(j-1)+2):(1201_(j-1)+601),1:num_chans] <- as.matrix(sta_h)
templates[(1201_(j-1)+602):(1201*j),1:num_chans] <- as.matrix(sta_f)
}

col_names <- rep('',ncols-2)
col_names[1] <- 'PatientID'
for (i in 2:(ncols-2)){
col_names[i] <- paste('Chanel ', (i-1), sep="")
}
templates <- as.matrix(templates)
templates <- data.frame(PatientID_Col, templates, stringsAsFactors = F)
data.set <- templates
colnames(data.set) <- col_names #assign column names to the data frame before output
maml.mapOutputPort("data.set");`

t1p / innervoice Goto Github PK

innervoice's Introduction

innervoice's People

Contributors

Stargazers

Watchers

innervoice's Issues

Executes an R script from an Azure Machine Learning experiment

The templates and standard deviation will be used later to generate features for training, validation, and testing data

Function fh_get_events() convert stim column into events onset time, midway between an event onset and the next event. See the previous two R script modules for data splitting for more detailed description.

Start building templates for each patient, channel, and stimulus class (house or face)

This function generates the templates for each signal, and for house and face stimulus separately,

It is just the average of each signal in the 600 ms window (-200ms before stimulus, and 399 ms after stimulus for each stimulus type

over all stimulus presentation cycles in the training data.

recenter stas w.r.t. baseline

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent