seq1='TACGGGTAT';
seq2='GGACGTACG';
[s,a]=nwalign(seq1,seq2,'scoringmatrix','blosum62','gapopen',11,'extendgap',1)
function [score, alignment, startat, matrices] = nwalign(seq1,seq2,varargin)
match = 5;
mismatch= -2;
gab=-6;
isAminoAcid = true;
scale=1;
if isstruct(seq1)
seq1 = seqfromstruct(seq1);
end
if isstruct(seq2)
seq2 = seqfromstruct(seq2);
end
if nargin > 2
if rem(nargin,2) == 1
error('Bioinfo:IncorrectNumberOfArguments',...
'Incorrect number of arguments to %s.',mfilename);
end
okargs = {'scoringmatrix','gapopen','extendgap','alphabet','scale','showscore','glocal'};
for j=1:2:nargin-2
pname = varargin{j};
pval = varargin{j+1};
k = find(strncmpi(pname, okargs,numel(pname)));
if isempty(k)
error('Bioinfo:UnknownParameterName',...
'Unknown parameter name: %s.',pname);
elseif length(k)>1
error('Bioinfo:AmbiguousParameterName',...
'Ambiguous parameter name: %s.',pname);
else
switch(k)
case 1
if isnumeric(pval)
ScoringMatrix = pval;
else
if ischar(pval)
pval = lower(pval);
end
try
[ScoringMatrix,ScoringMatrixInfo] = feval(pval);
catch allExceptions
error('Bioinfo:InvalidScoringMatrix','Invalid scoring matrix.');
end
end
case 2
gapopen = -pval;
case 3
gapextend = -pval;
setGapExtend = true;
case 4
isAminoAcid = bioinfoprivate.optAlphabet(pval,okargs{k}, mfilename);
case 5
scale=pval;
case 6
showscore = bioinfoprivate.opttf(pval, okargs{k}, mfilename);
case 7
glocal = bioinfoprivate.opttf(pval, okargs{k}, mfilename);
end
end
end
end
if ~exist('ScoringMatrix','var')
if isAminoAcid
[ScoringMatrix,ScoringMatrixInfo] = blosum50;
else
[ScoringMatrix,ScoringMatrixInfo] = nuc44;
end
end
if exist('ScoringMatrixInfo','var') && isfield(ScoringMatrixInfo,'Scale')
scale=scale*ScoringMatrixInfo.Scale;
end
if isAminoAcid
if ischar(seq1)
seq1 = strrep(seq1,'?','X');
else
seq1(seq1 == 26) = 23;
end
if ischar(seq2)
seq2 = strrep(seq2,'?','X');
else
seq2(seq2 == 26) = 23;
end
end
if isAminoAcid && ~(isaa(seq1) && isaa(seq2))
error('Bioinfo:InvalidAminoAcidSequences',...
'Both sequences must be amino acids, use ALPHABET = ''NT'' for aligning nucleotides.');
elseif ~isAminoAcid && ~(isnt(seq1) && isnt(seq2))
error('Bioinfo:InvalidNucleotideSequences',...
'When ALPHABET = ''NT'', both sequences must be nucleotides.');
end
if ischar(seq1)
seq1=upper(seq1);
if isAminoAcid
intseq1 = aa2int(seq1);
else
intseq1 = nt2int(seq1);
end
else
intseq1 = uint8(seq1);
if isAminoAcid
seq1 = int2aa(intseq1);
else
seq1 = int2nt(intseq1);
end
end
if ischar(seq2)
seq2 = upper(seq2);
if isAminoAcid
intseq2 = aa2int(seq2);
else
intseq2 = nt2int(seq2);
end
else
intseq2 = uint8(seq2);
if isAminoAcid
seq2 = int2aa(intseq2);
else
seq2 = int2nt(intseq2);
end
end
m = length(seq1);
n = length(seq2);
if ~n||~m
error('Bioinfo:InvalidLengthSequences','Length of input sequences must be greater than 0');
end
scoringMatrixSize = size(ScoringMatrix,1);
highestVal = max([intseq1, intseq2]);
if highestVal > scoringMatrixSize
if isAminoAcid
anyVal = aa2int('X');
else
anyVal = nt2int('N');
end
if scoringMatrixSize >= anyVal
intseq1(intseq1>scoringMatrixSize) = anyVal;
intseq2(intseq2>scoringMatrixSize) = anyVal;
else
error('Bioinfo:InvalidSymbolsInInputSequences',...
'Sequences contain symbols that cannot be handled by the given scoring matrix.');
end
end
if glocal
algorithm = 3;
else
algorithm = 1;
end
if setGapExtend
if showscore
[score, path(:,2), path(:,1), F(:,:,1), F(:,:,2), F(:,:,3)] = ...
affinegapmex(intseq2, intseq1, gapopen, gapextend, ScoringMatrix, algorithm);
elseif nargout == 4
[score, path(:,2), path(:,1), F(:,:,1), F(:,:,2), F(:,:,3), pointer] = ...
affinegapmex(intseq2, intseq1, gapopen, gapextend, ScoringMatrix, algorithm);
pointer = shiftdim(pointer,1);
else
[score, path(:,2), path(:,1)] = affinegapmex(intseq2, intseq1, ...
gapopen, gapextend, ScoringMatrix, algorithm);
end
else
if showscore
[score, path(:,2), path(:,1), F] = ...
simplegapmex(intseq2, intseq1, gapopen, ScoringMatrix, algorithm);
elseif nargout == 4
[score, path(:,2), path(:,1), F, pointer] = ...
simplegapmex(intseq2, intseq1, gapopen, ScoringMatrix, algorithm);
else
[score, path(:,2), path(:,1)] = ...
simplegapmex(intseq2, intseq1, gapopen, ScoringMatrix, algorithm);
end
end
path = path(sum(path,2)>0,:);
path = flipud(path);
score = scale * score;
if nargout<=1 && ~showscore
return
end
alignment = repmat(('- -')',1,size(path,1));
alignment(1,path(:,1)>0) = seq1;
alignment(3,path(:,2)>0) = seq2;
h=find(all(path>0,2));
if isAminoAcid
noGaps1=aa2int(alignment(1,h));
noGaps2=aa2int(alignment(3,h));
else
noGaps1=nt2int(alignment(1,h));
noGaps2=nt2int(alignment(3,h));
end
htodel=max([noGaps1;noGaps2])>scoringMatrixSize;
h(htodel)=[];
noGaps1(htodel)=[];
noGaps2(htodel)=[];
value = ScoringMatrix(sub2ind(size(ScoringMatrix),double(noGaps1),double(noGaps2)));
alignment(2,h(value>=0)) = ':';
alignment(2,h(noGaps1==noGaps2)) = '|';
startat = [1;1];
if nargout > 3
matrices.Scores = F;
matrices.Pointers = pointer;
end
if showscore
figure
F=scale.*max(F(2:end,2:end,:),[],3);
clim=max(max(max(abs(F(~isinf(F))))),eps);
imagesc(F,[-clim clim]);
colormap(privateColorMap(1));
set(colorbar,'YLim',[min([F(:);-eps]) max([F(:);eps])])
title('Scoring Space and Winning Path')
xlabel('Sequence 1')
ylabel('Sequence 2')
hold on
plot(path(all(path>0,2),1),path(all(path>0,2),2),'k.')
end
function pcmap = privateColorMap(selection)
switch selection
case 1, pts = [0 0 .3 20;
0 .1 .8 25;
0 .9 .5 15;
.9 1 .9 8;
1 1 0 26;
1 0 0 26;
.4 0 0 0];
otherwise, pts = [0 0 0 128; 1 1 1 0];
end
xcl=1;
for i=1:size(pts,1)-1
xcl=[xcl,i+1/pts(i,4):1/pts(i,4):i+1];
end
pcmap = interp1(pts(:,1:3),xcl);