#!/usr/bin/env perl

#   A course on sequence Alignments
# 	Cedric Notredame 2001
#   All rights reserved
#   Files can be redistributed without permission
#   Comercial usage is forbiden
#   nw.pl : Needlman and Wunsch
#   dynamic programming (special case, gop=0).
#   each sequence comes in a differrent file


#Parameters:
#matrix=idmat: 10 pour un Match, -10 pour un MM
$match=1;
$mismatch=-10;
$gep=-4;  # cost for opening an indel in sequence A


# Read The two sequences from two fasta format file:



open F0, $ARGV[0];
while ( <F0>){$al0.=$_;}
close F0;

open F1, $ARGV[1];
while ( <F1>){$al1.=$_;}
close F1;

#extract the names and the sequences
($name0,$seq0)=($al0=~/>(.*)\n(.*)/);
($name1,$seq1)=($al1=~/>(.*)\n(.*)/);

#Clean the sequences
$seq0=~s/[\s\d\n]//g;
$seq1=~s/[\s\d\n]//g;

print "-$seq0-";
#split the sequences into residues
@res0=split (//,$seq0);
@res1=split (//,$seq1);

#Smith and Waterman Recursion: Find the score of the optimal alignment
$len0=@res0;
$len1=@res1;
$zero=0;


for ($i=0; $i<=$len0; $i++){$smat[$i][0]=$zero;$tb[$i][0 ]=2;}
for ($j=0; $j<=$len1; $j++){$smat[0][$j]=$zero;$tb[0 ][$j]=2;}
	
for ($i=1; $i<=$len0; $i++)
	{
	for ($j=1; $j<=$len1; $j++)
		{
		#calcul du score
		if ($res0[$i-1] eq $res1[$j-1]){$s=$match;}
		else {$s=$mismatch;}
		
		$sub=$smat[$i-1][$j-1]+$s;
		$del=$smat[$i  ][$j-1]+$gep;
		$ins=$smat[$i-1][$j  ]+$gep;
		
		if   ($sub>$del && $sub>$ins && $sub> $zero){$smat[$i][$j]=$sub;$tb[$i][$j]=0;}
		elsif($del>$ins && $del>$zero              ){$smat[$i][$j]=$del;$tb[$i][$j]=-1;}
		elsif($ins>$zero                           ){$smat[$i][$j]=$ins;$tb[$i][$j]=1;}
		else {$smat[$i][$j]=$zero;$tb[$i][$j]=2;}
	
		if ($smat[$i][$j]> $best_score)
		  {
		     $best_score=$smat[$i][$j];
		     $best_i=$i;
		     $best_j=$j;
		   }
	      }
	
      }
print "SCORE=$best_score\n";

#Traceback Procedure
$i=$best_i;
$j=$best_j;
$aln_len=0;
while (!($i==0 && $j==0) && $tb[$i][$j]!=2)
	{
	if ($tb[$i][$j]==0)
		{
		$aln0[$aln_len]=$res0[--$i];
		$aln1[$aln_len]=$res1[--$j];
		}
	elsif ($tb[$i][$j]==-1)
		{
		$aln0[$aln_len]='-';
		$aln1[$aln_len]=$res1[--$j];
		}
	elsif ($tb[$i][$j]==1)
		{
		$aln0[$aln_len]=$res0[--$i];
		$aln1[$aln_len]='-';
		}
	$aln_len++;
	
	}
$i++;
$j++;
#Output en Fasta:
print ">$name_list0[0]: [$i-$best_i]\n";
for ($i=$aln_len-1; $i>=0; $i--){print $aln0[$i];}
print "\n";
print ">$name_list1[0]: [$j-$best_j]\n";
for ($j=$aln_len-1; $j>=0; $j--){print $aln1[$j];}
print "\n";



                         
