¡Descarga Bloc 2 de Bioinformática: Código desconocido y más Apuntes en PDF de Biotecnología solo en Docsity!
Bloc 2
!"#$%&'()%+$,-)&.#** _!"#$%&'((!)+,-.!($%+$$+/-_*
26.-1&'A0)%$.-$26"'-#$'()&+,-#$26"-),"01#3$0BC$=#$-"$+,-$)010)'-1'A0$"0$-10$56#'D7-&C2)0:$
?10$ &6$ -#$ 9,#)0$ -#',.01$ 7-&#$ )6&)1-'#3$ &$ '16##6#$ ./0+,-#'$ 7-$ #&%$ +,-$ 9,#+,-2$ -#',.01$ :3:4# @4# A42A4#
BC0=:;241103=6# .&#$./,&$6170-$E !"#$%&!'(#)#"(!"(+!,"(%-$,(.$'/0()(12#)30(!#45 F:$
!B-25"-$ .-"$ 5010.720$ 0)',0"G$ =#$ )62$ #$ 21=##2$ ,&$ +,0.1-:$ ?90$ -#',.*0>-2$ "0$ )010$ ./,&$ 5-1#6&0'7-3$
2-&'1-#$+,-$010$-#',.-2$'6'0$"0$20'7-$#-&)-10$.-$20&-10$#2,"'@&0:$?BC$801@$+,-$69'&7,-2$26"'H##*20$
DEF#G6H# I#$,&$86120'$-B5-12-&'0"3$90#0'$-&$"0$#H&'-#$6$8B0)%$.-$ 63=B;6 $E 6$!("-!,+2,7!,(2*(8)1#2 9 ):;!4#!" F:$$
;-#$ 63=B;6 $1-51-#-&'-&$0$,&$7-&3$,&0$516'-J&03$,&$2-'09C"'K$L-1$0$)0.0#),&$.-"#$M5,&'-'#N$6$56,#3$'-&2$
,&0$#6&.0$-#5-)H8)03$$-#$'169-&$'6'#$#691-$,&$ 6E932:#6?@0B :$
?+,-#'$ #,561'$ -#$ 56#0$ .&#$ ./,&0$ 26#'103$ 6&$ '-&2$ "-#$ /3@F1E@;6# B04=4 3$ +,-$ 5-1$ 491.0)%$ #/,&10&$ 0$ "-#$
&6#'1-#$ 63=B;6 $8B0.-#$#691-$-"$#,561'$8H#):$
;0$ -%"&+0-10(23!(+ $#/0&62-&0$ 923>;7# 2-&'1-#$+,-$"-#$ &!+"0- #/0&62-&-&$ :42I;:6 :$O*'P0&Q0&'$ J@E32;61F=104 3$
56.1-2$2-.*1$-"$ =0K;@@#BC;A92;660L# .-$)0.0#),&$.-"#$'017-'#$)611-#56&-&'#$0$"0$#6&.0$5169"-20:$
L-1$ -B-25"-3$ .8-1-&'#$ (%,%$- %* !"#0"-!#+#- $ .-$ 8",61-#)(&)0$ -&#$ &.)010&$ #$ ,&$ 7-&$ -#'@$ 2=#$ 6$ 2-&<#$
-B51-##0'$ -&$ ,&0$ 26#'10$ 5169"-20:$ ?+,-#'0$ 8",61-#)(&)0$ -#$ 56'$ 2-#,101$ 71@)-#$ 0$ ,&$ #68'R01-$ ./ 4=M@060#
BC0/4:I; :$
$+N(#8O#-)%P&%%&Q(#
D S-$516'-J&-#$
D S-$'-B'#$
D S-$ 8'& G$#-10$"0$ :&:&)#!42 3$'6'$0""C$+,-$-22070'A-20$"0$)("T","0:$L6.-&$#-1$.-$ I;=6 $6$9=$.-$ ('+6 :$
D S/ ;A92;660L G$#-1*0$-"$ &:1!( +,-$#/-#'@$""-7&'$-&$0+,-""$262-&'3$21-2$-"$ /%'& 3$+,-$1-51-#-&'0$0""C$+,-$
#/-#'@$-B51-##0&'$-&$,&$262-&'$.-'-12&0':$?+,-#'#$2)160110<#$./-B51-##%$56.-&$'-&1$#6&.-#$.-$
o 18'&#
o S/ 3@0I3=E1@;?:0B6 G$4$40$.8-1-&'#$201+,-#3$)62$010$ ??@'!#1&70(&(%-2*#1!" :$
Els microarrays cada vegada s'utilitzen més, ja que van baixant el preu. Qualsevol estudi, generalment el que
vol saber és quins gens estan sent expressats.
;-#$ 49@014103=6# #%&$26"'-#$$26"'$.>-1#-#G$
D )3/942410L# -&'1-$&.>.,,#$#0&#$$20"0"'#G$+,($#/-B51-##03$+,&#$7-&#$'-&2$.*8-1-&'#K$
D )@4660J01410L# 26"-),"01$.-$20"0"'*-#$)625"-B-#$
D B;=:0J01410L# .-$7-&#$-#5-)H8)#$./,&0$20"0"'*0$E !"(4),!&7(4)'(2( 3!$.+ ()( -!4"+#'$0 F$
D +2;B0110L $.-$1-#56#'0$0$,&$'10)'02-&'$
D 8;:;110L# .-$2,'0)6&#$$56"2618#2-#$./,&$7-&$)6&)1-':$
El problema ve a l'hora d'analitzar les dades, ja que d'aquests microarrays obtenim gran quantitat
En aquest bloc farem:
1. Transcriptòmica (microarrays)
2. Alineament per parelles
3. Alineament múltiple
Bloc 2
_5 6789:;<6=>?@>>@A_*
=%.(%"-#$'B.'".!($%+$$+/C*
!B#'-B-&$26"'#$'5,#$.-$2)160110<#$./-B51-##*%3$5-1C$&6#0"'1-#$)62-&'01-2$-"#$.-$ B36#13@326 $EXLYVV!SF3$
+,-$#%&$-"#$2=#$20&,0"#3$$-"#$0110<#$./ 3@0I3=E1@;?:0B6# #&'-''A0'#$&$#*',:$
(+P$$O8#
:!--0"/!1$%&'((!)&0,(D!1E* '-&2$,&0$ >0>@03:;14#B;#8'& 3$+,-$)6&'=$-"#$7-&#$+,-$-&#$&'-1-##-&:$O*'P0&Q0&'$
LZ[$ $ 025"8)0)%3$ 070801-2$ -"#$ )"6&#$ E'16##6#$ .-$ S?F$ +,-$ -&#$ &'-1-##&3$ $ -"#$ 491.01-2$ 0"$ #,561'$ 8H#)$
2'P0&Q0&'$'()&+,-#$ 0=#K0:23 3$090&#$.-$56#01$"0$&6#'10$26#'10: (
!"#$ 56#-2$ -&$ ,&$ 561'0$ $ 51-501-2$ "0$ &6#'10$ 51C50$ #6&.0:$ ;0$
M710-""0N$ +,-$ 69'&7,-2$ #-1@$ ,&0$ 99"*6'-)0$ .-$ '6'#$ -"#$ 7-&#$ +,-$
>6"7,-2$ )62516>01:$ ?BC$ -&#$ 5-12-'$ 8-1$ -"$ &6#'1-$ 5165$
&'-1-##-&$E ,)(.!'(%!(4)'8121('&41)2112@"(4)'!14&2" F$
?+,-#'#$ 2)160110<#$ #6"-&$ #-1$ .-$ B36# 14=4@6 3$ =#$ 0$ .1$ +,-$ >-,1-2$
.6#$ )6"61#$ #-76&#$ -"$ 7-&$ +,-$ -#'*7,-2$ -B51-##0&':$ !#$ 90#-&$ -&$ "0$
T0>20B410L# 13/9;:0:0K4 3$ )0.0$ #6&.0$ 9,#)01@$ "0$ #->0$ .*0&03$ 5-1C$
40,1@$.-$)625-'*1$029$-"#$7-&#$-B51-##0'#$5-1$,&0$#6&.0$)6&'16":$$
V-&*2$ %'&# .-$"0$ /36:24#;A9;20/;=:4@# *$ %'& $.-$ 2;J;2F=104 $6$
)6&'16":$!"#$50##-2$0$ 8'&# E 1!A!1"!(#12,"41&8#2"! F$$-"#$ D!F$!&0.
+.F3,'%$%3%$- E 42%2(')"#12(2':($,(4)*)1(%&?!1!,# F:$
;"0>61#$ -"#$ '1-2$ #691-$ "0$ #6&.0$ $ .-B-2$ +,-$ 491.&:$ !"#$ 50##-2$
5-1$ ,&$ @M6;2# 5-1$ 0$ +,-$ -2-'&$ 8",61-#)(&)0$ E 81&'!1( 8!*( A!1%0(
%!"81B"(8!(A!1'!0(&(%!"81B"("):1!8)"!'(!"(&'2#+!"0(%),2,#($,2(
?)#)(4)':&,2%2 F:$
?708-2$ "0$ M86'6N$ .-$ "0$ 8",61-#)(&)0$ $ 0&0"'A-2$ "/-B51-##%$ .-$
)0.0#),&$ .-"#$ 7-&#:$ X*$ =#$ >-1.3$ >-12-""3$ 716)$ 6$ &-71-3$ -')3$ -&#$
&.)01@$ -"$ =0K;@@# BC;A92;660L $ .-"#$ 7-&#$ .-$ "0$ &6#'10$ 26#'10$
-B5-1*2-&'0":$$
;0$ /4:20E#BC;A92;660L#6;2M#=E/F2014 3$6&$4$'&.1-2$-"#$1-#,"'0'#$.-$
C=2%2(4)$',2()(?&2("!1/($,(&,%&A&%$$0(!#45(D2(&,?)1'24&E(6$!(!,(42%2(&,%&A&%$$(!"#/(!,(?)1'2(%!(F1!4#2,+*!G(
!"(4),A!1#!&7(!,($,2(F4)*$',2G5H(IJ(KILKI=(
- )%P&%%&Q(#8O#PUVP(#
!&$0+,-#'$'5,#$.-$2)160110<$&6$4$40$ T0>20B410L#13/9;:0:0K4 3$-#$8010&$&.>.,0"2-&'3$029$&62=#$"0$26#'10$
-B5-12-&'0":$L6#01-2$"0$26#'10$./,&$&.>.,$E 42%2(4.&8(4),#B(')"#1!"(,)'B"(%-$,(")*(#&8$" F:$
I#$ ,&0$ '-)&6"670$ ,&0$ 2)0$ 2=#$ 0>0&Q0.0$ +,-$ "/0&'-1613$ 69'-&&'$ -"#$ 2*)160110<#$ 029$ "-#$ #6&.-#$
#&'-''A0.-#$.1-)'02-&'$#691-$-"$)453$-&$)625'-#$.-$8-1D46$&$>'16$$.-#51=#$491*.01D"-#$&6#0"'1-#:$
;0$'()&)0$./69'-&1$,&$2)160110<$./6"76#3$.6&)#3$-#$80$ 0=#60:E 3$$"-#$#6&.-#$-#$80&$.1-)'02-&'$#691-$-"$)4*53$
2'P0&Q0&'$,&0$2@+,&0$E ')#('B"(F?&2:!G0(;2(6$!(B"('B"('!4/,&4(&(&(1!"#!'(!(?24#)1(.$'/ F:$?$2=#3$5-1$0$
)0.0$7-&3$'-&*2$ E=#I2E9 $.-$#6&.-#3$-&$)625'-#$./,&0$#6"0:$
Bloc 2
:;9@S@T5@5U;9E # &-)-##'-2$2=#$-+,502-&'3$2-#,1-#$./-B51-##*%$09#6",'-#$
E&6$)62501-2$029$1-#FK
!"R"#OA9;20/;=: 6 #4/>#/0123422456#
]-2$ .-$ )6&'16"01$ "-#$ J3=:6# B;# K4204>0@0:4: 3$ 0*BH$ )62$ -"$ 101@;# B;# K0B4# ./,&$
-B5-12-&'$ 029$ 2)160110<#:$ ]-2$ .-$ #09-1$ +,&-#$ 51-7,&'-#$ '-&2$ *$ "-#$
56 ##9"-#$1-#56#'-#$090&#$.-$8-1$)05$-B5-12-&':$
;-#$ J3=:6#B;#K4204>0@0:4:# 6$./-1161$56.-&$#-1$ 4@;4:?20;6 $6$ 606:;/M:0XE;6 :$$
;0$ K4204>0@0:4:#>03@?I014# =#$26"'$2561'0&'3$P0$+,-$56'$#-1$+,-$"-#$.8-1(&)-#$-&'1-$#0&#$$20"0"'#$#7,&$2=#$
.-7,.-#$0$"0$ %&?!1M,4&2(:&)N+&42(6$!(,)(82"(2(2('22#&2 :$$
L6'#-1$#-1$462-$6$.6&0$08-)'0$26"'$0$"0$
;"0>61#$ 0)0901-2$ -B51-##0&'$ "0$
'0&'03$P0$+,-$'1-90""-2$029$ 63XE;6 :$
X$ 56.-23$ 07081-2$ '-B*'$ #@$ $ 20"0"'$ &0,
.+#0!N* !"&!V!&'' 3$ ->'0&'$ "0$ >0109"'0'$
&'10-#5-)H8)0:$ L-1C$ #*$ =#$ .-$ "0$ #0&7$
;0$ >0109"*'0'$ .-7,.0$ 0$ "0$ /4=09E@410L#
B;#@;6#/36:2;6 $'0 29 =$#/40$.-$)6&'16"01$
'0&'$)62$-#$5,7,:$?$"/4610$./-B'1-,1-$-"$[?3$$)6#-#$0BH3$'029=$-#$1-8"-)'1@$-&$-"#$&6#'1-#$1-#,"'0'#:$
L-1$ 0$ ->'01$ "0$ >0109"'0'$ 606:;/M:014 3$ 56.-2$ -#'20 1 $ "-#$ )611-))6&#$ &-)-##@1-#$ 0$ 501'1$ .-$ "-#$ .0.-#3$
05")0&'$ "-#$ =32/4@0:W4103=6 :$ V029=$ 56.-2$ 1-.,1$ 0+,-#'0$ >0109"*'0'$ 8-&'$ "-#$ 14@0>24103=6 $ .-"#$ 0501-""#$
,'"'A0'#:$$
;0$>0109"'0'$ 4@;4:?2 0 4# =#$&.-8&.03$$)62$0$'0"$08-)'0$0$)0.0$)6256&-&'$.-$86120$.8-1-&':$L-1$0$->*'01D"03$
4-2$.-$'-&1$)"01$-"$.##-&<$-B5-12-&'0"$+,-$>6"-2$8-13$0BH$)62$"0$ 0=J;2F=104#;6:4BY6:014# 5-1$0$)0"),"01$)62$
08-)'0$0"$&6#'1-$-B5-12-&':$]-2$.-$8-1$ 2F9@0XE;6 $.-$)0.0#),&$.-"#$-B5-12-&'#:$\6$56.-2$070801$&62=#$,&$
&.>*.,,^^$
V6'#$ 0+,-#'#$ 516)-.2-&'#$ #/&'-71-&$ 0"$ Z32[J@3Z $
.-"$&6#'1-$-B5-12-&'3$$5-1$'0&'$-&#$.6&010&$-"$ 101@;#
B;#K0B4 $.-"$& 6 #'1-$2*)160110<:$
WP0-#!%"-F!%,M4!O'0-XY:!--0"/0N10$!.0"#+,XY
;N10$!.0"#&0,,+F%$+#%$!*
;0$ K4204>0@0:4: $=#$26"'$2561'0&'3$$"/4-2$.-$'-&*1$-&$
_&0$ >-70.0$ '-&*2$ "0$ 20'7-$ .-"$ 2)160110<3$ 4-2$ .-$
8-1$ ,&$ 92;9231;664: $ 5-1$ 0$ *&'-151-'01D"03$ P0$ +,-$ #-1@$
-"$M 13=:23@#B;#XE4@0:4: N$.-$"0$&6#'10$*20'7-:$
&%&]U*$&$#
(($O-^$)& G$08-)'0$*7,0"$0$'6'-#$
&UO&$,%*& G$56'$08-)'01$.-$
)6256&-&'$.-$"/-B5-1*2-&':$
Introducción a los Microarrays 33
Alex Sánchez &
Esteban Vegas
Fuentes de variabilidad
- Biological Heterogeneity in Population.
- Specimen Collection/ Handling Effects.
- Tumor: surgical bx, FNA.
- Cell Line: culture condition, confluence
level.
- Biological Heterogeneity in Specimen.
- RNA extraction.
- RNA amplification.
- Fluor labeling.
- Hybridization.
- Scanning.
- – PMT voltage.
- – laser power.
(Geschwind, Nature Reviews Neuroscience , 2001)
Introducción a los Microarrays 34
Alex Sánchez &
Esteban Vegas
Tipos de variabilidad
- La variabilidad sistemática es aquella que
afecta de manera similar a todas las mediciones
- Cantidad de material disponible
- Instrumental de laboratorio
- La variabilidad aleatoria puede afectar de forma
distinta a cada componente del experimento
- Calidad del material
- Eficiencia de los procedimientos de laboratorio
Introducción a los Microarrays 35
Alex Sánchez &
Esteban Vegas
Cómo se afronta la variabilidad
- Cada tipo se trata de forma distinta
- Variabilidad Sistemática
- Podemos estimar las correcciones necesarias a partir de los datos:
NORMALIZACIÓN o CALIBRACIÓN
- Variabilidad Aleatoria
- Suponemos ciertos modelos de error (e.g. $e_i \sim N(0, \sigma^2)$) y recurrimos:
- Al DISEÑO EXPERIMENTAL para controlarla
- A la INFERENCIA ESTADÍSTICA para extraer conclusiones en su
presencia
- Todos estos procedimientos se integran en un flujo
de trabajo (“ pipeline” ) o ciclo de vida de un
experimento con microarrays
Introducción a los Microarrays 36
Alex Sánchez &
Esteban Vegas
El ciclo de vida de un experimento
Bloc 2
Quan la imatge està analitzada, obtenim cada array en una columna, i cada gen en una fila, on ens indica els
nivells d'expressió en cada array.
$ ?110<$`$ ?110<$a$ ?110<$b$
c-&$$d3de$ f3eg$ f3hi$
c-&$a$ j3a$ a3i$ j3de$
c-&$b$ i3de$ i3dj$ i 3 dg$
Conclusions
Els microarrays són una eina molt potent, i per tant s'ha de manipular amb cura. Podem generar errors molt
grans però alhora ens permeten obtenir una quantitat immensa d'informació.
Existeixen milions de gens, però en molts experiments només ens interessen aquells que són diferents en
cada mostra. Volem triar gens que siguin característics de cada condició.
Si tenim la qüestió biològica definida, podem extreure molta informació d'aquesta eina. Poder replicar el
nostre experiment és molt important.
Bloc 2
!"#!$%&'(#)"$"%*
&128-,,1;-<:12 @%A=%%
B)% 4% 5,% '()% &'7)$-% $% 7+1% &0% ("692",' ($' (0%"&3 =% B)% ?4?%. 6 6:2% +,9% 4&,&% +0% +CD+)-%
)"$0D(17(1+'%+0%&E/+,%71(.1&'&%$%+0,%7+1'+%6+1%&09)$,$=%%
B)% ?(?!=@"5$ % 5,% +)% E/+% 6&1+'% ,+1F$1-% +,9%
4&,&% +0% )&% G+4-% ,$'7)+'+0% $0*1(3/2'% )+,%
0(,1+,%3&3+,%$%H&%"#(%6&%(=%!(%7(,%D(01()&1I
#(% (-% D('% +0% +)% A-% 7+1J% 0+D+,,$+'% %$4!'
,-,./-0.+1.+2.-03+
AB%&"$$!#C%!DE!4"@!#F(%*
B,/3$+'%)&% G-,9-<9:9.-. %3+%)"+C7+1$'+0%
;,$,+'9$D&%$%&)+&J1$&<-%1&D*+'%3+%1+3/$1I)&%&)%
'9C$'=%
K+D$3$'%,(41+%)+,% ,HI:9J/12 -%+),% I66:2 :%
KB%=?F!#5"L%&!%(%"@(F+!%*
!>.,-3% '$D1(&11&L,%
$/,.3% $'&.+,%;/0&%7+1%D#$7<%$%6$C+1,%3+%)&%$'&*.+=%
M$CJ%+0,%3(0&19% 9>M6,7-89N%9>O9G9O/-:%O1%8-O-%
26>O- =%
N+'% ->P:929% 3+%3&3+,%3+% <-9Q%>9G1:: @% 20*#30,'($'
RB% 5 =#F4=%&!%ST("F(F%&!%?("D%#"U!%
!>.,-3% $'&.+,%&'4%+)%6(1'&%3+%)&%'9E/$0&%
;=OBP:<%
E,68V23% 6&1+'%3$&.0J,$D,%$%D(01()%3+%E/&)$&-%$%
D('7&1&1+'%)&%'(,1&%&'4%3$6+1+0,%'(3+),%
;>PQ<=%
$/,.3% +,&3$,$E/+,%$%.196$D,%3+%D(01()%3+%E/&)$&*=%
!"#$%&'()*#+%,%
-./#01(%2#31.
!"#$"%&%'()*(%+,-.'.'($"%/0./#'
! "
123.#4#+)04/#"
!#/+/5(6-7.8-5(
%4/0%-.9%)#5(
:+.72;.+)#<'2=%
>#+'#-%5().6?4.-()*(
)#&.+%"
313(/##-'
3%'%)#(+(@74-
:'"A6".+)-B
C.(6%--%5(6%--%D
E.6?4.-()(7/+)"
)>F.G
@7G"*'.H+(I(CJ!K'
:'"A6".+)-B
C#-#(;.+)#<'
!#4%'(#G4.#+*'
3%8*-#&.4'
;8A8%')5(
=0-/.G-' #G4.#+'5(
30+(&%/".%-
;8A8%')
=%+*L#(%-$#("?$.)#
M
!"#$%&%&'()'*"')+,)-%.)"/0'10"'.%1-02--
456'7.#8)")&
492/0&'1-*(0&
4:6';<'()'12$%(2(
4=2>0'"%?)$
4@6'A-),-01)&2(
4B6'C+,$0-21%D"'
()'$2' Matriz
de Expresión
4E6'!"#$%&%&
4F6'G%8"%H%121%D"
I%0$D8%
Bloc 2
!"#$%&$%'(&))+#*
6"#&7$8'%(9:#*
$.0123 !"<=>7"=19.%'(7"?@A:"B';('.'3%,0$63."'3"C@"
$"D@7",-(9;,0$63.E"
)4.- !")'%'00$5")*'#'0%'."F,%0"$"063%(61")'"G9,1$%,%:"
;"#<=89)9#&)+>?)+9(#@)&8&((9:#>&#A&<)#**
&7$%&))+)#>9B&%&<(98C&<+D#**
$. 0123 !"%H%'.%7">IJK>:">L9.%,('+"'1." EFG/H0.3 :"
)4.- !"11$.%,")'"-'3."/'&;('..$5"#61)"0M,3-'7";H
N,19'.E47"-(2#$G9'."$";'(#$1.")*'&;('..$5:"
;"#<=89)9#&)+>?)+9(#@('<)+%I((9:#9#**
J89>(9:#>KI<#$%&>9(+'%D** "
$.0123 !".'1'00$5")'"N,($,F1'.7",L9.%,+'3%",1"+6)'1"
$"N,1$),0$5"
)4.- !"+ 6 )'1.";(')$0%$9.7"+'.9('.")'"#$,F$1$%,%"$"
(';'%$%$N$%,%")'1"+6)'1:"
L"#)9A<9B9((9:#M9'8NA9(** "
$.0123 !"OP>7"OAP>"/Q."93",1-6($%+'"G9'"'3.")$9".$"
'.%,)$.%$0,+'3%".$-3$#$0,%$N'."'3%('")6."#'36%$;.4"
)4.- !"01,.'.")'"-'3.7"$";,%MR,S."'.;'0$,1+'3%"
O"O"#$PEQHP,Q#R.S1PQ3#/#QTQUEHQ3#
_!"#$%&'()&#+,-.)/0/1#_*
?'.9(,('+"'1."'#'0%'.")'"1'.%$+91,0$5"+$%L,3T,3%"U?2_*!
+/-/@/*;;0#0,-0 :"V."93"'.%9)$",3%$07"$";'("%,3%"1," W4/HP-/-#
)'1."+$0(6,((,S."Q.";$%L6(:"
J/.P/1PX#P#-./1-/UQ,-#
<'(" ," %(,0%,(" )'" (')9$(" 1," N,($,F$1$%,%7" M'+" )'" .,F'(" .$" Q."
.$.%'+2%$0," 6" ,1',%X($,7" $" 11,N6(." %$3)('+" )$#'('3%."
%Y03$G9'."G9'",;1$0,(:"
>+F" 1," .$.%'+2%$0," '.%$+,('+" 1," 06(('00$5" ," ;,(%$(" )'" 1'."
),)'.!"0,1$F(,('+"$"36(+,1$%8,('+:"
!"#$%&'()*#+%,%
-./#01(%2#31.
tratamientos al azar
dentro de cada bloque
- 1 Factor: LPS (6 ind)
- 1 bloque: edad (6 ind)
tratamientos sino que
reclutamos pacientes.
procuramos sean
homogéneos.
!"#$%&'()*#+%,%
-./#01(%2#31.
/&0$&($)0 $&1)0 '+-#"&#.&,&#*
2&0$'%3$&4# 5+6,'#$)"&#
/&0$&($#0 7)"%#0 '+4)($"),#",#54)(0&'"#",#
2&0$'%3$&4#
80$&%#" ,#+4)""'44&)( #+1#"$&" '+,)0+#$)
Calibración, Normalización
6,'#$)"&#
Diseño Experimental 9+ controlar 0:+&(7,:'(4&#
Estudios de potencia 9+ cuantificar 0:+'7'4$)
Analisis de significacion 9+ inferencia ;
Bloc 2
-5'
=>%'+.',-&$/.00,9'$-&/$'+:$*.&','?@AB'C' 6
-
'=$//)/B6'A,'&$-,(' 8#&!-91#0'01:-;-#./0 8'2)/$('4$/'."#$%&'()$+6'A,'
D)('(>%'&/)%%)%':$"#.0,9'4$(8'(>%'-$0$%%,&.&':,-4)/(.0,9'&$-,(6'@)"#$%'/$2+,"#$%8'2)0.',-4)/(.0,96'
A$(2/$'"#$./1' !)';1)'(#'!"#88<8 8'"#$'%,'>%'()+&'E/.-8'%,E-,4,0.'"#$' !"!#$"#%$!&'#("#)%$#'&#%#&'+ 6''
7-'+:$F$(2+$' (#':-!0'("#$&8#00-='(#'3>* 8'$& 0 6'7+' !%6'A:G.'$'2+.-&$I./'.3.-%'$'4$/'"#.+%$5)+'$F2$/,($-&6'
2&3"!+#$+,)#%&-#$,&$,"4&((+)#
H,/$('+$%'E/14,"#$%6'A,'&$-,('%,%'/$%#+&.&%'.(3'?@A8'5$#/$(',4$/$-&%'/.++$&$%6'@)$('4$/'*,4$/$-&%'E/14,"#$%8'
.E/#2./'$+%'-)%&/$%'.+J+$+%'*$'5./,$%'(.-$/$%K''
7-'+:$%&#,'$+'!H?8'&$-,('&/$%'E/#2%8','$-'0..%0#-'$+%'.//.L%8'+$%'0)/3$%')-$-',4$/$-&6'!,FM'5)+'*,/'"#$'G,'
G.'G.E#&'2/)3+$($%'$-'+.'%$-L.+6'7+'0+#%&$/'&.(2)0'>%'0+./6'7+' ;<./8%'()+&'*,4N0,+'
.I#%&./'&)&$%'+$%'.$%6' ,%#("&-%$.!/%0.1#"!#2'"'#'+!45.+#6%+!%(!#5"$'(!+#474.(#*%++%&#'$#2.$!&' 6 '
_5&,4&,+#$+,)#%&#-6+!7-(8#$,"9+9-)+)(:#)##$;#_*
7-' $+%' -)%&/$%' $%&#*,%8' $+' -)(3/$' *$' E$-%' >%' ()+&' (>%' .+&' "#$' $+' -)(3/$' *:.//.L%6' O#.-' 0)(2./$(' $-&/$'
(,0/).//.L%8' 4$(' #-.' /' (#' 0/1(#./' #./8#' #!!0 6' @$/M' %,' %9-' ()+&%' E$-%8' %$E#/.($-&' G,' G.#/1' ()+&%' :)!0<0'
7%2$/$(' #-' PQ' *:$//)/8' .,FN' "#$' $-' $+' 0.%' *$' "#$' &,-E#>%%,(' RS6SSS' E$-%8' G,' G.#/,$-' ABBB' 2#.0 ' "#$' $%'
T0)+./,$-U'0)('.'3)-%6' <"!),+()#'=-)*$-& 6 '
A,' +.' /' 0/1(#./ ' >%' ()+&' E/.-8' $+' &' C)!<8 ' >%' 2$&,&8' ,' ."#$%&%' %$/.-' $+%' (>%' %,E-,4,0.&,#%6' V,' G.' *,4$/$-&%' &' *$'
%&#$-&'2$/'.+%'-)%&/$%'$%&#,%8','%:G.-'$'4$/' 4",,&44"!(#%&-#%&!"'*!+%", 6 '
?.',$.'$+' %1!/-&!#'/#0/-.2' $%'/$#$,F'"#.-'.E.4$(' 4(#)%$"&#-8+#'!.!#59'&&"&#.#2'&#-"$!'+#-"+!&'+ 6'A:G.-'*$'
4$/'0)//$00,)-%'2$/'.'0)-&/.%&'(W+&,2+$8'.I#%&.-&'$+%'2X5.+)/%'.(3'$+'-)(3/$'$'()%&/$%'.-.+,&Y.$%'= :'-#5'#
&'+!%&#'$+#2%$+"+#*"+.!.4+ B6'
!"#$%&' (#)$'(#&'*
+#,-.&#* (#)#/%,#*.01 #1)
!"#$%&' (#)$'(#&'*
+#,-.&#* (#)#/%,#*.01 #1)
Bloc 2
!"#$%&'()('+#,(#-(./0'1&(-#*
>'?@6#A1$3<)3)3#$)# '5-+4)#-(./0'1&)#80'&1) #)*0# )%+4(-#-(./0'1&(- 6#
'#)8,&"-)#51<)3)5'B#<1-#"&3#,()#51(-3)#$%)$-3)#C !"#$!%#"&&"' D#1#)0#A)3'&"#C ()&*+!&" D=##
E'#"B(#"''$')3"6#51<)3-&'?&(# !"#$%&'($()%$+' =#>'?F#<1-#"';('2'5) 3 #8,&#<31A&(&(#/%,()#"&89:(5')#51,()=#
G%)$'(&)&(-#/&#"&89:(5'&"#-+#1$-&"#)<$'5)5'1("H#
I ,-$&".)"&(#-%/.-'("&!"(#"&%)"'(
o J/&(-'2'5)3#3&<&-'5'1("#'(-&3(&"#
o K,5)3# /1'('"# 2,(5'1()$"# 51("&3A)-"# &(-3&# &"<:5'&"# C/1'('"H# 3&;'1("# 51*,(&"# 8,&# -&(&(# ,()#
2,(5'B#"'*'$)3D#
I 0%1")(2!"1$!(-.(3+&4$5(16+&.(2!%*"7&.('$(/+'8+")(1%)$&$'(3+&4$%&.-'(4%)+&'(
o >$'(&)*&(-"#$15)$"#&(-3&#/,&"#"&89:(5'&"#
o >$'(&)&(-"#L$-'<$&"#&(-33(7,(-"#/&#"&89:(5'&"#
I 9+'4.!(3!.#)"&*'(16+&.('"8:;&4$.("&(+&.(/.'"(1"(1.1"'(
<=>?@AB(@ 6 ,CDEA,(
M1#)#,",)3'"6#-&('#1$-"#<31;3)&"#)#$)#N&0#8,&#"&3A&'?&(#<&3#)#)$'(&)3#"&89:(5'&"#'#5&35)3I$&"#)#$&"#0)"&"#
/&#/)/&"=#E'#A1$,&#&$#<31;3))#"';,'#L-'$6#(&5&""'-&*#51(:'?&3#$&"#"&A&"#5)3)5-&3@"-'8,&"#'#-&('3#5$)3#&$#8,&#
O&#/&#"&$&55'1()3#5133&5-)&(-#&$#<31;3))#'#")0&3#&$#8,&#A1$�,"5)36#'(-31/,'(-#/'A&3"1"#<)34*&-3&"=#
F$'$5(#-%/.-(1"-'();*%1"'G(
I $%&'()('+# ,(# ,6(-# -(./0'1&(-# 9 G)# <3'&3)# +"# $)# *&A)6# '# $%)$-3)# $)#
"&$&55'1(&*#/&#$)#0)"&#/&#/)/&"D#
o :0+5,(-# 84;<&1-=# &$# >?@AB?@ # +"# ,()# A'"'B# ;342'5)6# '(-,P-',# <&3F#
o Q$"# )%854&+(-# CD+&1-#* /%)$'(&)*&(-# ;$10)$# C EF D# 1# $15)$# C GF D#
"&3A&'?&(# <&3# )# 10-&('3# <,(-,)5'1("# 8,&# &("# 8,)$'2'5)3)(#
&$"# )$'(&)&(-"=# >8,&"-"# )$;13'-&"# 2)(# "&3A'3#
D4584))1&7#,&';&1) 6#<&3F#"B(#*)"")#&?';&(-"#<&3#)#"&3#
<345-'5"#&(#5&38,&"#&?-&("'A&"=
I $%&'()('+#,(#-(./0'1&(-#H%+&D%(-#
I I(41)# ('# J)-(-# ,(# ,),(-# ,-.'"($ /0%$ '"12304+%$ +$ &5%&*#%$ '"#6$
/0%$1/"$'+7/+$%$&%$8%'"$9"$9%9"':$ =#G'#/'&#)$#<31;3))H# 8/'4%5($
1/+0%$'"12304+%$;'$ )H'('$)$-.! $%$&%$1/"$<.$="$!.'%>$4.(!%#%0$
4.0#%$ /0%$ 8%'"$ 9"$ 9%9"'$ 9"$ !#."?0"'>$ 0)4&".*+9'@ D=#
R0-'(/3&# ,(# 4;'K&'8 6# <&3F# "&34# 0)"-)(-# $&(-6# 7)# 8,&# S),3&# /&# 51*<)3)3# *1$-&"# "&89:(5'&"=# >8,&"-#
"&89:(5'&"#C"%)(1&()#2&3#,(# /-.' D=##
Q$"# :-1/&"# KG>ET# 0)")-"# &(# )%854&+(-#
L(64M-+&1-# &("#<1/&(#/1()3#"1$,5'1("#3)<'/&"#<&3F#
(1#(&5&""43')&(-#B<-'&"U#
!"#$%&'()*#+%,%
-./#01(%2#31.
Ejemplos de alineamientos
2 Secuencias no alineadas
L G P S K S Q G T K G S S R I W D N
L N I K T S G A K G A I M R L G D A
Alineamiento global
L G P S K S Q G T K G S! S R I W D
" " " " " " "
L N! I K T S G A K G A I M R L G D
Alineamiento local
!!!!!!! G T K G!!!!!!!
" " "
!!!!!!! G A K G!!!!!!!
Bloc 2
5"=/-/% "&?'-(,@"'&% "(+'-;,&;% ,% 0:/*;% /#/(+0/% B("-,-% "(,;7/% "&?/-"'-C% @'(+,-/(%
5/%/>DE&@"/%5<'&F/%+'"@"'&*8%
&'()+ G% !"#$%&'()()!+),*-./#0%+1)2",%0%"#,)&"&+!, % ,, %
H5/&;";,;%BIJKKC4%"("0,-";,;%BIJKKC%"%7,+%BLJKKC8%%
M"&,0(/&;4% 0< -./0' % <'6;)% ?/&;% :&,% 12+034 % /+/@N?"@,4%
/&%,>:/;%@,%A,%"7:;%0,% !56789&&:; >:/%/&%5'&,%0/*%
+:&;:,@"'&%5/%(,;@A%"%("(,;@A8%
O&% 5/0% /#;-/(% &'% @'&;,-.4% A,6";:,0(/&;% /0% 5/%
0:/--,4% )*% /0% >:/% ?,% /0% @'(/&P,(/&;% 5/% 0,%
@-/(,00/-,4% Q,% >:/% "% 0/% />DE&@"/% *R&% 5/%
5"?/-/&;%0'&7";:54%+/-%,07:&%00'@%A/(%5/%;,00,-8%
S,% !56789&&; )%:&,%(,;-":%5/% ABA; B &#%3)-'+&4*)
!!&4, C4% /&% 0,% >:/% :&,% @'"&@"5E&@",% @'&;,% C; "% :&,%
5"*@'-5,&P,% DA 8% S,% ?/(% */-="-% +/-% ,0% TU!4% "% /&%
,>:/;%@,%)%"(E;-"@,8%
9';%*/-% 4(3+E032; B@'"&@"5/"#J&'4%+:&;:,;%,(6%V%"%KC%
3&% /0% @,% 5/0% ,("&'.@"54% 0/% (,;-":*% *R&% ('0;%
()% 7-,&% BWV#WVC4% "% &'% *'0/&% /-% :&";.-"/8% S,%
**F=$:/%;/&"(%
-4J-+3+4.3I; B 2"&) ,4) -'1) (2##&) () !5+3%#"60%()
$4+#)0"3)'#)+!&4* C8%%
S,% -313K3+4L 4%/&%,>:/;%/#/(+0/4%R&%0/%+,-/00/%5/%-/"5::%>:/%;"&7:"&%:&,%+:&;:,@"R%()*%7-,&%>:/%F/-'8%%
ices de substitución
de la matriz de substitución:
la coincidencias (match)
ción de las discordancias (mismatch)
mismatch= -4 para comparar secuencias de nucleótidos. Esta puntuación es
cativo EMBOSS.
ètrica, i.e., puntuación G vs C = puntuación C vs G (o cualquier otra
ótidos).
itaria?
$$
\begin{array}{c|rrrr}
  & A  & C  & G  & T  \\ \hline
T & -4 & -4 & -4 &  5 \\
G & -4 & -4 &  5 & -4 \\
C & -4 &  5 & -4 & -4 \\
A &  5 & -4 & -4 & -4
\end{array}
$$
$$
\begin{array}{c|rrrr}
  & A & C & G & T \\ \hline
T & 0 & 0 & 0 & 1 \\
G & 0 & 0 & 1 & 0 \\
C & 0 & 1 & 0 & 0 \\
A & 1 & 0 & 0 & 0
\end{array}
$$
na matriz de substitución para aminoácidos?
!"#$%&'()*#+%,%
-./#01(%2#31.
Resultado d’ !"#$%%&'()*&+),-.)/
http://www.ebi.ac.uk/Tools/emboss/align/
- ¿Qué quieren decir
Length, Identity, Similarity,
Gaps y Score, y cómo se
calculan?
!"#$%&'()*#+%,%
-./#01(%2#31.
Efecto del valor de la penalización
Muchas inserciones pequeñas
Bueno si se trata de proteínas
distantes
Pequeño Grande
Algunas inserciones grandes
Bueno si puede que se hayan
insertado dominios completos
Grande Pequeño
Pocas inserciones o eliminaciones
Bueno para proteínas muy
relacionadas
Grande Grande
!"#$%&'()"
!"&$+,$+*
$-&$%).%+*
,$/+ gap
!"&$+,$+*
'0$(&1('+
,$+ gap
A0/#&:$&:/&0#/&)/44"/&B0#&<0$-#"C&"#$%&0#&:-)/&'"&(")&/4$#"/1$-#+7&$#"#*"%&)".)-'0$)&
4>"F-401$D7&"#&4/&90/4&+>"4$%$#"#&-&+>$#+")*"#&)"+$'00+2&
G#&0#&"5"%.4"&'>GHIJKK&'>/4$#"/%"#*&<4-@/47&F"$"%&
90"&4/&."#/4$L/1$D&"#+&1-#/&1-%&/&MN7&$&90"&4/&"5*"#+$D&
'"&.&"#+&1-#*/&1-%&/&N2O2&
G4+&</.+&($#/4+7&"#&"4&164104&'"4& &'()$ &#-&"+&P&"#&1-%."7&
.")?&"#&"4&.")1"#/<"&+$&B1-#*/)"%&+-@)"&-#L"C2&
Bloc 2
45 .% !"% #+#3&% +'% (!"#(#3#'% (% -"3% '()(&(#-* @:# A$# ($./"0# B<# -'# (78(7 ,# "#
4$0/'(#-'#("/$/#(B<#5C('/1<#9 363"'%"+)7'%2(% 2 30(3%)3#82.%)(')3#82.%
>)# <'%D'5.# 'E'(6)'# B<# ,:-,;62-0263)
A$#"-'5.".$.#B<#-')# >+?@) "#)? .A4B2) <'/F#-'# *C'' :# ;'#!)%8+),3$3"#%'!/<="8(!'%
>38#("3%?%(%@.%3)6%)+&#!'%(!"#(#3#'%!"%8+)AB%
&DEFG%$)H9)$%!$EGE%IGJ)H9)KF#E9LM9$)
A'<# 0-3B;/. # <85# -'# ,4-"% /-+8(!"#% *!% C$!/<="8(!' @:#
D3% C$!/<="8(3% *!&% !',"+$-)+( 7'% 3/-!&&3%
/-!% 2!% #$+63#% E+% )./"+0$-$ B% D3% *!&%
98205,2 G# '<."('(# 30'# )$# /"+&!$( '<#
<02<.".0'"E"# 05# .1.$)# -'# FBFFFF5GH%
H1<$)./'<# 12<'/='(# 05# I:IIIJKL# 9-'0#
='%$-'<#(B<@:##
!"#&'(#')# ,45# 30$)<'=1)# $)"5'$('5.,# )? .A4B2 # &"5$)# -'6'5-/F# -')<# 52.4. # '<6'7"&"7$.<# '5# )$# 0-3B;/) 12) ./=.3;3/A;Q) "# ' 5 # )'<#
526-,;3P-A;46. #30'#."5%0"5#)?12'/.0/$#"#)$#'E.'5<"8#-'#%$6<:#
>E"<.'"E'5#-"&'/'5.<#<"<.'('<#-'#605.0$7"8,#71(#6'/#'E'(6)'#)$# 0 - 3B;/)/6;3-B;- ,#1#.$(2B#)'<# 0-3B;/.)KD&6 ,#
0/3-A;46.) 5/63/-,.)
-AA253-12. #9 *(C!$="8(!' @: )
O"5$)('5.,# 6'/# $# )'<# 6/1.'P5'<#
1;.3O6A;2.) 2R4,/3;R2.) 6'.".'<,#
20<7$/'(# 05$# !"#$%&)
Bloc 2
!"#$%&'()+,-#./0%)1,0+,1#23)+#*
A&$ 4,(40#,5607-)80 $+19"+&$1-&$2/&-$<1&-,,&,$
/(&.,B&/0$$9(/$,&-,$'(#$3($,/"6&/$&.,(/-&,*)(+$
#C+$ /&9*3(+0$ 7"#$ &/&$ .&$ 3('&(0+04)9#
D(+".(#$ &.-(&#(-,+$ #C+$ 9(,,+0$ *$ &-(#$
E(/$ &$ "6,(-/$ .:&.-(&#(-,$ ;9,#0$ 9/#(/$
7"-+,/1F/(#$ 1-&$ #&,/1$ 3($ /(7"-+,/17780$ <1($ (-+$ 9(/#(,/G$ (+6/-&/$ <1-$ C+$ (.$ #.."/$ 7&#F$ 9(/$ &$ &//6&/$ &.$
E(/$(5(#9.(0$9(/$&$&-&/$&$E&/F+0$9"3(#$&2&4&/$
1-&$)&$3/(7,&$$&-&/H'$3(+$3($I&/7(."-&0$"$6C$
&-&/$ &$ A"-3/(+0$ 3(+)*&/H+($ $ &7&6&/$ &//6&-,$ &$
E&/F+@$J#6$.(+$+(<=>-7(+$21&.0$,",$*$<1($)".(#$
+2-47&78$6".;27&$,C@$
J5F0$&$<1&.+()".$9"+78$3($.&$#&,/1$'*$9"3(#$
&//6&/$3($,/(+$#&-(/(+$34(/(-,+K$(-$ .)0&'10% 0$
(.$ <1($ +19"+&$ 7"-73>-7&$ 3(.$ 7&/G7,(/$ 4.&$ *$
7".1#-&@$L-$ 6'();'10% 0$(.$<1($+19"+&$*-+(/,&/$
2&9+$(-$.&$+(<=>-7&$)(/,7&.0$"$6C$(-$ 8,(*)40% 0$
'"/*,B"-,&.@$
E(/$&$"6,(-/$(.$-"+,/($&.-(&#(-,$;9,#$#/&/(#$<1-$7&#F$"6,C$.&$#.."/$91-,1&780$$(-+$<1(3&/(#$$
&<1(+,0$,",$$<1($-:'$9",$'&)(/$#C+$3:1-@$
<5,+3%,#=>#$%&'()+,#.,#?,,.%,+01@A71-46#*
E(/$&$&9.7&/$&<1(+,$&.2"/,#(0$'$'&$,/(+$9&++"+$#9"/,&-,+K$$
=" !"-+,/1M/$.&$#&,/*1$
B" E1-,1&/H.&$
C" N3(-,47&/$.:&.-(&#(-,$;9,#$
E(/$&$ DE?FGHIJH#K$#L$GHJI 0$&2&4(#$31(+$+(<=>-7(+$$.(+$9"+(#$(-$1-&$2/&(..&0$(-$)(/,7&.$$(-$'"/*,B"-,&.@$
E"+&/(#$1-$ M=# &$.(+$ ).,1)0-# O !"#$%&'%())+,"-),%.+/0"!,)-* P0$1-$ N# &.+$ +)-+046- 0$1-$ @C $9(/$&$.&$ 'O,(7(0#
5678675899 !"#$%&'()*#+%,%-./#01(%2#31. 4
¿Cómo utilizar programación dinámica para
obtener el alineamiento óptimo?
Se obtiene un alineamiento óptimo para una subsecuencia,
P.ej. el primer carácter de cada secuencia por la izquierda.
El alineamiento óptimo de la subsecuencia inicial se
mantendrá en el alineamiento óptimo final
cualquier otro puntuaría menos que éste → disminuiría la
puntuación total
Tras alinear la primera subsecuencia ya no hace falta
trabajar con ella → Se pasa a la subsecuencia
siguiente y así se va iterando hasta el final
el coste de cada paso es bajo
el resultado final se obtiene de acumular los resultados de cada
paso
5678675899 !"#$%&'()*#+%,%-./#01(%2#31. 4
T C G C A
T
C
C
A
s
22
-"%.:3(:;:)1<= <#%)1<1 >?(/= #(%"1%
@1/A:+ #. #"%.:3?:#(/#B%C=<=. "=.%
)1A1)/#A#. 1./1 #"%>?(/= .#%1(
1":(#1<=D%
E?#<# .#AD%.:(%#@01A3=%F?#
1G1 @?)=. )1@:(=. F?# ""#H1(
1"%>?(/=
La posición etiquetada “$s_{22}$” representa TC alineado con TC
--TC -TC TC
TC-- T-C TC
Matriz de puntuaciones y alineamientos
Bloc 2
,82A1'()'('+,'B0/1)C'D0'-),0*-?E'#F'@#G@#$'
@(<,/-!A#'!#!-#$6#+.!1(-2!,+!3,+.#'!*#-!4&A#/#+.-!3,')&+(3&,+-!5%#!1,4#'!
A#/8! B+#'! 3,'1(/(+.2! 1/&'#/! %+! 3,+./(! %+2! 4#-1/C-! %+! 3,+./(! 4,-2! 4#-1/C-!
4,-!3,+./(!4,-D!E-!'#3=+&32!(+#'!A#+.!1,3!(!1,38!
B!3(4(!3(-#(!F&!1,-(/#'!#!'=0&'!-3,/#!1,--&)#!5%#!1,4#'!,).#+&/!1#/!*#-!
4&A#/#+.-!<&#-!4G(33C-8!!
B!1,H#/!1,&+.!F&!F(!%+!#//,/2!-&!(&+##'!%+(! 9 !(')!%+(! : !F&!F(!%+!'&-'(.3F2!&!
1#/!.(+.!#!<(,/!-#/&(! ;#0. 8!I,'!5%#!(5%#-.!C-!G-3,/#!'C-!(.!: !")#%"&*!)#+,)#!-)#
+,-%.!-#/01#2!#,3.#4-#5%2#.#!6&!-+*!7" ;2!#+-!5%#4(/#'!(')!J8!
@#-!4&/#33&,+-!5%#!4,+&+!,3!(!(!'=0&'(!1%+.%(3&?!-#/(+!#-!5%#!A(/#'!-#/<&/!#+!
B!A&+(2!'&/(/#'!(! (!22.0,41$%1&!8,"#,2&,<2%!(&,&)#22& 2!&!#+!(5%#-.(!4&($,+(!
-#/&(!#! (!22.0,&2!$#&(#$% 8 !K&/(/#'!#-!./#-!3(-##-!4#!-#%!<,.(+.!&!(+&/#'!A#+.!
%+(!L+&(!(!(!5%#!.&+$%&!
G ).0# !'C-!(*.8!!
M%(+!.#+&'!(!'&,/!1%+.%(3&?2!1(--#'!(!(!-#$6#+.!
3,%'+(!&!($(A#'!(!'&**,/!1%+.%(3&?8!
( b )
( g )
( c )
F M D T P L N E
F K H M E D P L E
Sequence 1
Sequence 2
( d )
0 –2 –4 –6 –8 –10 –12 –14–
$$
\text{Score} = F(i,j) = \max
\begin{cases}
F(i-1,\,j-1) + s(x_i,\,y_j) \\
F(i-1,\,j) - \text{gap penalty} \\
F(i,\,j-1) - \text{gap penalty}
\end{cases}
$$
Score (this example) = +1 (match)
–2 (mismatch)
–2 (gap penalty)
F M
F
K
Sequence 1
Sequence 2
0 –2 –
( e )
F M
F
K
Sequence 1
Sequence 2
0 –2 –
Sequence 1
Sequence 2
F( i –1, j –1)
F( i –1, j )
F( i , j –1)
F( i , j )
penalty
penalty
+1 –1 –3 –5 –7 –9 –
( f )
F M D T P L N E
F K H M E D P L E
Sequence 1
Sequence 2
0 –2 –4 –6 –8 –10 –12 –14–
+1 –1 –3 –5 –9 –
( a )
F M D T P L N E
F K H M E D P L E
Sequence 1
Sequence 2
0 –2 –4 –6 –8 –10 –12 –14–
–1 –3 –5 –9 –11 –
–3 –3 –3 –5 –7–9 –11 –
–5 –2 –4 –5 –7–9 –11 –
–7 –4 –4 –6 –7–9 –11 –
–9 –6 –3 –5 –7–9 –11 –
–15 –9 –9 –8–5 –5 –
–13 –10 –7 –7 –6–3 –5 –
–11 –8 –5 –5 –4–6 –8 –
, y j
)
FIGURE 3.21. Pairwise alignment of two amino acid sequences using the dynamic program-
ming algorithm of Needleman and Wunsch (1970) for global alignment. (a) For sequences
of length m and n we form a matrix of dimensions m+1 by n+1 and add gap penalties in
the first row and column. Each gap position receives a score of −2. The cells having identity
are shaded gray. (b) The scoring system in this example is +1 for a match, −2 for a mismatch,
and −2 for a gap penalty. In each cell, the score is assigned using the recursive algorithm that
identifies the highest score from three calculations. (c) In each cell F(i, j) we calculate the scores
derived from following a path from the upper left cell (we add the score of that cell + F(i, j)), the
score of the cell to the left (including a gap penalty), and the cell directly above (again including a
gap penalty). (d) To calculate the score in the cell of the second row and column, we take the
maximum of the three scores +1, −4, −4. This best score (+1) follows the path of the red
arrow, and we maintain the information of the best path resulting in each cell’s score in
order to later reconstruct the pairwise alignment. (e) To calculate the score in the second
row, third column we again take the maximum of the three scores −4, −1, −4. The best
score follows from the left cell (red arrow). (f) We proceed to fill in scores across the first row
of the matrix. (g) The completed matrix includes the overall score of the optimal alignment
(−4; see cell at bottom right, corresponding to the carboxy termini of each protein). Red
arrows indicate the path(s) by which each cell’s highest score was obtained.
PAIRWISE SEQUENCE ALIGNMENT
6478976844 !"#$%&'()*#+%,%-./#01(%2#31. 45
Ejemplo
por las filas y
columnas
laterales
base para
extender el
alineamiento
! -
" -
! -
# -
0 -4 -5 -6 -
! "! $
!"#$%&'()*#+%,%-./#01(%2#31. 45 6478976844
Ejemplo. Puntuación de (1,1)
-Emparejar: -
Es -
- Ponemos
- El valor obtenido en la tabla
de puntuación
- Y una diagonal en la matriz
de reconstrucción indicando
que se emparejan los dos
elementos
! -
" -
! -
# -4 -
0 -4 -5 -6 -
! "! $
! -
" -
! -
# -4 \
0 -4 -5 -6 -
! "! $
!"#$%&'()*#+%,%-./#01(%2#31. 45
Cálculo de la matriz de puntuaciones (4)
Fórmulas de cálculo
- Utilizamos la notación siguiente:
- S(i,j): Puntuación para coincidencia o no
- $W_k = a + b \cdot k$: Penalización afín para un “gap” de longitud k
- La puntuación de la fila y la columna 0 se obtiene de:
$$P(0,k) = -W_k, \qquad P(k,0) = -W_k$$
- Y la puntuación de cada celda de la tabla de:
$$
P(i,j) = \max
\begin{cases}
P(i-1,\,j-1) + S(i,j), & \text{celda anterior en diagonal} \\
\max_{x \ge 1} \left[ P(i-x,\,j) - W_x \right], & \text{celdas anteriores de la fila} \\
\max_{y \ge 1} \left[ P(i,\,j-y) - W_y \right], & \text{celdas anteriores de la columna}
\end{cases}
$$
Bloc 2
!"#$%&'()('+#,%+&-%(#**
;0%'()#1&)%)A/+0/&,%,&234$'0&,%$(1)#/)&$+%7,%&12"4%6(/&)%+1(8#1%#/<"$#%'(,#%&$% ./,# B !"!#$%!&'()(!*'
+,-!(!'./'#0)1%)20/!3'4!#)1%)20/!*5 C=%D0EF5%8",2"&)%
*1(8#8/&)&$+% &,+#1G% 1&/#'0($#+% #)8% /#% 90/(<4$0#% 0%
H#%&0$#%-.DI;%&+%)(,+1#1G%&/,%+1(,,(,%'()"$,5%)0+J#$K#$+%
&/,%#/0$&#)&$+,%)A/+0/&,%0%.L0%L#%-&%1(<1#)#'0?%-0$G)0'#%2"&%L(%9#$5%*&1@%,?$%)(/+%'(,+(,(,=%%
S#) 87 % (-&)% 9&1% #/<(10+)&,% ;(05<4+&.4 5% 2"&% $(% +.#,,&<"1&$% (8+&$01% /.#/0$&#)&$+% )A/+0/&% @*+0)5% *&1@% ,?$%
!"#$%&'()*#+%,%-./#01(%2#31. 4 4567864799
Esquema del tema
- Introducción a los alineamientos múltiples
(AMS)
- Métodos heurísticos para el AMS: CLUSTALW
- Representación de AMS: de la secuencia
consenso a las expresiones regulares
!"#$%&'()*#+%,%-./#01(%2#31. 4 56789758::
Aplicaciones
- Representación de familias de proteínas y
construcción de modelos para la identificación
de miembros potenciales de la familia
- Identificación y representación de patrones
conservados en las secuencias relacionados
con la estructura y la función
- Deducción de la historia evolutiva
!"#$%&'()*#+%,%-./#01(%2#31. 4 5467865799
Definición de AMS
- Un alineamiento múltiple de secuencias se
obtiene insertando en cada secuencia un cierto
(quizás 0) número de huecos (“gaps”) de forma
que
- las secuencias resultantes tengan la misma longitud y
- cada columna tenga como mínimo un carácter
diferente de ‘-’ (“gaps”)
IMAGINABLE
IMPRACTICABLE
INFALIBLE
I M-- A G- I NA BLE
I MPR A CT I CA BLE
I N-F A L- I -- BLE
I M--- A G- I NA BLE
I M-PR A CT I CA BLE
I N--F A L- I -- BLE
56789758:: !"#$%&'()*#+%,%-./#01(%2#31. 4
Puntuación de los AMS
- Un alineamiento múltiple implica un alineamiento de
parejas para cada par de secuencias
- La puntuación SP (“Suma de Parejas”) de un
alineamiento múltiple es la suma de las puntuaciones
de todos los alineamientos a pares implicados
A A C G T A C G A T A
A – C G T A – A A T G
G T C G T A - - T T A
match = 1
mismatch = 0
gap-character = -1
gap-gap = 0
5
3
4 SP score = 12
1 –2 3 3 3 3 –2 –2 1 3 1 = 12
6785986544 !"#$%&'()*#+%,%-./#01(%2#31. 45
Problemas en el estudio de AMS
- El estudio de los AMS contempla distintos
aspectos
- ALGORITMOS
- ¿Qué algoritmos existen para obtener AMS?
- ¿Cuál es el más adecuado?
- ¿Existen algoritmos óptimos al estilo de NW o SW?
- REPRESENTACIÓN
- ¿Cómo podemos representar de forma concisa un AMS, o
mejor aún las características que éste revela sobre la familia
de secuencias?
Bloc 2
2
9($ :.$ )(7%-+#$ ()*(+ ;$ <.).#.$ #+%.+?@(.;$ '%2$ -,$ ),-.%.-$ 0$ !.+(A5$ B-.$ :.=(+/.0;$ 0.$ !"#$
C%$ .$ =/#<.%$ /-.$ +(,-./)01 /2+(+ ;$ +%.<+.%!$ )$ +%,=.%$ 0.$ 30*(
D.$().$)0$ !"#$%&"' $"#$0.$#@E-+F$$
G5 [email protected]!$+,+#$0#$'.%00#$)$#IE?-<(#;$($!(%!$0.$I/$+"$!(00,%$
J5 K1.I/#+.$'%(!%.$.0(-.<(8$7!$/-.$ +(,-./)01/2+(*+ 5 $
L5 H!=$ 0.$ #IE?-<(.$ <,-#-#;$ +,%-!$ .$ 7%$ 01.0(-.!-+;$ .-*!$
7-+$0$<.!M5$H(AM;$0$%#/0+.+$#+.%&$!,0+$)+%!(-.+$'%$ )",%7.",%!#08"#",%,"5;<&30",%5."%+-+="8 5 $
HI/#+$!<.-(#!$<%.%&$/-$ 04;4(1<9)0 ;$I/$#%&$/-.$%7%?-<(.$'%$.$<%.%$01.0(-.!-+$!N0+('0*5$$
D0.>,%#$',)%!$7%$/-. 13074)915(15)+7=/)(+ ;$I/$-#$)(%&$<,!$)$#'.%.)#$#+.-$-+%$00#$0#$#IE?-<(#5$
O,!-<!$($>(!$I/$0.$ $>1 ($0.$ $?1 #,-$0#$I/$+-*-$
)#'%"#$+(-)%M!$ $@1 ($ $A P$$
[email protected]!$ 0.$ >1 )1 :01? ;$ ($ 0#$ 0:)((3 5$ Q($ :!$ 7*+$ @.'#;$
.I/*#+#$ R.$ -,$ *#$ ',)%.-$ +,<.%$ 3 !"#% +0:$% )",% !#08"#",%
K#'%"#$.0(-!$0.$G$($0.$L;$($7!$0$!.+(A$I/$.=.-#;$#($+-(!$@.'#$-,$*0#$
+,<.%!5$ D0.>,%#$ 0#$ ',#.%!$ +,+#$ R/-+#;$ !.-+-(-+$ 0#$ @.'#;$ ($ 7.%!$
H$0.$'&@(-.$S=$)$016TUD$:($+%,=!$/-$+/+,%(.0$#,=%$0$7/-<(,-.!-+$)*0$
<0/#+.0S5$ !""#$%%&&&'()'+,'-.%/,+0%"-"12+34%#21"(*0%,3-4"+3&'!"53 6
60#$ %#/0+.+#$ )$ 01.0(-.!-+;$ .0$ <0/#+.0S;$ #/%+-$ %'%#-+.+#$ )$ !.-%.$
#'<(.0F$$
V Q($:($:.$ 5)B(4<./)(+ ;$I/.-$',#!$',I/#$#IE?-<(#;$0$'.+%8$*#+.%&$!"#$!.%<.+5$3W4$$
V H$#,+.$)$+,+$',#.%&$0.$#IE?-<(.$ /2+(+ 5$$
V X/.-$#/%+-$ C97(7+ $,$.#+%(#<#;$>,0)%&$)(%$I/$ ('(",%)",%))"(#",%5."%(/%!"#%7+8.&(% 4706*8-+34 $3 ,"#6%")%
O.).$ (-.$ +"$ 0.$ #>.$ !.-%.$ )1A'%##.%$ 0.$ #>.$ #IE?-<(.$ <,-#-#$ 3Y$ 0.$ !.+(A.$ ::(740 ;$ D $ 0.$ !.+(A.$
89/)20:)707 ;$ EF1 ",!"32=03%7")%ABCDE?BF4%"&,%1'%70#6%")%(.('#0+)4%5."%(+8@/%1'%!'(%!',+#%!"#% ,13124 45 $
60$ /:9+70:G $"#$/-.$(-.$'%$.$7%$.0(-.!-+$!N0+('05$H$!"#$)1.I/#+.;$+.!="$-$+-(!$)1.0+%#5$60$ HI&J $-,$
"#$/-.$(-.;$#(-8$/-.$=.#$)$).)#$I/$<,-+"$#IE?-<(#$<,-#-#$)$)(7%-+#$7.!M0(#;$'*%$.$<,-+%.#+.%$0.$
-,#+%.$#IE?-<(.$.!=$)1.0+%#5$$
9 :;9:<:=>?@AB6C:D<6?E< 6
H$ 01:,%.$ )$ %'%#-+.%$ 0#$ %#/0+.+#$ )1/-$ .0(-.!-+$
!N0+('0;$+-(!$)(>%##$,'<(,-#;$.0@/-#$!"#$A.<+*#$
I/$ 0#$ .0+%#;$ '%2$ +,+#$ +%.<+-$ )$ %'%#-+.%$ *0#$
CLUSTAL W (Cont.)
S
1
S
3
S
2
S
4
Guide Tree
S
2
S
4
S
1
S
3
gaps to optimize alignment
Align most
similar pair
Align next most
similar pair
S
2
S
4 Align alignments,
preserve gaps S
1
S
3
new gap to optimize alignment
of $(S_1\ S_3)$ with $(S_2\ S_4)$
CLUSTAL W (Cont.)
S
1
S
3
S
2
S
4
Guide Tree
S
2
S
4
S
1
S
3
gaps to optimize alignment
Align most
similar pair
Align next most
similar pair
S
2
S
4 Align alignments,
preserve gaps
S
1
S
3
new gap to optimize alignment
of $(S_1\ S_3)$ with $(S_2\ S_4)$
6789586944 !"#$%&'()*#+%,%-./#01(%2#31. 45
CLUSTALW
- En este método se alinean separadamente
todos los pares de secuencias para calcular una
matriz de distancias que indique la divergencia
entre cada par de secuencias
- A partir de la matriz de distancias se calcula un
“árbol guía”
- Las secuencias se alinean progresivamente
siguiendo el orden de las ramas del árbol guía
!"#$%&'()*#+%,%-./#01(%2#31. 45
Una jerarquía de modelos para AMS
- Hay muchos métodos
- Secuencia exacta
- Secuencias consenso
- Expresiones regulares o patrones
- Perfiles o Matrices de pesos posicionales
- Modelos ocultos de Markov
- En este curso solo consideramos los tres
primeros
- Más información en este enlace