Instalação da spserv01 e migração da osg-ce
Description
Copiados os
/etc/passwd,/etc/shadow,/etc/gshadow
e
/etc/group
yum -y install ypserv
ypdomainname grid
/etc/init.d/ypserv start
cd /var/yp
make
chkconfig ypserv on
vim /etc/fstab
osgce:/home /home nfs rw,hard,bg,rsize=32768,wsize=32768,udp,nfsvers=3
vi /etc/ntp.conf
# Permite acesso ao servidor para sincronizar mas nao permite modificacoes no servico
restrict default nomodify notrap noquery
# Permite acesso completo para a interface local
restrict 127.0.0.1
# Libera acesso a minha rede local
restrict 192.168.1.0 mask 255.255.255.0 nomodify notrap
server pcdsh05.on.br
server ntp.cais.rnp.br
server ntp.puc-rio.br
server ntp1.pucpr.br
server ntp.pop-rs.rnp.br
driftfile /var/lib/ntp/drift
#Fim de arquivo
service ntpd start
chkconfig ntpd on
yum install dhcp
vim /etc/dhcpd.conf
yum install sendmail sendmail-cf
chkconfig sendmail on
vim /etc/mail/sendmail.mc
DAEMON_OPTIONS(`Port=smtp,Addr=127.0.0.1, Name=MTA')dnl
DAEMON_OPTIONS(`Port=smtp,Addr=192.168.1.155, Name=MTA')dnl
DAEMON_OPTIONS(`Port=smtp,Addr=200.136.80.2, Name=MTA')dnl
dnl DAEMON_OPTIONS(`Port=smtp,Addr=127.0.0.1, Name=MTA')
dnl FEATURE(`accept_unresolvable_domains')dnl
dnl define(`SMART_HOST',`mail.my-site.com')
vim /etc/mail/access
Connect:192.168.1 RELAY
vim /etc/mail/virtusertable
sprace.org.br root
@grid root
vim /etc/mail/local-host-names
sprace.org.br
grid
vim /etc/aliases
root@spsrv01.grid
cd /etc/mail
make
newaliases
/etc/init.d/sendmail restart
Nos nodes, instalar o
sendmail-cf
e replicar a configuração abaixo:
vim /etc/mail/sendmail.mc
define(`SMART_HOST', `[spserv01.sprace.org.br]')dnl
cd /etc/mail
make
newaliases
/etc/init.d/sendmail restart
OBS: foi necessário remover o /var/mail/sendmail* de alguns nodes.
Para a instalação dos nodes usando PXE é necessário instalar:
yum install tftp-server -y
vim /etc/xinetd.d/tftp
disable=no
yum install syslinux
cp /usr/lib/syslinux/pxelinux.0 /tftpboot/.
mkdir /tftpboot/pxelinux.cfg
chmod a+r /tftpboot/pxelinux.cfg
cd /tftpboot
vim pxelinux.cfg/default
# timeout de espera
TIMEOUT 10
# Sempre mostrar o prompt
PROMPT 1
# O default é realizar a instalação
DEFAULT linux
LABEL linux
kernel vmlinuz1
append ks=nfs:192.168.1.150:/tftpboot/kickstart/ks_32bits.cfg initrd=initrd1.img devfs=nomount ksdevice=eth1
LABEL boot
LOCALBOOT 0
Dentro desse diretório
/tftpboot
devem ficar o kernel e a imagem initrd que você deseja bootar (no caso do Scientific Linux, procure no repositório por
images/SL/pxeboot/vmlinuz
e
images/SL/pxeboot/initrd.img).
ls /tftpboot/
initrd.img msgs pxelinux.0 pxelinux.cfg vmlinuz
mkdir /tftpboot/kickstart
cp ks_* /tftpboot/kickstart/.
A demora para passar pelo menu do TFTP até o
default
foi corrigida adicionando a seguinte linha no
/etc/dhcpd.conf
next-server 192.168.1.155;
cd /opt
wget http://atlas.bu.edu/~youssef/pacman/sample_cache/tarballs/pacman-3.28.tar.gz
tar --no-same-owner -xzvf pacman-3.28.tar.gz
rm -f pacman-3.28.tar.gz
cd pacman-3.28/
source setup.sh
cd /opt/osg-serv
pacman -get http://software.grid.iu.edu/osg-1.2:gums
mkdir /etc/grid-security
mkdir /opt/osg-serv/globus/TRUSTED_CA
vim /etc/fstab
osgce:/opt/osg-1.0.0/globus/TRUSTED_CA /opt/osg-serv/globus/TRUSTED_CA nfs rw,hard,bg,rsize=32768,wsize=32768,udp,nfsvers=3
ln -s /opt/osg-serv/globus/TRUSTED_CA /etc/grid-security/certificates
Na osg-ce
mkdir certificado
cd certificado/
. /OSG/setup.sh
cert-gridadmin -host osg-ce.sprace.org.br -prefix testserver ca doegrids -affiliation osg -vo dosar -show -email mdias@ift.unesp.br
scp spserv01cert.pem 192.168.1.155:/etc/grid-security/hostcert.pem
scp spserv01key.pem 192.168.1.155:/etc/grid-security/hostkey.pem
Novamente no servidor
mkdir /etc/grid-security/http
cd /etc/grid-security/
cp hostkey.pem http/httpkey.pem
cp hostcert.pem http/httpcert.pem
chown -R daemon:daemon http
cd /opt/osg-serv/
source setup.sh
vdt-control --enable vdt-rotate-logs
vdt-control --enable mysql5
vdt-control --enable apache
vdt-control --enable tomcat-55
cd $VDT_LOCATION/tomcat/v55/webapps/gums/WEB-INF/scripts
./gums-add-mysql-admin "/DC=org/DC=doegrids/OU=People/CN=Marco Dias 280904"
cd /opt/osg-serv/
pacman -get http://vdt.cs.wisc.edu/vdt_200_cache:OSG-RSV
Novamente na osg-ce
cert-gridadmin -service rsv -host spserv01.sprace.org.br -prefix rsv-spserv-grid ca doegrids -affiliation osg -vo dosar -show -email mdias@ift.unesp.br
scp rsv-spserv01-gridcert.pem 192.168.1.155:/etc/grid-security/rsvcert.pem
scp rsv-spserv01-gridkey.pem 192.168.1.155:/etc/grid-security/rsvkey.pem
Para instalar o RSV como servico seguimos as instrucoes de configuracao em
https://twiki.grid.iu.edu/twiki/bin/view/MonitoringInformation/MapServiceCertToRsvuser capturando o DN com
openssl x509 -text -subject -in /etc/grid-security/rsvcert.pem
Retornando à spserv01
cd ${VDT_LOCATION}/osg-rsv/bin/
source ${VDT_LOCATION}/setup.sh
$VDT_LOCATION/osg-rsv/setup/configure_osg_rsv --user rsvuser --init --server y --ce-probes --ce-uri "osg-ce.sprace.org.br" --srm-probes --srm-uri "osg-se.sprace.org.br" --srm-dir /pnfs/sprace.org.br/data/mdias --gratia --grid-type "OSG" --consumers --verbose --setup-for-apache --use-rsv-cert --gridftp-probes --rsv-cert-file /etc/grid-security/rsvcert.pem --rsv-key-file /etc/grid-security/rsvkey.pem --gratia-probes "condor managedfork metric gridftp-transfer" --setup-from-svn
vdt-control --enable condor-cron osg-rsv apache
vdt-control --on condor-cron osg-rsv apache
É necessário registrar essa máquina no OIM (site do OSG para registro de resources) e esperar que este seja ativado. Surgiu o problema de que a máquina não estava enviando o report para o site MyOSG, o que foi consertado com
$VDT_LOCATION/vdt/setup/configure_gratia --probe metric --report-to rsv.grid.iu.edu:8880
mkdir /opt/frontier-squid
chown -R dbfrontier: /opt/frontier-squid
su - dbfrontier
cd /tmp
wget http://frontier.cern.ch/dist/frontier_squid-4.0rc9.tar.gz
tar -xvzf frontier_squid-4.0rc9.tar.gz
cd frontier_squid-4.0rc9
./configure
/opt/frontier-squid
200.136.80.0/255.255.255.0 192.168.1.0/255.255.255.0
make
make install
crontab -e
7 7 * * * /opt/frontier-squid/frontier-cache/utils/cron/daily.sh 2>&1 >/dev/null
su -
cp /opt/frontier-squid/frontier-cache/utils/init.d/frontier-squid.sh /etc/init.d/.
/sbin/chkconfig --add frontier-squid.sh
Para o CMS
vim /home/OSG_app/app/cmssoft/cms/SITECONF/local/JobConfig/site-local-config.xml
upload das informações para o CVS do CERN usando /twiki/bin/view/Main/EntryDescriptionNo64. Necessário também enviar um Savannah ticket (frontier) como "bug" informando sobre esta nova maquina.
---++ Instalação dos nodes
yum install sendmail sendmail-cf -y
echo "define(\`SMART_HOST', \`[spserv01.sprace.org.br]')dnl" >> /etc/mail/sendmail.mc
sed -i "s/osgce/spsrv01/g" /etc/aliases
cd /etc/mail
make
newaliases
/etc/init.d/sendmail restart
sed -i "s/192.168.1.150/192.168.1.155/g" /etc/yp.conf
/etc/init.d/ypbind restart
sed -i "s/192.168.1.150/192.168.1.155/g" /etc/ntp/step-tickers
sed -i "s/192.168.1.150/192.168.1.155/g" /etc/ntp.conf
sed -i "s/192.168.1.150/192.168.1.155/g" /etc/sysconfig/network
/etc/init.d/ntpd restart
/etc/init.d/network restart
___________________________________________________________________________________________
Nova OSG-CE
yum -y install bind-chroot
chmod 755 /var/named/
chmod 775 /var/named/chroot/
chmod 775 /var/named/chroot/var/
chmod 775 /var/named/chroot/var/named/
chmod 775 /var/named/chroot/var/run/
chmod 777 /var/named/chroot/var/run/named/
cd /var/named/chroot/var/named/
ln -s ../../ chroot
cp /usr/share/doc/bind-9.3.6/sample/var/named/named.local /var/named/chroot/var/named/named.local
cp /usr/share/doc/bind-9.3.6/sample/var/named/named.root /var/named/chroot/var/named/named.root
cp /usr/share/doc/bind-9.3.6/sample/etc/named.conf /var/named/chroot/etc/
vim /var/named/chroot/etc/named.conf
Copia das zonas que estavam na osg-ce
/etc/init.d/named start
chkconfig --levels 235 named on
mkdir /opt/condor
cd /tmp
wget http://parrot.cs.wisc.edu//symlink/20091218081501/7/7.4/7.4.0/05bc0e1d595629a37138c3b0113abce8/condor-7.4.0-linux-x86_64-rhel5.tar.gz
tar -xvzf condor-7.4.0-linux-x86_64-rhel5.tar.gz
cd condor-7.4.0
./condor_configure --install --maybe-daemon-owner --make-personal-condor --install-log /opt/condor/post_install --install-dir /opt/condor/
vim /opt/condor/etc/condor_config
vim /scratch/condor/condor_config.local
O mesmo para os nós 32 bit
mkdir /opt/condor-i386
wget http://parrot.cs.wisc.edu//symlink/20091219041505/7/7.4/7.4.0/f98e39fd24797ddcb7536c1037e36e2d/condor-7.4.0-linux-x86-rhel5.tar.gz
tar -xvzf condor-7.4.0-linux-x86-rhel5.tar.gz
cd condor-7.4.0
./condor_configure --install --maybe-daemon-owner --make-personal-condor --install-log /opt/condor-i386/post_install --install-dir /opt/condor-i386/
cp /opt/condor/etc/condor_config /opt/condor-i386/etc/condor_config
Diretórios que serão exportados via NFS para os nodes, que montarao usando o
/opt/condor
localmente.
Instalando o
JobOverview
cd /tmp/
wget http://sarkar.web.cern.ch/sarkar/dist/Condor/jobview_v1.0.0.tgz
tar -xvzf jobview_v1.0.0.tgz
mv jobview_v1.0.0 /opt/jobview
cd /opt/jobview/
vi lib/config.pl
site => q|T2_BR_SPRACE|,
baseDir => q|/opt/jobview|,
collector => q|192.168.1.150|,
schedd => q|192.168.1.150|,
domain => q|sprace.org.br|,
verbose => 1,
constraint => {
'condor_q -global' => qq|SleepSlot =!= TRUE|,
'condor_status' => qq|TRUE|
vi bin/overview.sh
vi setup.sh
vi bin/overview.cron
vi bin/create_rrd.sh
export PERL5LIB=/opt/jobview/lib:$PERL5LIB
mkdir -p /var/www/html/condor/images
cp -r html/css /var/www/html/condor/
cp -r html/js /var/www/html/condor/
yum install perl-rrdtool -y
perl -MCPAN -e shell
cpan> install Template::Alloy
cpan> install Class::Singleton
cpan> install HTTP::Date
cd bin/
./create_rrd.sh
./overview.sh
cp overview.cron /etc/cron.d # after checking that the path is correct
service crond restart
touch /etc/crontab
Devido a um bug, é necessário mudar a variável "my $sep" em /opt/jobview/lib/JobList.pm para my $sep = 'ServerTime = '
cd /opt
wget http://atlas.bu.edu/~youssef/pacman/sample_cache/tarballs/pacman-3.28.tar.gz
tar --no-same-owner -xzvf pacman-3.28.tar.gz
cd pacman-3.28
source setup.sh
mkdir /opt/osg-1.2.4
cd /opt/osg-1.2.4
export VDTSETUP_CONDOR_LOCATION=/opt/condor
export VDTSETUP_CONDOR_CONFIG=${VDTSETUP_CONDOR_LOCATION}/etc/condor_config
pacman -get http://software.grid.iu.edu/osg-1.2:ce
source setup.sh
pacman -get http://software.grid.iu.edu/osg-1.2:Globus-Condor-Setup
vi $VDT_LOCATION/globus/lib/perl/Globus/GRAM/JobManager/condor.pm
# $requirements .= " && Arch == \"" . $description->condor_arch() . "\" ";
source setup.sh; vdt-post-install
pacman -get http://software.grid.iu.edu/osg-1.2:ManagedFork
$VDT_LOCATION/vdt/bin/vdt-ca-manage setupca --location local --url osg
$VDT_LOCATION/vdt/setup/configure_globus_gatekeeper --managed-fork y --server y
vi $VDT_LOCATION/gums/config/gums-client.properties
vi /opt/osg-1.2.4/post-install/prima-authz.conf
cp $VDT_LOCATION/post-install/prima-authz.conf /etc/grid-security/.
cp /opt/osg-1.2.4/post-install/gsi-authz.conf /etc/grid-security/.
vdt-control --enable gums-host-cron
vdt-control --on gums-host-cron
visudo
Runas_Alias GLOBUSUSERS = ALL, !root
daemon ALL=(GLOBUSUSERS) \
NOPASSWD: \
/opt/osg-1.2.4/globus/libexec/globus-job-manager-script.pl *
daemon ALL=(GLOBUSUSERS) \
NOPASSWD: \
/opt/osg-1.2.4/globus/libexec/globus-gram-local-proxy-tool *
vi /opt/osg-1.2.4/osg/etc/config.ini
configure-osg -v
configure-osg -c
Como a farm tem ainda worker nodes 32 bits,
vi $VDT_LOCATION/globus/lib/perl/Globus/GRAM/JobManager/condor.pm
# $requirements .= " && Arch == \"" . $description->condor_arch() . "\" ";
vi $VDT_LOCATION/vdt/etc/vdt-local-setup.sh
GLOBUS_TCP_SOURCE_RANGE=40000,50000
GLOBUS_TCP_PORT_RANGE=40000,50000
export GLOBUS_TCP_SOURCE_RANGE
export GLOBUS_TCP_PORT_RANGE
vi $VDT_LOCATION/vdt/etc/vdt-local-setup.csh
setenv GLOBUS_TCP_SOURCE_RANGE 40000,50000
setenv GLOBUS_TCP_PORT_RANGE 40000,50000
vi /etc/sysctl.conf
# Limit ephemeral ports to avoid globus tcp port range
# See OSG CE install guide
net.ipv4.ip_local_port_range = 10240 39999
sysctl -p
vi /etc/hosts.allow
ALL : localhost
vdt-run-gsiftp.sh : ALL
vdt-run-globus-gatekeeper.sh : ALL
cd /etc/grid-security
mkdir http
cp hostcert.pem /etc/grid-security/http/httpcert.pem
cp hostkey.pem /etc/grid-security/http/httpkey.pem
chown -R daemon.daemon /etc/grid-security/http
cd /etc/grid-security/
cp hostkey.pem containerkey.pem
cp hostcert.pem containercert.pem
chown globus: containerkey.pem containercert.pem
vdt-control --on --force
fdisk -l /dev/sda
yum install mdadm -y
grub> root (hd0,0)
grub> setup (hd0)
grub> root (hd1,0)
grub> setup (hd1)
grub> quit
Certificados do GridUnesp
vi $VDT_LOCATION/vdt/etc/vdt-update-certs.conf
include=/opt/local/certs/a795b224.0
include=/opt/local/certs/a795b224.signing_policy
Para exportar o proxy do glexec para os nodes
cp /home/mdias/migration/ProxyRenewNodes.sh /usr/local/bin/.
crontab -e
55 */11 * * * /usr/local/bin/ProxyRenewNodes.sh
Script que mantem a prioridade dos usuários locais maior que os usuários de grid no condor
cp /home/mdias/migration/setpriocondor.sh /usr/local/bin/.
crontab -e
*/30 * * * * /usr/local/bin/setpriocondor.sh
backup do OSG
cp backup_diario.sh /usr/local/bin/.
crontab -e
00 02 * * 1-7 /usr/local/bin/backup_diario.sh
Transformando o raid1 em hd ativo
Deve-se formatar o raid1 da antiga CE. Siga as orientações abaixo para formatar todas as partições do raid e criar os pontos de montagem.
Ex:
mkfs.ext3 /dev/md0
mkdir /new
mount /dev/md0 /new
Copie todos os diretórios para os novos dir.
Ex:
cp -a /boot/. /new/boot
cp -a /opt/. /new/opt
Não copiar os diretórios /sys, /proc e /dev.
O /proc é usado para processos correntes apenas, se fizer cópia deste diretório trava o processo de cópia.
mount -o bind /dev /new/dev
mount -t proc none /new/proc
mount -t sysfs none /new/sys
O diretório /tmp só não pode ser movido:
chmod 1777 /new/tmp
Entre como root no novo servidor:
chroot /new /bin/bash
Pontos de montagem e grub
Prestar atenção nos pontos de montagem.
Editar de acordo com a tabela do original do osg-ce.
/new/etc/fstab
/new/etc/mtab
/new/boot/grub/grub.conf
Instalando o grub:
grub-install /dev/sdb
mkinitrd -f /boot/initrd-2.6.X.X.img 2.6.X.X
O HD antigo da CE será usado para fazer parte do raid da nova CE.
A CE antiga deverá ter seu raid1 totalmente desfeito e remontado para que sincronize com a nova CE e não o contrário.
Updates
Fulano em dd/mm/aaaa
Coloca o que fez.
Ciclano em dd/mm/aaaa
Mais comentarios
--
AllanSzu - 08 Jan 2010
--
MarcoAndreFerreiraDias - 06 Jan 2010