Before Starting

  1. HOSTNAME:
    1. CREAM CE Host: emitestbed21.cnaf.infn.it ----> NOTE: here we deploy the CE and the SGE qmaster on different hosts
    2. Grid Engine Server Host: emitestbed22.cnaf.infn.it
    3. Worker Node Host: emitestbed12.cnaf.infn.it
    4. Worker Node Host: emitestbed13.cnaf.infn.it
  2. OS: SL5 X86_64 Installed + Network
  3. Host certificate required: must be copied under /etc/grid-security/ before yaim configuration

Testbed Installation

Sun Grid Engine QMASTER Installation

The GE integration in EMI assumes that you already have GE working in cluster mode. Some general considerations:

  1. Better to install GE master in a separate machine from your CreamCE. HW req.:standard VM, 20GB Disk + 2GB RAM
  2. The GE Exec Node should be installed in a WN, and it copes with the WN requirements
  3. For the integration, keep in mind the specific information stated in http://wiki.italiangrid.it/twiki/bin/view/CREAM/SystemAdministratorGuideForEMI1#1_5_3_Grid_Engine

We followed the GE Cookbook guide for GE on EMI provided by CreamGE plugin certification team. Installation steps: *. Download the tarball

    1. wget http://sourceforge.net/projects/gridscheduler/files/GE2011.11/GE2011.11.tar.gz/download
    2. gunzip GE2011.11.tar.gz
    3. tar -xvf GE2011.11.tar *. Compiling source:
    4. cd GE2011.11/source/
    5. NOTE: some packages may be required on your SL5 x64 box: ( csh, gcc, java-1.6.0-openjdk, pam, pam-devel, libX11-devel, xlib-mesa, libXt-devel, libXmu-devel, openmotif-devel, libXpm-devel, doxygen, cairo.x86_64, cairo-devel.x86_64, pycairo, libtermcap.x86_64, libtermcap-devel.x86_64, termcap.noarch, mingw32 ..)
    6. ./aimk -no-java -no-jni -no-secure -spool-classic -no-dump -only-depend
    7. ./scripts/zerodepend
    8. ./aimk -no-java -no-jni -no-secure -spool-classic -no-dump depend
    9. ./aimk -no-java -no-jni -no-secure -spool-classic -no-dump *. Installation from compiled binaries in the SGE QMASTER:
    10. $> export SGE_ROOT=/opt/SGE
    11. $> mkdir $SGE_ROOT
    12. $>./scripts/distinst -all -local -noexit -> run from directory where binaries are (GE2011.11/source) CHECK installation directory and answer Yes to questions
    13. $> cd $SGE_ROOT
    14. $>./install_qmaster (the command line way on REDHAT). Follow GE Cookbook guide for GE on EMI
    15. $> source /opt/SGE/default/common/settings.sh
    16. $> qconf -mconf ->>>> ADDING in the editor coming out from the command the following line "execd_params INHERIT_ENV=false"
    17. $> qconf -as emitestbed21.cnaf.infn.it ---> ADDING CE as submitting host
    18. $> qconf -as emitestbed22.cnaf.infn.it ---> ADDING MASTER as submitting host
    19. $> qmon ---> Start qmon (need ssh -X to qmaster host)
    20. $> 1; click on queue 2; select all.q 3; click on clone queue 4; write "emitesters" in name field ---> ADDING A queue
    21. $> qconf -sql ----> this should show existing queues (all.q and emitesters)
    22. $> qconf -au tst05,tst06,tst07,tst08,tst09,tst10,tst11,tst12,tst13,tst14,tst15,tst16,tst17,tst18,tst19,tst20,tst21,tst22,tst23,tst24,tst25,tst26,tst27,tst28,tst29,tst30 emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters,emitesters
    23. $> qconf -su emitesters ------> VERIFY it worked

[root@emitestbed21 ~]# qconf -su emitesters
name    emitesters
type    ACL
fshare  0
oticket 0
entries tst03,tst04,tst01,tst02,tst05,tst06,tst07,tst08,tst09,tst10,tst11, \
        tst12,tst13,tst14,tst15,tst16,tst17,tst18,tst19,tst20,tst21,tst22, \
        tst23,tst24,tst25,tst26,tst27,tst28,tst29,tst30

    1. $> qconf -mconf ->>>> ADDING in the editor opened by the command the following line "user_lists emitesters"

[root@emitestbed22 ~]# qconf -sql 
all.q
emitesters

    1. $> touch /opt/SGE/default/common/accounting
    2. $> chmod 644 /opt/SGE/default/common/accounting
    3. $> qconf -mconf ----> ADDING prolog and epilog
    4. $> qconf -sconf ------> general configuration

[root@emitestbed21 ~]# qconf -sconf  
#global:
execd_spool_dir              /opt/SGE/default/spool
mailer                       /bin/mail
xterm                        /usr/bin/X11/xterm
load_sensor                  none
prolog                       none
epilog                       none
shell_start_mode          unix_behavior
login_shells                 sh,bash,ksh,csh,tcsh
min_uid                      0
min_gid                      0
user_lists                   none
xuser_lists                  none
projects                     none
xprojects                    none
enforce_project              false
enforce_user                 auto
load_report_time             00:00:40
max_unheard                  00:05:00
reschedule_unknown           02:00:00
loglevel                     log_info
administrator_mail           root@localhost
set_token_cmd                none
pag_cmd                      none
token_extend_time            none
shepherd_cmd                 none
qmaster_params               none
execd_params                 INHERIT_ENV=false
reporting_params             accounting=true reporting=false \
                             flush_time=00:00:15 joblog=false sharelog=00:00:00
finished_jobs                100
gid_range                    20000-20100
qlogin_command               builtin
qlogin_daemon                builtin
rlogin_command               builtin
rlogin_daemon                builtin
rsh_command                  builtin
rsh_daemon                   builtin
max_aj_instances             2000
max_aj_tasks                 75000
max_u_jobs                   0
max_jobs                     0
max_advance_reservations     0
auto_user_oticket            0
auto_user_fshare             0
auto_user_default_project    none
auto_user_delete_time        86400
delegated_file_staging       false
reprioritize                 0
jsv_url                      none
jsv_allowed_mod              ac,h,i,e,o,j,M,N,p,w

    1. $> qconf -sq emitesters ---> EXAMPLE OF QUEUE CONFIGURATION

[root@emitestbed21 ~]# qconf -sq emitesters
qname                 emitesters
hostlist              emitestbed12.cnaf.infn.it emitestbed13.cnaf.infn.it
seq_no                0
load_thresholds       np_load_avg=1.75
suspend_thresholds    NONE
nsuspend              1
suspend_interval      00:05:00
priority              0
min_cpu_interval      00:05:00
processors            UNDEFINED
qtype                 BATCH INTERACTIVE
ckpt_list             NONE
pe_list               make
rerun                 FALSE
slots                 1
tmpdir                /tmp
shell                 /bin/csh
prolog                root@/opt/SGE/default/queues_conf/prolog.sh
epilog                /opt/glite/bin/sge_filestaging --stageout
shell_start_mode      posix_compliant
starter_method        NONE
suspend_method        NONE
resume_method         NONE
terminate_method      NONE
notify                00:00:60
owner_list            NONE
user_lists            emitesters
xuser_lists           NONE
subordinate_list      NONE
complex_values        NONE
projects              NONE
xprojects             NONE
calendar              NONE
initial_state         default
s_rt                  INFINITY
h_rt                  INFINITY
s_cpu                 INFINITY
h_cpu                 INFINITY
s_fsize               INFINITY
h_fsize               INFINITY
s_data                INFINITY
h_data                INFINITY
s_stack               INFINITY
h_stack               INFINITY
s_core                INFINITY
h_core                INFINITY
s_rss                 INFINITY
h_rss                 INFINITY
s_vmem                INFINITY
h_vmem                INFINITY

*. post configuration

    1. $> Edit profile.d for startup initialization

 [root@emitestbed22 ~]# cat /etc/profile.d/SGE.sh 
export SGE_ROOT=/opt/SGE
export SGE_CELL=default
. /opt/SGE/default/common/settings.sh 

    1. $> NFS setup to share SGE directories with CREAM CE Host
    2. $> yum install nfs nfs-utils portmap
    3. $> edit NFS config files: /etc/exports ; /etc/hosts.allow ; /etc/hosts.deny
$>cat /etc/exports 
/opt/SGE/   YOURHOSTIP(rw,sync,no_root_squash) 

[root@emitestbed22 ~]# cat /etc/hosts.allow 
portmap: YOURHOSTIP
lockd: YOURHOSTIP
mountd: YOURHOSTIP
rquotad: YOURHOSTIP
statd: YOURHOSTIP

[root@emitestbed22 ~]# cat /etc/hosts.deny 
portmap: ALL
lockd:ALL
mountd:ALL
rquotad:ALL
statd:ALL

    1. $> service portmap start
    2. $> service nfs start
    3. $> chkconfig nfs on
    4. $> chkconfig portmap on

After that, we needed to reset the queue and job status with qmod -cq '*' and qmod -cj '*'

WNs Installation

  1. SGE In the WNs:
    1. copy the same compiled versions as from the SGE MASTER
    2. $> export SGE_ROOT=/opt/SGE
    3. $> mkdir $SGE_ROOT
    4. $>./scripts/distinst -all -local -noexit -> run from directory where binaries are (GE2011.11/source) CHECK installation directory and answer Yes to questions
    5. $> cd $SGE_ROOT
    6. $>./install_qmaster (the command line way on REDHAT). Follow GE Cookbook guide for GE on EMI
    7. $>./install_execd (the command line way)
    8. $> Edit profile.d for startup initialization

 [root@emitestbed22 ~]# cat /etc/profile.d/SGE.sh 
export SGE_ROOT=/opt/SGE
export SGE_CELL=default
. /opt/SGE/default/common/settings.sh 

Get the repo files from http://emisoft.web.cern.ch/emisoft/dist/EMI/1/sl5/repos and the KEY, which should be placed in /etc/pki/rpm-gpg/RPM-GPG-KEY-emi, then install:

You must install Grid Engine; to configure it you can use yaim. To do so:

    • Put in /opt/glite/yaim/etc/ conf files: VOs.def, groups.conf, site-info.def, users.conf, wn-list.conf
    • Create file /etc/hosts: echo "your_ip_machine your_name_machine">>/etc/hosts
    • Our sample site-info.def:

[root@emitestbed12 siteinfo]# grep -v "#" site-info.def services/*
site-info.def:YAIM_LOGGING_LEVEL=DEBUG
site-info.def:MY_DOMAIN="cnaf.infn.it"
site-info.def:INSTALL_DIR="/root/siteinfo"
site-info.def:SITE_NAME="INFN_CNAF_EMI"
site-info.def:DPM_HOST="lxbra2506v1.cern.ch"
site-info.def:CLASSIC_HOST="lxbra1910.cert.ch"
site-info.def:CLASSIC_STORAGE_DIR="/storage"
site-info.def:SE_LIST="$CLASSIC_HOST $DPM_HOST"
site-info.def:SE_MOUNT_INFO_LIST="none"
site-info.def:BDII_HOST=certtbrc-bdii-site.cern.ch
site-info.def:WN_LIST=$INSTALL_DIR/wn-list.conf
site-info.def:USERS_CONF=$INSTALL_DIR/users.conf
site-info.def:GROUPS_CONF=$INSTALL_DIR/groups.conf
site-info.def:FUNCTIONS_DIR=/opt/glite/yaim/functions
site-info.def:OUTPUT_STORAGE=/tmp/jobOutput
site-info.def:JAVA_LOCATION="/usr/lib/jvm/java"
site-info.def:CRON_DIR=/etc/cron.d
site-info.def:CE_HOST=emitestbed21.$MY_DOMAIN
site-info.def:BATCH_SERVER=emitestbed22.cnaf.infn.it
site-info.def:BATCH_VERSION="GE 2011.11"
site-info.def:CE_SMPSIZE=2
site-info.def:SGE_ROOT="/opt/SGE"
site-info.def:SGE_CELL="default"
site-info.def:SGE_QMASTER="536"
site-info.def:SGE_EXECD="537"
site-info.def:SGE_SPOOL_METH="classic"
site-info.def:SITE_EMAIL="sitemanager@cnaf.infn.it"
site-info.def:SITE_LAT=42.0
site-info.def:SITE_LONG=11.0
site-info.def:SITE_NAME=INFN_CNAF_EMI
site-info.def:USE_ARGUS=no
site-info.def:ARGUS_PEPD_ENDPOINTS="http://pepd.example.org:8154/authz"
site-info.def:VOS="testers.eu-emi.eu"
site-info.def:ALL_VOMS_VOS="testers.eu-emi.eu"
site-info.def:QUEUES="emitesters"
site-info.def:EMITESTERS_GROUP_ENABLE="dteam testers.eu-emi.eu /testers.eu-emi.eu/ROLE=pilot"
site-info.def:VO_SW_DIR=/opt/exp_soft

  • Configure: /opt/glite/yaim/bin/yaim -c -s site-info.def -n glite-WN -n SGE_client
  • Configure ssh passwordless between your CE and your WN's:

[root@emitestbed12 ~]# grep -v "#" /etc/ssh/sshd_config 
Protocol 2
SyslogFacility AUTHPRIV
PasswordAuthentication yes
ChallengeResponseAuthentication no
GSSAPIAuthentication yes
GSSAPICleanupCredentials yes
UsePAM yes
AcceptEnv LANG LC_CTYPE LC_NUMERIC LC_TIME LC_COLLATE LC_MONETARY LC_MESSAGES 
AcceptEnv LC_PAPER LC_NAME LC_ADDRESS LC_TELEPHONE LC_MEASUREMENT 
AcceptEnv LC_IDENTIFICATION LC_ALL
X11Forwarding yes
Subsystem       sftp    /usr/libexec/openssh/sftp-server
HostbasedAuthentication yes
IgnoreUserKnownHosts yes
IgnoreRhosts yes


------------------------------------
[root@emitestbed12 ~]# cat  /etc/ssh/shosts.equiv
emitestbed21.cnaf.infn.it
emitestbed12.cnaf.infn.it
emitestbed13.cnaf.infn.it

-----------------------------------
[root@emitestbed12 ~]# cat  /etc/ssh/ssh_config 
Host *
   RhostsAuthentication yes
   RhostsRSAAuthentication yes
   RSAAuthentication yes
   PasswordAuthentication yes
   EnableSSHKeysign yes
   HostbasedAuthentication yes

Also check that /etc/ssh/ssh_known_hosts contains appropriate entries with the host keys of your WNs and of your own CREAM CE

*. from WNs enter a pool account with $> su - tst13 and test that "ssh CEhostname " works without password

*. Notice that it is important to check the number of processors available, which is auto-detected. If it is 0, try restarting the SGE execd daemons.

[root@emitestbed13 ~]# qconf -se emitestbed13
hostname              emitestbed13.cnaf.infn.it
load_scaling          NONE
complex_values        NONE
load_values           arch=linux-x64,num_proc=1,mem_total=1001.542969M, \
                      swap_total=2015.992188M,virtual_total=3017.535156M, \
                      load_avg=0.000000,load_short=0.000000, \
                      load_medium=0.000000,load_long=0.000000, \
                      mem_free=732.828125M,swap_free=2015.929688M, \
                      virtual_free=2748.757812M,mem_used=268.714844M, \
                      swap_used=0.062500M,virtual_used=268.777344M, \
                      cpu=100.000000,m_topology=SC,m_topology_inuse=SC, \
                      m_socket=1,m_core=1,np_load_avg=0.000000, \
                      np_load_short=0.000000,np_load_medium=0.000000, \
                      np_load_long=0.000000
processors            1
user_lists            emitesters
xuser_lists           NONE
projects              NONE
xprojects             NONE
usage_scaling         NONE
report_variables      cpu

CREAM Installation

*. SGE In the CREAM CE: NFS SGE directory import from SGE Master host.

    1. mkdir /opt/SGE
    2. Add the following line to your /etc/fstab : " YOURSGEMASTERIP:/opt/SGE /opt/SGE nfs rw,defaults 0 0 "
    3. yum install nfs-utils portmap
    4. service portmap stop
    5. service portmap start
    6. mount -a
    7. chkconfig portmap on

Service Installation

  1. Repositories ( see EMI basic configuration):
    1. egi-trustanchors.repo + emi1.repo + epel.repo
  2. $> yum clean all
  3. $> yum makecache
  4. INSTALLING CREAM SERVER + SGE
    1. $> wget http://emisoft.web.cern.ch/emisoft/dist/EMI/1/RPM-GPG-KEY-emi
    2. $> mv RPM-GPG-KEY-emi /etc/pki/rpm-gpg/
    3. $> yum install yum-protectbase.noarch ca-policy-egi-core xml-commons-apis emi-release
    4. $> yum install emi-cream-ce
    5. $> yum install emi-ge-utils
    6. $> yum install fetch-crl

Service Configuration

  1. PREPARING FOR YAIM CONFIGURATION:
    1. COPY YAIM FILES UNDER ROOT: $>cp -r /opt/glite/yaim/examples/siteinfo/ /root/siteinfo/
    2. TESTERS VO files: follow the description at [[https://twiki.cern.ch/twiki/bin/view/EMI/TestBed][VO TESTERS HOWTO section 3.5]] and download the following files: groups.conf, users.conf + vo.d directory
    3. CONTENT of /root/siteinfo: site-info.def + groups.conf + users.conf + vo.d (DIR) + wn-list.conf + services/
    4. Find below the settings for Inter-Component testing EMI Testbed. Of course you will need to adapt these values to your own site (hostnames of other servers (SE, BDII, ARGUS..) , passwd, etc. etc. )


[root@emitestbed21 siteinfo]# cat wn-list.conf 
emitestbed12.cnaf.infn.it
emitestbed13.cnaf.infn.it


site-info.def:
site-info.def:YAIM_LOGGING_LEVEL=DEBUG
site-info.def:MY_DOMAIN="cnaf.infn.it"
site-info.def:INSTALL_DIR="/root/siteinfo"
site-info.def:WN_LIST=$INSTALL_DIR/wn-list.conf
site-info.def:USERS_CONF=$INSTALL_DIR/users.conf
site-info.def:GROUPS_CONF=$INSTALL_DIR/groups.conf
site-info.def:FUNCTIONS_DIR=/opt/glite/yaim/functions
site-info.def:OUTPUT_STORAGE=/tmp/jobOutput
site-info.def:JAVA_LOCATION="/usr/lib/jvm/java"
site-info.def:CRON_DIR=/etc/cron.d
site-info.def:MYSQL_PASSWORD="XXXXXXXXX"
site-info.def:SITE_EMAIL="sitemanager@cnaf.infn.it"
site-info.def:SITE_LAT=42.0 
site-info.def:SITE_LONG=11.0
site-info.def:CE_HOST=emitestbed21.$MY_DOMAIN
site-info.def:CE_PHYSCPU=1
site-info.def:CE_LOGCPU=1
site-info.def:CE_CPU_MODEL=XEON
site-info.def:CE_CPU_VENDOR=Intel
site-info.def:CE_CPU_SPEED=2800
site-info.def:CE_OS="Scientific Linux"
site-info.def:CE_OS_RELEASE="5.5"
site-info.def:CE_OS_VERSION="Beryllium"
site-info.def:CE_OS_ARCH=x86_64
site-info.def:CE_MINPHYSMEM=2048
site-info.def:CE_MINVIRTMEM=4096
site-info.def:CE_SMPSIZE=2
site-info.def:CE_SI00=1286
site-info.def:CE_SF00=1496
site-info.def:CE_OUTBOUNDIP=TRUE
site-info.def:CE_INBOUNDIP=FALSE
site-info.def:CE_RUNTIMEENV="
site-info.def:    LCG-2
site-info.def:    LCG-2_1_0
site-info.def:    LCG-2_1_1
site-info.def:    LCG-2_2_0
site-info.def:    LCG-2_3_0
site-info.def:    LCG-2_3_1
site-info.def:    LCG-2_4_0
site-info.def:    LCG-2_5_0
site-info.def:    LCG-2_6_0
site-info.def:    LCG-2_7_0
site-info.def:    GLITE-3_0_0
site-info.def:    GLITE-3_1_0
site-info.def:    R-GMA
site-info.def:    CNAF
site-info.def:    SI00MeanPerCPU_870
site-info.def:    SF00MeanPerCPU_790
site-info.def:"
site-info.def:BASE_SW_DIR=/flatfiles/SE01/exp_soft
site-info.def:CE_CAPABILITY="CPUScalingReferenceSI00=value [Share=vo-name1:value [Share=vo-name2:value [...]]]"
site-info.def:CE_OTHERDESCR="Cores=value[,Benchmark=value-HEP-SPEC06]" 
site-info.def:BATCH_SERVER=emitestbed22.cnaf.infn.it
site-info.def:JOB_MANAGER=sge
site-info.def:BATCH_VERSION="GE 2011.11"
site-info.def:CE_BATCH_SYS=sge
site-info.def:BATCH_LOG_DIR="/opt/SGE/default/common/accounting"
site-info.def:BATCH_BIN_DIR="/opt/SGE/bin/linux-x64/"
site-info.def:SGE_ROOT="/opt/SGE"
site-info.def:SGE_CELL="default"
site-info.def:SGE_QMASTER="536"
site-info.def:SGE_EXECD="537"
site-info.def:SGE_SPOOL_METH="classic"
site-info.def:BLPARSER_WITH_UPDATER_NOTIFIER="true"
site-info.def:JOB_MANAGER=sge
site-info.def:CE_BATCH_SYS=sge
site-info.def:SGE_SHARED_INSTALL=yes
site-info.def:MON_HOST=emitb5.ics.muni.cz
site-info.def:APEL_DB_PASSWORD="XXXXXXXXXXXX"
site-info.def:SE_LIST="$CLASSIC_HOST"
site-info.def:DPM_HOST="cvitbdpm1.cern.ch"
site-info.def:CLASSIC_HOST="lxbra1910.cern.ch"
site-info.def:SE_HOST=$DPM_HOST
site-info.def:CLASSIC_STORAGE_DIR="/storage"
site-info.def:SE_LIST="$CLASSIC_HOST $DPM_HOST"
site-info.def:SE_MOUNT_INFO_LIST="none"
site-info.def:BDII_HOST=certtb-bdii-site.cern.ch
site-info.def:SITE_BDII_HOST=certtb-bdii-site.cern.ch
site-info.def:SITE_NAME="INFN-CNAF_EMI"
site-info.def:BDII_SITE_TIMEOUT=120
site-info.def:BDII_RESOURCE_TIMEOUT=`expr "$BDII_SITE_TIMEOUT" - 5`
site-info.def:GIP_RESPONSE=`expr "$BDII_RESOURCE_TIMEOUT" - 5`
site-info.def:GIP_FRESHNESS=60
site-info.def:GIP_CACHE_TTL=300
site-info.def:GIP_TIMEOUT=150
site-info.def:GIP_CACHE_DIR=/var/tmp/lcg-info-dynamic-cache
site-info.def:SITE_NAME="INFN-CNAF_EMI"
site-info.def:BDII_SITE_TIMEOUT=120
site-info.def:BDII_RESOURCE_TIMEOUT=`expr "$BDII_SITE_TIMEOUT" - 5`
site-info.def:GIP_RESPONSE=`expr "$BDII_RESOURCE_TIMEOUT" - 5`
site-info.def:GIP_FRESHNESS=60
site-info.def:GIP_CACHE_TTL=300
site-info.def:GIP_TIMEOUT=150
site-info.def:GIP_CACHE_DIR=/var/tmp/lcg-info-dynamic-cache
site-info.def:VOS="dteam testers.eu-emi.eu"
site-info.def:ALL_VOMS_VOS="dteam testers.eu-emi.eu"
site-info.def:QUEUES="emitesters"
site-info.def:EMITESTERS_GROUP_ENABLE="dteam testers.eu-emi.eu /testers.eu-emi.eu/ROLE=pilot"
site-info.def:VO_SW_DIR=/opt/exp_soft
site-info.def:VO_DTEAM_SW_DIR=$VO_SW_DIR/dteam
site-info.def:VO_DTEAM_DEFAULT_SE=$CLASSIC_HOST
site-info.def:VO_DTEAM_STORAGE_DIR=$CLASSIC_STORAGE_DIR/dteam
site-info.def:VO_DTEAM_VOMS_SERVERS='vomss://voms.hellasgrid.gr:8443/voms/dteam?/dteam/'
site-info.def:VO_DTEAM_VOMSES="'dteam voms.hellasgrid.gr 15004 /C=GR/O=HellasGrid/OU=hellasgrid.gr/CN=voms.hellasgrid.gr dteam 24' 'dteam voms2.hellasgrid.gr 15004 /C=GR/O=HellasGrid/OU=hellasgrid.gr/CN=voms2.hellasgrid.gr dteam 24'"
site-info.def:VO_DTEAM_VOMS_CA_DN="'/C=GR/O=HellasGrid/OU=Certification Authorities/CN=HellasGrid CA 2006' '/C=GR/O=HellasGrid/OU=Certification Authorities/CN=HellasGrid CA 2006'"
services/glite-creamce:
services/glite-creamce:CEMON_HOST=emitestbed21.cnaf.infn.it
services/glite-creamce:ACCESS_BY_DOMAIN=no
services/glite-creamce: CREAM_DB_USER=SSSSSSSSS
services/glite-creamce:CREAM_DB_PASSWORD=XXXXXXXXX
services/glite-creamce:BLPARSER_HOST=emitestbed21.cnaf.infn.it
services/glite-creamce:BLP_PORT=33333
services/glite-creamce:CREAM_PORT=56565
services/glite-creamce:BLAH_JOBID_PREFIX=cre09
services/glite-creamce:RESET_CREAM_DB_GRANTS=yes
services/glite-creamce:GLEXEC_CREAM_LOG_DESTINATION=file

i. Make sure that ports 536 and 537 are booked for SGE:

[root@emitestbed02 siteinfo]# grep sge  /etc/services 
sge_qmaster     536/tcp           # tcp port for SGE Qmaster daemon
sge_execd       537/tcp           # tcp port for SGE execd service

    1. $> /opt/glite/yaim/bin/yaim -c -s site-info.def -n creamCE -n SGE_utils

Note :

  1. YAIM variables set in the /root/siteinfo/services/XXX files will override variables with the same name in the site-info.def file
  2. some YAIM variables are set in the yaim defaults files: /opt/glite/yaim//defaults/XXXfiles

  1. After all yaim files are OK run the following commands:
    1. $> /opt/glite/yaim/bin/yaim -c -s siteinfo/site-info.def -n creamCE -n SGE_utils
    2. If everything's OK yaim will end with (BTW yaim output is in /opt/glite/yaim/log/yaimlog file)

.....
  INFO: Configuration Complete.                                               [  OK  ]
  INFO: YAIM terminated succesfully.

  1. CHECKS AFTER YAIM CONFIGURATION:
    1. $echo "-u *"> $SGE_ROOT/default/common/sge_qstat ---------> Create
    2. CHECK for this problem of INFO publishing https://wiki.italiangrid.it/twiki/bin/view/CREAM/KnownIssues#Problem_with_generic_dynamic_sch
    3. ssh passwordless:
      1. Check the same configuration as in WNs above.
      2. from CE enter a pool account with $> su - tst13 and test that "ssh WNhostname " works without password

Service Testing

  1. Test daemons:

[root@emitestbed21 siteinfo]# service gLite status
*** tomcat5:
/etc/init.d/tomcat5 is already running (17281)

*** glite-lb-locallogger:
glite-lb-logd running as 18727
glite-lb-interlogd running as 18727

*** glite-ce-blahparser:
BNotifier (pid 16486) is running...
BUpdaterPBS (pid 16490) is running...

  1. Check sge process is running:
    1. on CREAM
[tst01@emitestbed21 ~]$ ps aux |grep -i sge
tomcat     579  0.0  0.0   6252   764 ?        S    Feb28   1:15 /usr/bin/BUpdaterSGE
ldap     10459  3.0  0.5  49404 11948 ?        S    15:08   0:00 /usr/bin/perl /usr/libexec/glite-info-dynamic-sge --info -c /etc/lcg-info-dynamic-scheduler.conf
ldap     10460  0.2  0.0  15348   772 ?        S    15:08   0:00 /opt/SGE/bin/linux-x64/qstat -help

    1. on SGE master
[root@emitestbed22 ~]# ps aux |grep -i sge
root      9773  0.0  0.1 199332  4464 ?        Sl   Mar02   2:39 /opt/SGE/bin/linux-x64/sge_qmaster

    1. on WNs

[root@emitestbed13 ~]# ps aux |grep -i sge
root     32390  0.0  0.2 115568  2508 ?        Sl   Mar02   0:07 /opt/SGE/bin/linux-x64/sge_execd

  1. Troubleshooting on queues:
    1. Submit from CE user:
      1. on CREAM do : $> su - tst01
      2. tst01$> qsub /bin/hostname
      3. $>qstat -u '*'
      4. after the job has finished (no longer listed by qstat), do $> qacct -j JOBID_SGE
      5. If qstat reports a problem of unavailable queues, try resetting them with:
        1. $> qmod -cq '*'
        2. $> qmod -cj '*'
    2. Other useful commands:
      1. check logs in /opt/SGE/default/spool/qmaster/messages
      2. $> qstat -s z -u '*'
      3. $> qstat -j SGE_JOBID
      4. ON CREAM CE, to see if the job was accepted and registered in the batch system: $> blah_job_registry_dump

  1. Checking published queues from UI:

[dongiovanni@emitestbed08 ~]$ ldapsearch -x -H  ldap://emitestbed21.cnaf.infn.it:2170/ -b mds-vo-name=resource,o=grid |grep emitesters
GlueClusterService: emitestbed21.cnaf.infn.it:8443/cream-sge-emitesters
# emitestbed21.cnaf.infn.it:8443/cream-sge-emitesters, resource, grid
dn: GlueCEUniqueID=emitestbed21.cnaf.infn.it:8443/cream-sge-emitesters,Mds-Vo-
GlueCEUniqueID: emitestbed21.cnaf.infn.it:8443/cream-sge-emitesters
GlueCEName: emitesters
# emitestbed21.cnaf.infn.it:8443/cream-sge-emitesters, resource, grid
# testers.eu-emi.eu, emitestbed21.cnaf.infn.it:8443/cream-sge-emitesters, res
 it:8443/cream-sge-emitesters,Mds-Vo-name=resource,o=grid
# cvitbdpm1.cern.ch, emitestbed21.cnaf.infn.it:8443/cream-sge-emitesters, res
 stbed21.cnaf.infn.it:8443/cream-sge-emitesters,Mds-Vo-name=resource,o=grid
GlueCESEBindCEUniqueID: emitestbed21.cnaf.infn.it:8443/cream-sge-emitesters
# lxbra1910.cern.ch, emitestbed21.cnaf.infn.it:8443/cream-sge-emitesters, res
 stbed21.cnaf.infn.it:8443/cream-sge-emitesters,Mds-Vo-name=resource,o=grid
GlueCESEBindCEUniqueID: emitestbed21.cnaf.infn.it:8443/cream-sge-emitesters

i. direct submission from UI:

    1. glite-ce-job-submit -d -r emitestbed21.cnaf.infn.it:8443/cream-sge-emitesters -a test.jdl
    2. glite-ce-job-status https://emitestbed21.cnaf.infn.it:8443/CREAM786673019

  1. Other basic test on CREAM CE was done testing job submission as reported at this page EMI gLite Job Management Tests Page



-- DaniloDongiovanni - 10-Feb-2012

Edit | Attach | Watch | Print version | History: r24 < r23 < r22 < r21 < r20 | Backlinks | Raw View | WYSIWYG | More topic actions
Topic revision: r24 - 2012-05-14 - DaniloDongiovanniExCern
 
    • Cern Search Icon Cern Search
    • TWiki Search Icon TWiki Search
    • Google Search Icon Google Search

    EMI All webs login

This site is powered by the TWiki collaboration platform. Powered by Perl. Copyright © 2008-2024 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
or Ideas, requests, problems regarding TWiki? use Discourse or Send feedback