Hardware
| Server | Public IP | Private IP |
| lxdist01 | 137.138.144.39 | 192.168.0.111 |
| lxdist02 | 137.138.144.40 | 192.168.0.112 |
| lxdist03 | 137.138.144.41 | 192.168.0.113 |
| lxdist04 | 137.138.144.42 | 192.168.0.114 |
| lxdist01-ipmi | 10.6.144.39 | X |
| lxdist02-ipmi | 10.6.144.40 | X |
| lxdist03-ipmi | 10.6.144.41 | X |
| lxdist04-ipmi | 10.6.144.52 | X |
| lxdistnas01p | X | 192.168.0.101 |
| lxdistnas02p | X | 192.168.0.201 |
| lxdistnas01 | 137.138.144.43 | X |
| lxdistnas02 | 137.138.144.44 | X |
Installation
Kickstart
Available in SVN linuxsupport/kickstarts/lxdistXX.ks
Note: the console parameter can be found in the
http://lemon.cern.ch web interface:
- search for your host
- click on the CDB template details
- click on include(serial_map_whatever)
- check parameters
Additional rpms
yum install mod_ssl apr apr-util apr-util-ldap httpd httpd-tools tftp-server tftp \
postgresql-docs postgresql-devel postgresql postgresql-server postgresql-libs \
perl-Frontier-RPC perl-XML-Writer createrepo mrepo \
koji-hub koji-hub-plugins koji-utils koji-plugin-sign koji koji-web \
mock mash koji-builder repoview \
rgmanager cman iscsi-initiator-utils iscsi-initiator-utils-devel mrepo \
shibboleth log4shib xmltooling-schemas opensaml-schemas mussh
Configure Lemon
See
LemonConfiguration
Configure secondary interface
Example:
DEVICE="eth1"
BOOTPROTO=none
IPADDR=192.168.0.114
NETMASK=255.255.255.0
IPV6INIT=no
HWADDR="00:30:48:F2:38:3F"
NM_CONTROLLED="yes"
TYPE="Ethernet"
UUID="872c79c5-c85c-41b8-ad21-3f5e9057cfa3"
MTU=9000
Disable yum autoupdate
/etc/sysconfig/yum-autoupdate:
YUMHOUR=4
YUMUPDATE=0
YUMUPDATESECONLY=0
YUMONBOOT=0
YUMMAIL=1
YUMMAILTO="root"
YUMRANDOMWAIT=59
YUMCLEAN=1
YUMAPPLET=4
Configure nfs
Add in /etc/fstab:
lxdistnas01p:/vol/vol1 /mnt/data1 nfs4 rw,async,noatime,soft,timeo=15,rsize=65536,wsize=65536,proto=tcp 0 0
lxdistnas02p:/vol/vol1 /mnt/data2 nfs4 rw,async,noatime,soft,timeo=15,rsize=65536,wsize=65536,proto=tcp 0 0
Configure postgres
# service postgresql initdb
Configure tftp server
/etc/xinetd.d/tftp
Configure Red Hat Cluster Suite (RHCS)
Quorum disk
On all lxdist systems: retrieve iSCSI Initiator names:
# cat /etc/iscsi/initiatorname.iscsi
InitiatorName=iqn.1994-05.com.redhat:XXXXXXXXXX
On NetApp Filers:
> iscsi start
> vol create vol2 aggr1 100m
> lun create -s 50m -t linux /vol/vol2/lun1
> igroup create -i -t linux iscsigrp
> igroup add iscsigrp iqn.1994-05.com.redhat:XXXXXXXXXX
> lun map /vol/vol2/lun1 iscsigrp 0
(repeat 'igroup add' command for each lxdist node)
On all lxdist systems:
# chkconfig --add iscsi
# chkconfig --add iscsid
# service iscsi start
# service iscsid start
# iscsiadm -m discovery -t sendtargets -p 192.168.0.101
192.168.0.101:3260,2000 iqn.1992-08.com.netapp:sn.XXXXXXXXX
# iscsiadm -m node -L automatic
# ls -l /dev/disk/by-path/
[...]
ip-192.168.0.101:3260-iscsi-iqn.1992-08.com.netapp:sn.XXXXXXXXX-lun-0 -> ../../sdc
[...]
On single lxdist node:
# mkqdisk -c /dev/sdc -l qdisk
On all lxdist nodes check:
# mkqdisk -L
mkqdisk v3.0.12.1
/dev/block/8:32:
/dev/disk/by-id/scsi-360a980003753626b772b4261774c5836:
/dev/disk/by-id/wwn-0x60a980003753626b772b4261774c5836:
/dev/disk/by-path/ip-192.168.0.101:3260-iscsi-iqn.1992-08.com.netapp:sn.XXXXXXXXXXXX-lun-0:
/dev/sdc:
Magic: eb7a62c2
Label: qdisk
Created: Sat Sep 15 14:33:47 2012
Host: lxdist01
Kernel Sector Size: 512
Recorded Sector Size: 512
Cluster configuration file
On all lxdist nodes create:
/etc/cluster/cluster.conf
and
/etc/sysconfig/cman
(with SAME content!)
Cluster daemons / services
On all lxdist nodes:
# chkconfig --add modclusterd
# chkconfig --add ricci
# service modclusterd start
# service ricci start
Assign new ricci password on all nodes:
# passwd ricci
# chkconfig --add cman
# chkconfig --add rgmanager
# service cman start
# service rgmanager start
Then check that the cluster is functioning (after 30-90 seconds):
# clustat
Cluster Status for lxdist @ Mon Sep 17 09:04:22 2012
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
lxdist01p 1 Online, rgmanager
lxdist02p 2 Online, rgmanager
lxdist03p 3 Online, rgmanager
lxdist04p 4 Online, Local, rgmanager
/dev/block/8:32 0 Online, Quorum Disk
Service Name Owner (Last) State
------- ---- ----- ------ -----
service:lxdist lxdist01p started
service:pgsql lxdist01p started
service:rsyslog lxdist01p started
service:tsmclient lxdist01p started
sysctl values
# Neighbour table overflow in logs.
net.ipv4.neigh.default.gc_thresh3 = 4096
net.ipv4.neigh.default.gc_thresh2 = 2048
net.ipv4.neigh.default.gc_thresh1 = 1024
NetApp On Command (Only lxdist04)
Disable discovery:
[root@lxdist04 log]# dfm options list |grep -i discover
discoverAgents Enabled
discoverClusters Disabled
discoverEnabled Enabled
discoverHostInitEnabled Enabled
discoverHosts Enabled
discoverInterval 15 minutes
discoverNetworks Disabled
discoverTimeout 5 seconds
discoverVfilers Enabled
hostEnclosureDiscoveryEvents Disabled
networkDiscoveryLimit 15
serverAPILogExclude host-service-discover|dfm-about
snapshotDiscoveryEventsEnabled No
[root@lxdist04 log]# dfm options set discoverAgents=0
Changed host agent discovery to Disabled.
[root@lxdist04 log]# dfm options set discoverHosts=0
Changed host discovery to Disabled.
[root@lxdist04 log]# dfm options set discoverHostInitEnabled=0
Changed host-initiated discovery to Disabled.
[root@lxdist04 log]# dfm options set discoverEnabled=0
Changed discovery enabled to Disabled.
[root@lxdist04 log]# dfm options list |grep -i discover
discoverAgents Disabled
discoverClusters Disabled
discoverEnabled Disabled
discoverHostInitEnabled Disabled
discoverHosts Disabled
discoverInterval 15 minutes
discoverNetworks Disabled
discoverTimeout 5 seconds
discoverVfilers Enabled
hostEnclosureDiscoveryEvents Disabled
networkDiscoveryLimit 15
serverAPILogExclude host-service-discover|dfm-about
snapshotDiscoveryEventsEnabled No
[root@lxdist04 log]#
Procedure
Reboot lxdist machine
[root@lxdist ~]# /mnt/data2/sbin/dns-update --zone internal --alias lxpxeboot --iplist 137.138.144.39 137.138.144.40
[root@lxdist ~]# /mnt/data2/sbin/dns-update --zone both --alias linuxsoft --iplist 137.138.144.39 137.138.144.40
- Wait 5 minutes and reboot 137.138.144.41 and 137.138.144.42
- Verify if all services are back up.
[root@lxdist ~]# clustat
* Another DNS update:
[root@lxdist ~]# /mnt/data2/sbin/dns-update --zone internal --alias lxpxeboot --iplist 137.138.144.41 137.138.144.42
[root@lxdist ~]# /mnt/data2/sbin/dns-update --zone both --alias linuxsoft --iplist 137.138.144.41 137.138.144.42
- Wait 5 minutes and reboot 137.138.144.39 and 137.138.144.40.
- Verify if all services are back up.
[root@lxdist ~]# clustat
[root@lxdist ~]# /mnt/data2/sbin/dns-update --zone internal --alias lxpxeboot --iplist 137.138.144.39 137.138.144.40 137.138.144.41 137.138.144.42
[root@lxdist ~]# /mnt/data2/sbin/dns-update --zone both --alias linuxsoft --iplist 137.138.144.39 137.138.144.40 137.138.144.41 137.138.144.42
Change 'aims' account password
cat /etc/aims2.conf (Oracle password)
sqlplus aims2@ITCORE
select * from conf where key='LANDB_PASS';
select * from conf where key='WINSRVC_PASS';
update CONF set VALUE='PASSw0rd' where KEY='LANDB_PASS';
update CONF set VALUE='PASSw0rd' where KEY='WINSRVC_PASS';
clusvcadm -R aims2sync
Sync external repositories -> stable.
Repositories:
- mongodb
- ceph
- elasticsearch
- rdo
/mnt/data2/bin/lxdist-sync-mrepo-stable -d {mongodb,ceph,rdo,elasticsearch} -n
/mnt/data2/bin/lxdist-sync-mrepo-stable -d {mongodb,ceph,rdo,elasticsearch}
Note: elasticsearch needs a special version of mrepo that understands Amazon S3 (a fix is available in /mnt/data2/bin/lxdist-sync-mrepo):
/mnt/data2/bin/lxdist-sync-mrepo -s elasticsearch-x86_64