find_host_queue.sh
寻找 hostname 所在的 queue :
find_host_queue.sh (view raw)
| #!/bin/bash
# find_host_queue: report every LSF queue whose HOSTS section names the given
# host, either directly or through a host group.
#
# Usage:    find_host_queue.sh <hostname>
# Requires: bqueues and bmgroup on PATH.
# Exit:     1 when no hostname is supplied.

if [[ -z "${1:-}" ]]; then
  echo "用法: $0 <hostname>"
  exit 1
fi

# Not named HOSTNAME: bash maintains that variable itself (the local machine's
# name), and overwriting it can leak into child processes when it is exported.
target_host=$1
echo "正在查找包含主机 [$target_host] 或其主机组的队列..."

# Queue names are the first column of `bqueues`, minus the header row.
# `read -a` splits on whitespace without pathname expansion, unlike an
# unquoted $(...) expansion.
read -r -d '' -a queues < <(bqueues | awk 'NR>1 {print $1}')

for q in "${queues[@]}"; do
  # Keep the paragraph of the long listing that starts at "HOSTS:" and runs to
  # the next blank line, split into whitespace-separated words.
  read -r -d '' -a host_array < <(bqueues -l "$q" | awk '/HOSTS:/,/^$/')

  for entry in "${host_array[@]}"; do
    if [[ $entry == @* || $entry == */ ]]; then
      # Entries written as "@name" or "name/" are treated as host groups.
      # Strip both decorations cumulatively; deriving the second strip from
      # $entry again would throw away the "@" removal.
      group=${entry#@}
      group=${group%/}
      # -F: match the host name literally (dots are not wildcards);
      # -w: whole-word match only. bmgroup errors are silenced on purpose,
      # because not every decorated entry is a group bmgroup knows.
      if bmgroup "$group" 2>/dev/null | grep -qwF -- "$target_host"; then
        echo "✅ 队列 [$q] 包含主机组 [$group],其中包括主机 [$target_host]"
      fi
    elif [[ $entry == "$target_host" ]]; then
      echo "✅ 队列 [$q] 直接包含主机 [$target_host]"
    fi
  done
done
|
auto_submit_to_host_queue.sh
根据提供的 hostname ,自动寻找最空闲的队列提交 bsub 作业:
auto_submit_to_host_queue.sh (view raw)
| #!/bin/bash
# auto_submit_to_host_queue: find the LSF queues that contain a host, order
# them by pending-job count, and submit a job pinned to that host through the
# first queue that passes a short probe.
#
# Usage:    auto_submit_to_host_queue.sh <hostname> <bsub args...>
# Requires: bqueues, bmgroup, bsub and timeout on PATH.
# Exit:     1 on bad usage, when no queue contains the host, or when every
#           candidate queue is skipped; otherwise the exit status of the real
#           `bsub` submission.

if (( $# < 2 )); then
  echo "用法: $0 <hostname> <args>"
  exit 1
fi

# Not named HOSTNAME: bash maintains that variable itself (the local machine's
# name), and overwriting it can leak into child processes when it is exported.
target_host=$1
# Keep the remaining arguments as an array so arguments containing spaces or
# glob characters reach bsub exactly as the caller passed them.
bsub_args=("${@:2}")

echo "🔍 正在查找包含主机 [$target_host] 的队列..."

# Used as a set: the keys are the names of queues that contain the host.
declare -A matching_queues=()

# Queue names are the first column of `bqueues`, minus the header row.
# `read -a` splits on whitespace without pathname expansion.
read -r -d '' -a queues < <(bqueues | awk 'NR>1 {print $1}')

for q in "${queues[@]}"; do
  # Keep the paragraph of the long listing that starts at "HOSTS:" and runs to
  # the next blank line, split into whitespace-separated words.
  read -r -d '' -a host_array < <(bqueues -l "$q" | awk '/HOSTS:/,/^$/')

  for entry in "${host_array[@]}"; do
    if [[ $entry == @* || $entry == */ ]]; then
      # Entries written as "@name" or "name/" are treated as host groups.
      # Strip both decorations cumulatively; deriving the second strip from
      # $entry again would throw away the "@" removal.
      group=${entry#@}
      group=${group%/}
      # -F: literal match (dots are not wildcards); -w: whole words only.
      if bmgroup "$group" 2>/dev/null | grep -qwF -- "$target_host"; then
        echo "✅ 队列 [$q] 包含主机组 [$group],其中包括主机 [$target_host]"
        matching_queues["$q"]=1
      fi
    elif [[ $entry == "$target_host" ]]; then
      echo "✅ 队列 [$q] 直接包含主机 [$target_host]"
      matching_queues["$q"]=1
    fi
  done
done

if (( ${#matching_queues[@]} == 0 )); then
  echo "❌ 没有找到包含主机 [$target_host] 的队列。"
  exit 1
fi

echo "✅ 找到以下队列包含主机 [$target_host]:"
for q in "${!matching_queues[@]}"; do
  echo " - $q"
done

# Order the candidates by the ninth column of the queue's summary row.
# NOTE(review): the default `bqueues` layout puts PEND in column 9; confirm
# for your LSF version/output format.
# Printing $NF (not $2) keeps the queue name even if the count came back empty.
read -r -d '' -a sorted_queues < <(
  for q in "${!matching_queues[@]}"; do
    pend=$(bqueues "$q" | awk 'NR==2 {print $9}')
    echo "$pend $q"
  done | sort -n | awk '{print $NF}'
)

for q in "${sorted_queues[@]}"; do
  echo "🔎 尝试队列 [$q]..."

  # Probe with a trivial interactive job, capped at 10 seconds.
  test_output=$(timeout 10 bsub -q "$q" -n 1 -Is /bin/true 2>&1)
  probe_status=$?

  # timeout(1) exits with 124 when it had to kill the command.
  if (( probe_status == 124 )); then
    echo "⏳ 队列 [$q] 测试超时,跳过。"
    continue
  fi

  if grep -q "User cannot use the queue" <<<"$test_output"; then
    echo "⛔ 无权限使用队列 [$q],跳过。"
    continue
  fi

  echo "🚀 提交任务到队列 [$q]..."
  bsub -q "$q" -m "$target_host" "${bsub_args[@]}"
  # Report the real submission result instead of an unconditional success.
  exit $?
done

echo "❌ 所有队列都无法使用或提交失败。"
exit 1
|