fix: simplify remote SSH commands for reliability

2026-05-22 13:34:08 +08:00 · 2026-05-22 13:34:08 +08:00 · 13a7718a3c
commit 13a7718a3c
parent ad6112f4ab
1 changed file with 50 additions and 71 deletions
--- a/src/modules/admin-servers/admin-servers.service.ts
+++ b/src/modules/admin-servers/admin-servers.service.ts
@@ -7,7 +7,7 @@ const execAsync = promisify(exec);

 interface ServerMetrics {
  hostname: string;
-  cpu: { model: string; cores: number; usagePercent: number; loadAvg: number[] };
+  cpu: { model: string; cores: number; usagePercent: number };
  memory: { total: string; used: string; free: string; percent: number };
  disk: { total: string; used: string; free: string; percent: number };
  uptime: string;
@@ -15,8 +15,8 @@ interface ServerMetrics {
  network: { ip: string };
 }

-const REMOTE_SSH = 'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 -i /home/ubuntu/.ssh/wangdl.pem ubuntu@10.2.0.7';
 const SSH_KEY_PATH = process.env.SSH_KEY_PATH || '/home/ubuntu/.ssh/wangdl.pem';
+const REMOTE_HOST = '10.2.0.7';

 @Injectable()
 export class AdminServersService {
@@ -27,98 +27,81 @@ export class AdminServersService {
    const totalMem = os.totalmem();
    const freeMem = os.freemem();
    const usedMem = totalMem - freeMem;
-
-    // CPU usage (approximate via load avg vs cores)
    const loadAvg = os.loadavg();
-    const cpuUsage = Math.round((loadAvg[0] / cpus.length) * 100);
+    const cpuUsage = Math.min(100, Math.round((loadAvg[0] / cpus.length) * 100));

-    // Disk
    let disk = { total: '-', used: '-', free: '-', percent: 0 };
    try {
      const { stdout } = await execAsync("df -h / | tail -1 | awk '{print $2,$3,$4,$5}'");
-      const [total, used, free, pct] = stdout.trim().split(/\s+/);
-      disk = { total, used, free, percent: parseInt(pct) || 0 };
+      const parts = stdout.trim().split(/\s+/);
+      disk = { total: parts[0] || '-', used: parts[1] || '-', free: parts[2] || '-', percent: parseInt(parts[3]) || 0 };
    } catch {}

-    // Top processes
    let processes: ServerMetrics['processes'] = [];
    try {
      const { stdout } = await execAsync("ps aux --sort=-%mem --no-headers | head -8 | awk '{print $2,$3,$4,$11}'");
-      processes = stdout.trim().split('\n').map(line => {
+      processes = stdout.trim().split('\n').filter(Boolean).map(line => {
        const [pid, cpu, mem, ...cmd] = line.trim().split(/\s+/);
-        return { pid: parseInt(pid), cpu: cpu + '%', mem: mem + '%', command: cmd.join(' ').slice(0, 60) };
+        return { pid: parseInt(pid), cpu: cpu + '%', mem: mem + '%', command: (cmd || []).join(' ').slice(0, 50) };
      });
    } catch {}

-    // Network IPs
    const nets = os.networkInterfaces();
    const ip = Object.values(nets).flat().find(n => n?.family === 'IPv4' && !n.internal)?.address || 'unknown';
-
-    // Uptime
-    const uptimeSeconds = os.uptime();
-    const d = Math.floor(uptimeSeconds / 86400);
-    const h = Math.floor((uptimeSeconds % 86400) / 3600);
-    const m = Math.floor((uptimeSeconds % 3600) / 60);
-    const uptime = `${d}d ${h}h ${m}m`;
+    const d = Math.floor(os.uptime() / 86400);
+    const h = Math.floor((os.uptime() % 86400) / 3600);
+    const m = Math.floor((os.uptime() % 3600) / 60);

    return {
      hostname: os.hostname(),
-      cpu: { model: cpus[0]?.model || '', cores: cpus.length, usagePercent: cpuUsage, loadAvg },
+      cpu: { model: cpus[0]?.model || '', cores: cpus.length, usagePercent: cpuUsage },
      memory: { total: (totalMem / 1e9).toFixed(1) + 'G', used: (usedMem / 1e9).toFixed(1) + 'G', free: (freeMem / 1e9).toFixed(1) + 'G', percent: Math.round((usedMem / totalMem) * 100) },
-      disk, uptime, processes,
+      disk, uptime: `${d}d ${h}h ${m}m`, processes,
      network: { ip },
    };
  }

  async getRemoteMetrics(): Promise<ServerMetrics | null> {
    try {
-      const sshKey = SSH_KEY_PATH;
-      const cmd = `${REMOTE_SSH} 'echo "HOST=$(hostname)"; echo "IP=$(hostname -I | awk '"'"'{print \$1}'"'"')"; echo "UPTIME=$(uptime -p)"; top -bn1 | head -1; free -h | grep Mem; df -h / | tail -1; ps aux --sort=-%mem --no-headers | head -6 | awk '"'"'{print \$2,\$3,\$4,\$11}'"'"'`;
-      const { stdout } = await execAsync(cmd.replace('ssh -o', `ssh -i ${sshKey} -o`), { timeout: 8000 });
-      const lines = stdout.trim().split('\n');
+      const base = `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST}`;
+      const cmds = [
+        `${base} hostname`,
+        `${base} "hostname -I | awk '{print \\$1}'"`,
+        `${base} "cat /proc/loadavg | awk '{print \\$1}'"`,
+        `${base} "cat /proc/cpuinfo | grep processor | wc -l"`,
+        `${base} "free -m | grep Mem | awk '{print \\$2,\\$3,\\$4}'"`,
+        `${base} "df -h / | tail -1 | awk '{print \\$2,\\$3,\\$4,\\$5}'"`,
+        `${base} "uptime -p | sed 's/up //'"`,
+        `${base} "ps aux --sort=-%mem --no-headers | head -6 | awk '{print \\$2,\\$3,\\$4,\\$11}'"`,
+      ];

-      const hostname = lines.find(l => l.startsWith('HOST='))?.split('=')[1] || 'remote';
-      const ip = lines.find(l => l.startsWith('IP='))?.split('=')[1] || '10.2.0.7';
-      const uptimeStr = lines.find(l => l.startsWith('UPTIME='))?.split('=')[1]?.replace('up ', '') || '';
+      const results = await Promise.all(cmds.map(c => execAsync(c, { timeout: 5000 }).then(r => r.stdout.trim()).catch(() => '')));

-      // top output: "load average: 0.08, 0.03, 0.01"
-      const topLine = lines.find(l => l.includes('load average')) || '';
-      const loadMatch = topLine.match(/load average: ([\d.]+), ([\d.]+), ([\d.]+)/);
-      const loadAvg = loadMatch ? [parseFloat(loadMatch[1]), parseFloat(loadMatch[2]), parseFloat(loadMatch[3])] : [0, 0, 0];
+      const hostname = results[0] || 'remote';
+      const ip = results[1] || '10.2.0.7';
+      const load1 = parseFloat(results[2]) || 0;
+      const cores = parseInt(results[3]) || 4;
+      const cpuUsage = Math.min(100, Math.round((load1 / cores) * 100));

-      // memory
-      const memLine = lines.find(l => /Mem:/.test(l)) || '';
-      const memParts = memLine.replace('Mem:', '').trim().split(/\s+/);
+      const memParts = results[4].split(/\s+/);
+      const memTotal = memParts[0] ? (parseInt(memParts[0]) / 1024).toFixed(1) + 'G' : '-';
+      const memUsed = memParts[1] ? (parseInt(memParts[1]) / 1024).toFixed(1) + 'G' : '-';
+      const memFree = memParts[2] ? (parseInt(memParts[2]) / 1024).toFixed(1) + 'G' : '-';
+      const memPercent = memParts[0] && memParts[1] ? Math.round((parseInt(memParts[1]) / parseInt(memParts[0])) * 100) : 0;

-      // disk
-      const diskLine = lines.find(l => /\/$/.test(l) || l.includes('/ ')) || '';
-      const diskParts = diskLine.trim().split(/\s+/);
+      const diskParts = results[5].split(/\s+/);
+      const diskPercent = parseInt(diskParts[3]) || 0;

-      // processes
-      const procLines = lines.filter(l => /^\d+\s/.test(l));
-
-      const cpuUsage = Math.round((loadAvg[0] / 4) * 100); // assume 4 cores
+      const processes = results[7].split('\n').filter(Boolean).map(line => {
+        const [pid, cpu, mem, ...cmd] = line.trim().split(/\s+/);
+        return { pid: parseInt(pid), cpu: cpu + '%', mem: mem + '%', command: (cmd || []).join(' ').slice(0, 50) };
+      });

      return {
-        hostname,
-        cpu: { model: 'Intel Xeon (Lighthouse)', cores: 4, usagePercent: cpuUsage, loadAvg },
-        memory: {
-          total: memParts[1] || '-',
-          used: memParts[2] || '-',
-          free: memParts[3] || '-',
-          percent: loadAvg[0] > 0 ? Math.round(cpuUsage) : 0,
-        },
-        disk: {
-          total: diskParts[1] || '-',
-          used: diskParts[2] || '-',
-          free: diskParts[3] || '-',
-          percent: parseInt(diskParts[4]) || 0,
-        },
-        uptime: uptimeStr,
-        processes: procLines.map(line => {
-          const [pid, cpu, mem, ...cmd] = line.trim().split(/\s+/);
-          return { pid: parseInt(pid), cpu: cpu + '%', mem: mem + '%', command: (cmd || []).join(' ').slice(0, 60) };
-        }),
+        hostname, cpu: { model: 'Intel Xeon (Lighthouse)', cores, usagePercent: cpuUsage },
+        memory: { total: memTotal, used: memUsed, free: memFree, percent: memPercent },
+        disk: { total: diskParts[0] || '-', used: diskParts[1] || '-', free: diskParts[2] || '-', percent: diskPercent },
+        uptime: results[6] || '-', processes,
        network: { ip },
      };
    } catch (err: any) {
@@ -128,15 +111,11 @@ export class AdminServersService {
  }

  async getAllMetrics() {
-    const [local, remote] = await Promise.all([
-      this.getLocalMetrics(),
-      this.getRemoteMetrics(),
-    ]);
-    return {
-      servers: [
+    const [local, remote] = await Promise.all([this.getLocalMetrics(), this.getRemoteMetrics()]);
+    const servers = [
      { name: '蜂驰云 8核32G', role: '生产核心', ...local },
-        ...(remote ? [{ name: '轻量云 4核4G', role: '工具/辅助', ...remote }] : []),
-      ],
-    };
+    ];
+    if (remote) servers.push({ name: '轻量云 4核4G', role: '工具/辅助', ...remote });
+    return { servers };
  }
 }