/*
** Job Arranger for ZABBIX
** Copyright (C) 2025 Daiwa Institute of Research Ltd. All Rights Reserved.
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
**/

package main

import (
	"errors"
	"fmt"
	"jobarranger2/src/jobarg_agent/managers/agent_manager/agentutils"
	flock "jobarranger2/src/libs/golibs/filelock"
	"jobarranger2/src/libs/golibs/logger/logger"
	wm "jobarranger2/src/libs/golibs/worker_manager"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"time"
)

// monitorJobs runs the job monitoring loop. It is meant to be started as a
// long-lived goroutine. Every iteration it:
//   - checks abort flags (checkAbortFlags)
//   - checks job PIDs (checkJobPIDs)
//
// On Linux each iteration first waits up to one second on wm.Wm.Ctx: if the
// context is done the function logs and returns, otherwise it reports
// monitorJobsId on wm.Wm.MonitorChan. On other platforms this function has no
// exit path. Every iteration ends with a one-second sleep.
func monitorJobs() {
	fn := "monitorJobs"
	logger.WriteLog("JAAGENTMNTJOB000001", fn)

	// runChecks performs one monitoring pass. A failing check is only logged so
	// that the loop keeps running.
	runChecks := func() {
		if abortErr := checkAbortFlags(); abortErr != nil {
			logger.WriteLog("JAAGENTMNTJOB200003", fn, abortErr)
		}

		if pidErr := checkJobPIDs(); pidErr != nil {
			logger.WriteLog("JAAGENTMNTJOB200002", fn, pidErr)
		}
	}

	onLinux := runtime.GOOS == "linux"

	for {
		if onLinux {
			select {
			case <-wm.Wm.Ctx.Done():
				// shutdown requested
				logger.WriteLog("JAAGENTMNTJOB000002", fn, monitorJobsId)
				return
			case <-time.After(time.Second):
				// still alive: notify the worker manager
				wm.Wm.MonitorChan <- monitorJobsId
			}
		}

		runChecks()

		time.Sleep(time.Second)
	}
}

// checkAbortFlags scans AbortFolderPath and hands every regular entry to
// processAbortJob, using the entry name as the job id. Sub-directories are
// skipped.
//
// A failure for a single flag file is logged and does not stop the scan. The
// only error returned is a failure to read the abort folder itself; it wraps
// the underlying error so callers can inspect it with errors.Is / errors.As.
func checkAbortFlags() error {
	fn := "checkAbortFlags"

	logger.WriteLog("JAAGENTMNTJOB400008", fn)

	// iterate flag files inside the abort/ folder
	targetFolder := AbortFolderPath
	entries, err := os.ReadDir(targetFolder)
	if err != nil {
		return fmt.Errorf("failed to read dir '%s': %w", targetFolder, err)
	}

	logger.WriteLog("JAAGENTMNTJOB400002", fn, targetFolder, len(entries))

	for _, entry := range entries {
		// skipping dir
		if entry.IsDir() {
			continue
		}

		// the full path is only needed for the log message
		flagFilePath := filepath.Join(targetFolder, entry.Name())

		if err := processAbortJob(entry.Name()); err != nil {
			logger.WriteLog("JAAGENTMNTJOB200001", fn, flagFilePath, err)
		}
	}

	return nil
}

// processAbortJob handles one abort request. jobID is the name of the flag
// file found in AbortFolderPath; it is used to look up a "<jobID>-*.job" file
// in ExecFolderPath. Only the first match is processed.
//
// Outcomes:
//   - no matching .job file: nothing is changed (the flag file stays) and nil
//     is returned;
//   - the pid encoded in the .job file name no longer exists: the abort flag
//     file is removed and nil is returned;
//   - otherwise agentutils.KillProcessIfExists is called, a job result file is
//     created through createJobResultFile, and the abort flag file and the
//     job's lock file are removed.
//
// A lock file that is already absent is not treated as an error, matching
// isFileLocked, which also accepts a missing lock file. Returned errors wrap
// the underlying cause.
func processAbortJob(jobID string) error {
	fn := "processAbortJob"
	logger.WriteLog("JAAGENTMNTJOB400009", fn, jobID)

	matches, err := agentutils.FindMatchingFiles(ExecFolderPath, fmt.Sprintf("%s-*.job", jobID))
	if err != nil {
		return fmt.Errorf("failed to find job file in folder '%s' [jobid=%s]: %w", ExecFolderPath, jobID, err)
	}

	if len(matches) == 0 {
		// no related .job files found for jobid to be aborted
		logger.WriteLog("JAAGENTMNTJOB400011", fn, jobID, ExecFolderPath)
		logger.WriteLog("JAAGENTMNTJOB400012", fn, jobID)
		return nil
	}

	logger.WriteLog("JAAGENTMNTJOB400010", fn, jobID, matches)

	filePath := matches[0]
	pid, err := agentutils.ExtractPIDFromFilename(filePath)
	if err != nil {
		return fmt.Errorf("failed to extract pid from file '%s' [jobid=%s]: %w", filePath, jobID, err)
	}

	abortJobFilePath := filepath.Join(AbortFolderPath, jobID)

	// Process no longer exists, do not need to abort
	if !agentutils.ProcessExists(pid) {
		logger.WriteLog("JAAGENTMNTJOB000006", fn, pid, jobID)

		// Remove abort job file
		if err := os.Remove(abortJobFilePath); err != nil {
			return fmt.Errorf("failed to remove abort job file '%s' [jobid=%s] when pid does not exist: %w", abortJobFilePath, jobID, err)
		}

		return nil
	}

	// killing the process
	if err := agentutils.KillProcessIfExists(pid); err != nil {
		return fmt.Errorf("failed to kill process '%d' [jobid=%s]: %w", pid, jobID, err)
	}

	// Get jobrun transaction file path: the .job file name without its
	// "-<pid>.job" tail, with a .json extension, inside DataFolderPath.
	jobFileName := filepath.Base(filePath)
	jobRunFileName := strings.TrimSuffix(jobFileName, fmt.Sprintf("-%d.job", pid)) + ".json"
	jobRunFilePath := filepath.Join(DataFolderPath, jobRunFileName)

	// Create jobresult file.
	// NOTE(review): the third argument is true here and false in
	// processJobFileForPIDCheck; presumably it marks the result as aborted —
	// confirm against createJobResultFile.
	if err := createJobResultFile(jobRunFilePath, filePath, true); err != nil {
		return fmt.Errorf("failed to create job result file [jobid=%s]: %w", jobID, err)
	}

	if err := os.Remove(abortJobFilePath); err != nil {
		return fmt.Errorf("failed to remove abort job file '%s' [jobid=%s] after creating jobresult file: %w", abortJobFilePath, jobID, err)
	}

	// Get lock file path. A lock file that is already gone leaves nothing to
	// clean up, so only other removal failures are reported.
	lockFilePath := filepath.Join(LockFolderPath, strings.TrimSuffix(jobFileName, ".job")+".lock")
	if err := os.Remove(lockFilePath); err != nil && !errors.Is(err, os.ErrNotExist) {
		return fmt.Errorf("failed to remove lock file '%s' [jobid=%s] after creating jobresult file: %w", lockFilePath, jobID, err)
	}

	logger.WriteLog("JAAGENTMNTJOB000005", fn, jobID)
	logger.WriteLog("JAAGENTMNTJOB400012", fn, jobID)

	return nil
}

// checkJobPIDs walks ExecFolderPath and passes every regular file whose name
// ends in ".job" to processJobFileForPIDCheck. A failure on one job file is
// logged and the walk continues; an error is returned only when the folder
// itself cannot be read.
func checkJobPIDs() error {
	fn := "checkJobPIDs"

	logger.WriteLog("JAAGENTMNTJOB400001", fn)

	// list the content of the exec/ folder
	dirEntries, err := os.ReadDir(ExecFolderPath)
	if err != nil {
		return fmt.Errorf("failed to read dir '%s': %v", ExecFolderPath, err)
	}

	logger.WriteLog("JAAGENTMNTJOB400002", fn, ExecFolderPath, len(dirEntries))

	for i := range dirEntries {
		name := dirEntries[i].Name()

		// only regular *.job entries are of interest
		isJobFile := !dirEntries[i].IsDir() && strings.HasSuffix(name, ".job")
		if !isJobFile {
			continue
		}

		jobFilePath := filepath.Join(ExecFolderPath, name)

		checkErr := processJobFileForPIDCheck(jobFilePath)
		if checkErr != nil {
			logger.WriteLog("JAAGENTMNTJOB200001", fn, jobFilePath, checkErr)
		}
	}

	return nil
}

// processJobFileForPIDCheck examines one .job file from the exec folder.
//
// The file is left alone (nil is returned) when its lock file is reported as
// locked, when the pid encoded in its name still exists, or when the .job
// file has disappeared in the meantime. Otherwise a job result file is
// created through createJobResultFile.
//
// An error is returned when the pid cannot be extracted from the file name or
// when the job result file cannot be created. An error from the lock check is
// only logged.
func processJobFileForPIDCheck(jobFilePath string) error {
	fn := "processJobFileForPIDCheck"

	logger.WriteLog("JAAGENTMNTJOB400003", fn, jobFilePath)

	jobFileName := filepath.Base(jobFilePath)

	// The lock file carries the same base name as the .job file, with a .lock
	// extension, and lives in LockFolderPath.
	lockFileName := strings.TrimSuffix(jobFileName, ".job") + ".lock"
	lockFilePath := filepath.Join(LockFolderPath, lockFileName)

	locked, err := isFileLocked(lockFilePath)
	if err != nil {
		logger.WriteLog("JAAGENTMNTJOB400013", fn, jobFilePath, err)
	}
	if locked {
		logger.WriteLog("JAAGENTMNTJOB400014", fn, jobFilePath)
		return nil
	}

	// The pid is the last component of the .job file name, whose format is
	// jobid-datetime-pid.job, e.g. 13826-20251117000747771860442-1046648.job
	pid, err := agentutils.ExtractPIDFromFilename(jobFilePath)
	if err != nil {
		return fmt.Errorf("in %s(), failed to extract pid from job file '%s': %v", fn, jobFilePath, err)
	}

	// A live process means there is nothing to do for this .job file.
	if agentutils.ProcessExists(pid) {
		logger.WriteLog("JAAGENTMNTJOB400004", fn, jobFilePath, pid)
		return nil
	}

	logger.WriteLog("JAAGENTMNTJOB400005", fn, pid, jobFilePath)

	// The process is gone; re-check that the .job file is still there before
	// acting on it.
	if stillThere := agentutils.FileExists(jobFilePath); !stillThere {
		logger.WriteLog("JAAGENTMNTJOB400006", fn, jobFilePath)
		return nil
	}

	// process no longer exists, but the job file remains under /exec folder
	logger.WriteLog("JAAGENTMNTJOB000003", fn, pid, jobFilePath, agentutils.ExecFolder)

	// The matching .json file is named after the .job file without its
	// "-<pid>.job" tail and is located in DataFolderPath.
	pidSuffix := fmt.Sprintf("-%d.job", pid)
	jsonFilePath := filepath.Join(DataFolderPath, strings.TrimSuffix(jobFileName, pidSuffix)+".json")

	logger.WriteLog("JAAGENTMNTJOB400007", fn, jobFilePath, jsonFilePath)

	// Create jobresult file
	err = createJobResultFile(jsonFilePath, jobFilePath, false)
	if err != nil {
		return fmt.Errorf("in %s(), failed to create job result file: %v", fn, err)
	}

	return nil
}

// isFileLocked probes whether filePath is currently locked by trying to take
// a lock on it with flock.LockFile and the LOCKFILE_FAIL_ON_LOCK flag.
//
// Results:
//   - (false, nil)  the file does not exist, or the lock was obtained (it is
//     released again right away);
//   - (false, err)  the file could not be opened for another reason;
//   - (true, err)   the lock attempt failed; the error carries the reason.
func isFileLocked(filePath string) (bool, error) {
	f, err := os.OpenFile(filePath, os.O_RDWR, 0644)
	switch {
	case err == nil:
		// opened, go on with the lock probe
	case errors.Is(err, os.ErrNotExist):
		// no lock file at all means nothing holds a lock
		return false, nil
	default:
		return false, fmt.Errorf("open file error: %w", err)
	}
	defer f.Close()

	fd := int(f.Fd())

	// Any failure to take the lock is reported as "locked".
	err = flock.LockFile(fd, flock.LOCKFILE_FAIL_ON_LOCK)
	if err != nil {
		return true, fmt.Errorf("failed to lock file '%s': %v", filePath, err) // file is locked by someone else
	}

	// The lock was only taken as a probe, so give it back immediately.
	flock.UnlockFile(fd)
	return false, nil
}
