I hate alert systems that send out so many alerts that they are relegated to an unmonitored folder. That makes the alerts of no use. However, it is useful to have alerts for failed jobs.
Here is some simple code for SQL Server Agent that monitors the execution log. This code can run every few minutes (or hours) to check for alerts and alert resets. It only reads msdb.dbo.sysjobs_view and employs a user log table. It monitors all SQL Agent jobs.
This alert application, will only send out alerts when a job first fails, and will suppress subsequent alerts until the job runs successfully (resets alert). It will also send out an "OK Now" alert after a "NOK" alert once the job successfully runs.
This way, you should not need to ignore any "NOK" alerts. Also, the only success alerts will be the "OK Now" once a failed job completes successfully.
Here is the raw code. I suggest you use it to create a stored procedure and tailor it for the tolerances you prefer. MyDev, is your own work databse.
USE MyDev
go
CREATE TABLE [dbo].MyAlertLog (
[MyLogDateTime] [datetime] NOT NULL DEFAULT getdate(),
[JobId] [uniqueidentifier] NOT NULL,
[JobName] [varchar](512) NOT NULL,
[AlertStatus] [varchar](25) NOT NULL,
CONSTRAINT [PK_JobAlertLog] PRIMARY KEY CLUSTERED
( [MyLogDateTime] ASC,
[JobId] ASC
) ) ON [PRIMARY]
GO
delete from MyDev..MyAlertLog
where MyLogDateTime<getdate()-365;
if object_id('tempdb..#Jobs') is not null
drop table #Jobs;
if object_id('tempdb..#RunStatus') is not null
drop table #RunStatus;
select --get list of all candidate jobs
j.job_id
,j.name
into #Jobs
from msdb.dbo.sysjobs_view j
where j.enabled=1
declare @JobId as uniqueidentifier
,@JobName as varchar(512)
,@LastRunStatus as varchar(25)
,@MaxRunStatus as int
,@MySubject as nvarchar(255)
,@MyBody as varchar(8000)
,@LastSuccessfulRun datetime
,@MyRecipients varchar(8000);
set @MyRecipients = 'Me@MyCompany.com;Someone@TheirCompany.com';
if cursor_status('global','MyJobsCursor')>=-1
begin
deallocate MyJobsCursor;
end
declare MyJobsCursor cursor for
select * from #Jobs;
open MyJobsCursor;
fetch next from MyJobsCursor
into @JobId, @JobName;
while @@FETCH_STATUS = 0
begin
select @LastRunStatus=
(select top 1
case when h.run_status=1 then 'Success' else 'Failure' end JobStatus
from msdb.dbo.sysjobs j
inner join msdb.dbo.sysjobhistory h
on j.job_id=h.job_id
where j.job_id=@JobId
and h.run_date>=convert(int,convert(varchar(10),getdate()-3,112))
and step_id=0
order by h.run_date desc, h.run_time desc);
if @LastRunStatus='Failure'
and --Check that this is first Failure so that repeated Failure alerts aren't sent out
((select top 1 AlertStatus from MyDev..MyAlertLog where JobId=@JobId order by MyLogDateTime desc)='Success'
or not exists (select top 1 AlertStatus from MyDev..MyAlertLog where JobId=@JobId))
begin --Failure. Check for passing threshold first
select top 3 h.run_status --Top 2, so that, at least 2 job failures before alerting
into #RunStatus
from msdb.dbo.sysjobs j
inner join msdb.dbo.sysjobhistory h
on j.job_id=h.job_id
where j.job_id=@JobId
and h.run_date>=convert(int,convert(varchar(10),getdate()-1,112))
and step_id=0
order by h.run_date desc, h.run_time desc;
select @MaxRunStatus=max(run_status) from #RunStatus;
if @MaxRunStatus=0
begin --Failure alert
set @MySubject= 'MyApplication Job Failure Alert:'+@Jobname;
set @MyBody='Last successful run date (yyyymmdd) =' +(select top 1 convert(varchar(20),run_date)+', time (hhmmss)='+convert(varchar(20),run_time)
from msdb..sysjobhistory
where job_id=@JobId
and run_status=1
order by run_date desc, run_time desc);
if @MyBody is null
select @MyBody='Last successful run unknown';
EXEC msdb.dbo.sp_send_dbmail
@recipients = @MyRecipients,
@body = @MyBody,
@subject = @MySubject;
insert into MyDev..MyAlertLog
select getdate(),@JobId, @JobName,'Failure';
end
if object_id('tempdb..#RunStatus') is not null
drop table #RunStatus;
end
else if @LastRunStatus='Success'
and (select top 1 AlertStatus from MyDev..MyAlertLog where JobId=@JobId order by MyLogDateTime desc)='Failure'
begin --Success
set @MySubject= 'MyApplication Job Failure Alert:'+@Jobname+', NOW OK'
set @MyBody='Last successful run =' +(select top 1 'date: '+convert(varchar(20),run_date)+', time (hhmmss): '+convert(varchar(20),run_time)
from msdb..sysjobhistory
where job_id=@JobId
and run_status=1
order by run_date desc, run_time desc);
EXEC msdb.dbo.sp_send_dbmail
@recipients = @MyRecipients,
@body = @MyBody,
@subject = @MySubject;
insert into MyDev..MyAlertLog
select getdate(),@JobId, @JobName,'Success';
end
if @LastRunStatus='Failure'
and --Check that there has been a success since Failure alert
(select top 1 AlertStatus from MyDev..MyAlertLog where JobId=@JobId order by MyLogDateTime desc)='Failure'
begin
select @LastSuccessfulRun=
(select top 1 dateadd(hh,convert(int,left(right('0'+convert(varchar(6),run_time),6),2))
,dateadd(mi,convert(int,substring(right('0'+convert(varchar(6),run_time),6),3,2))
,dateadd(ss,convert(int,substring(right('0'+convert(varchar(6),run_time),6),5,2))
,convert(datetime,convert(char(8),run_date),112)
)))
from msdb..sysjobhistory
where job_id=@JobId
and run_status=1
order by run_date desc, run_time desc);
if @LastSuccessfulRun>(select top 1 MyLogDateTime from MyDev..MyAlertLog where JobId=@JobId order by MyLogDateTime desc)
--Last run was Failure, but there was a success, so send out OK
begin --Success
set @MySubject= 'MyApplication Job Failure Alert:'+@Jobname+', NOW OKish'
set @MyBody='Last successful run =' +(select top 1 'date: '+convert(varchar(20),run_date)+', time (hhmmss): '+convert(varchar(20),run_time)
from msdb..sysjobhistory
where job_id=@JobId
and run_status=1
order by run_date desc, run_time desc)
+'. Note, there have been failure(s) since.';
EXEC msdb.dbo.sp_send_dbmail
@recipients = @MyRecipients,
@body = @MyBody,
@subject = @MySubject;
insert into MyDev..MyAlertLog
select getdate(),@JobId, @JobName,'Success';
end
end
fetch next from MyJobsCursor into @JobId, @JobName;
end
close MyJobsCursor;
deallocate MyJobsCursor;
No comments:
Post a Comment