Monday, April 12, 2021

Stop Unnecessary Alerting

I hate alert systems that send out so many alerts that they are relegated to an unmonitored folder. That makes the alerts of no use. However, it is useful to have alerts for failed jobs.

Here is some simple code for SQL Server Agent that monitors the execution  log. This code can run every few minutes (or hours) to check for alerts and alert resets. It only reads msdb.dbo.sysjobs_view and employs a user log table. It monitors all SQL Agent jobs.

This alert application, will only send out alerts when a job first fails, and will suppress subsequent alerts until the job runs successfully (resets alert). It will also send out an "OK Now" alert after a "NOK" alert once the job successfully runs.

This way, you should not need to ignore any "NOK" alerts. Also, the only success alerts will be the "OK Now" once a failed job completes successfully.

Here is the raw code. I suggest you use it to create a stored procedure and tailor it for the tolerances you prefer. MyDev, is your own work databse.

USE MyDev

go 

CREATE TABLE [dbo].MyAlertLog (

       [MyLogDateTime] [datetime] NOT NULL DEFAULT getdate(),

       [JobId] [uniqueidentifier] NOT NULL,

       [JobName] [varchar](512) NOT NULL,

       [AlertStatus] [varchar](25) NOT NULL,

        CONSTRAINT [PK_JobAlertLog] PRIMARY KEY CLUSTERED

       (      [MyLogDateTime] ASC,

              [JobId] ASC

       ) ) ON [PRIMARY]

GO 

delete from MyDev..MyAlertLog

       where MyLogDateTime<getdate()-365;

if object_id('tempdb..#Jobs') is not null

    drop table #Jobs;

if object_id('tempdb..#RunStatus') is not null

    drop table #RunStatus

select --get list of all candidate jobs

               j.job_id

              ,j.name

       into #Jobs

       from msdb.dbo.sysjobs_view j

       where j.enabled=1

declare @JobId as uniqueidentifier       

              ,@JobName as varchar(512)         

              ,@LastRunStatus as varchar(25)    

              ,@MaxRunStatus as int                    

              ,@MySubject as nvarchar(255)      

              ,@MyBody as varchar(8000)         

              ,@LastSuccessfulRun datetime      

              ,@MyRecipients varchar(8000);

set @MyRecipients = 'Me@MyCompany.com;Someone@TheirCompany.com'; 

if cursor_status('global','MyJobsCursor')>=-1 

       begin

              deallocate MyJobsCursor;

       end

declare MyJobsCursor cursor for

       select * from #Jobs;

open MyJobsCursor;

fetch next from MyJobsCursor

       into @JobId, @JobName;

while @@FETCH_STATUS = 0 

       begin

              select @LastRunStatus=

                      (select top 1

                                           case when h.run_status=1 then 'Success' else 'Failure' end JobStatus

                             from msdb.dbo.sysjobs j

                                    inner join msdb.dbo.sysjobhistory h

                                           on j.job_id=h.job_id

                             where  j.job_id=@JobId

                               and h.run_date>=convert(int,convert(varchar(10),getdate()-3,112))

                               and step_id=0

                             order by h.run_date desc, h.run_time desc);

              if @LastRunStatus='Failure'

                      and --Check that this is first Failure so that repeated Failure alerts aren't sent out

                             ((select top 1 AlertStatus from MyDev..MyAlertLog where JobId=@JobId order by MyLogDateTime desc)='Success'

                              or not exists (select top 1 AlertStatus from MyDev..MyAlertLog where JobId=@JobId))

                      begin --Failure. Check for passing threshold first

                             select top 3 h.run_status --Top 2, so that, at least 2 job failures before alerting

                                           into #RunStatus

                                    from msdb.dbo.sysjobs j

                                           inner join msdb.dbo.sysjobhistory h

                                                  on j.job_id=h.job_id

                                    where  j.job_id=@JobId

                                           and h.run_date>=convert(int,convert(varchar(10),getdate()-1,112))

                                           and step_id=0

                                    order by h.run_date desc, h.run_time desc;

                             select @MaxRunStatus=max(run_status) from #RunStatus;

                             if @MaxRunStatus=0

                                    begin --Failure alert

                                           set @MySubject= 'MyApplication Job Failure Alert:'+@Jobname;

                                           set @MyBody='Last successful run date (yyyymmdd) =' +(select top 1 convert(varchar(20),run_date)+', time (hhmmss)='+convert(varchar(20),run_time)

                                                                        from msdb..sysjobhistory

                                                                        where job_id=@JobId

                                                                          and run_status=1

                                                                        order by run_date desc, run_time desc);

                                           if @MyBody is null

                                                  select @MyBody='Last successful run unknown';

                                           EXEC msdb.dbo.sp_send_dbmail  

                                                  @recipients = @MyRecipients, 

                                                  @body = @MyBody, 

                                                  @subject = @MySubject;

                                           insert into MyDev..MyAlertLog

                                                  select getdate(),@JobId, @JobName,'Failure';

                                    end

                             if object_id('tempdb..#RunStatus') is not null

                                    drop table #RunStatus;

                      end

              else if @LastRunStatus='Success'

                      and (select top 1 AlertStatus from MyDev..MyAlertLog where JobId=@JobId order by MyLogDateTime desc)='Failure'

                      begin --Success

                             set @MySubject= 'MyApplication Job Failure Alert:'+@Jobname+', NOW OK'

                             set @MyBody='Last successful run =' +(select top 1 'date: '+convert(varchar(20),run_date)+', time (hhmmss): '+convert(varchar(20),run_time)

                                                          from msdb..sysjobhistory

                                                          where job_id=@JobId

                                                                 and run_status=1

                                                          order by run_date desc, run_time desc);

                             EXEC msdb.dbo.sp_send_dbmail  

                                           @recipients = @MyRecipients,  

                                           @body = @MyBody, 

                                           @subject = @MySubject;

                             insert into MyDev..MyAlertLog

                                    select getdate(),@JobId, @JobName,'Success';

                      end

              if @LastRunStatus='Failure'

                      and --Check that there has been a success since Failure alert

                             (select top 1 AlertStatus from MyDev..MyAlertLog where JobId=@JobId order by MyLogDateTime desc)='Failure'

                      begin 

                             select @LastSuccessfulRun=

                                    (select top 1 dateadd(hh,convert(int,left(right('0'+convert(varchar(6),run_time),6),2))

                                                  ,dateadd(mi,convert(int,substring(right('0'+convert(varchar(6),run_time),6),3,2))      

                                                  ,dateadd(ss,convert(int,substring(right('0'+convert(varchar(6),run_time),6),5,2))      

                                                                        ,convert(datetime,convert(char(8),run_date),112)

                                                                        )))

                                                                 from msdb..sysjobhistory

                                                                 where job_id=@JobId

                                                                        and run_status=1

                                                                 order by run_date desc, run_time desc);

                             if @LastSuccessfulRun>(select top 1 MyLogDateTime from MyDev..MyAlertLog where JobId=@JobId order by MyLogDateTime desc)

                                    --Last run was Failure, but there was a success, so send out OK

                                    begin --Success

                                           set @MySubject= 'MyApplication Job Failure Alert:'+@Jobname+', NOW OKish'

                                           set @MyBody='Last successful run =' +(select top 1 'date: '+convert(varchar(20),run_date)+', time (hhmmss): '+convert(varchar(20),run_time)

                                                                        from msdb..sysjobhistory

                                                                        where job_id=@JobId

                                                                               and run_status=1

                                                                        order by run_date desc, run_time desc)

                                                                        +'. Note, there have been failure(s) since.';

                                           EXEC msdb.dbo.sp_send_dbmail  

                                                  @recipients = @MyRecipients, 

                                                  @body = @MyBody, 

                                                  @subject = @MySubject;

                                           insert into MyDev..MyAlertLog

                                                  select getdate(),@JobId, @JobName,'Success';

                                    end 

                      end

              fetch next from MyJobsCursor into @JobId, @JobName;

       end

close MyJobsCursor;

deallocate MyJobsCursor;

 


No comments: