Giter Site home page Giter Site logo

Comments (1)

manderswy avatar manderswy commented on July 17, 2024 1

In case anyone is interested, I normalized our audit tables as I suggested in the issue above. The two new tables, and the revised auditing function are provided here, for reference. For our particular database, this took the auditing system from around 70 GB to around 14 GB.

My main concern was whether this would cause performance issues, since the trigger function has to check for the existence of the client_query to avoid writing it more than once in the case of a client query that modifies multiple rows. I was pleasantly surprised that it seemed to have no noticeable impact on speed, even with a half-million rows to compare to in the client_query table. Note that I did use a little trick that likely helps, there -- I am storing the hash (via md5()) of the client_query in that table, and am comparing that with a hashed client_query in the trigger function. There's an index on client_query.client_query_hash to make that comparison go quickly. I went with that approach given how massive some of the client_query values were.

I'll close this, but wanted to provide the solution I came up with, in case it's of any use to others, or if it's something that might be considered for future development on this repo.

CREATE TABLE IF NOT EXISTS audit.client_query
(
	query_id bigint NOT NULL DEFAULT nextval('audit.client_query_query_id_seq'::regclass),
	client_query text,
	client_query_hash varchar(32),
	CONSTRAINT query_id PRIMARY KEY (query_id)
	
);

CREATE TABLE IF NOT EXISTS audit.event
(
    event_id bigint NOT NULL DEFAULT nextval('audit.event_event_id_seq'::regclass),
    schema_name text COLLATE pg_catalog."default" NOT NULL,
    table_name text COLLATE pg_catalog."default" NOT NULL,
    relid oid NOT NULL,
    session_user_name text COLLATE pg_catalog."default",
    action_tstamp_tx timestamp with time zone NOT NULL,
    action_tstamp_stm timestamp with time zone NOT NULL,
    action_tstamp_clk timestamp with time zone NOT NULL,
    transaction_id bigint,
    application_name text COLLATE pg_catalog."default",
    client_addr inet,
    client_port integer,
    action text COLLATE pg_catalog."default" NOT NULL,
    row_data hstore,
    changed_fields hstore,
    statement_only boolean NOT NULL,
    query_id bigint REFERENCES audit.client_query(query_id),
    CONSTRAINT event_pkey PRIMARY KEY (event_id),
    CONSTRAINT event_action_check CHECK (action = ANY (ARRAY['I'::text, 'D'::text, 'U'::text, 'T'::text]))
);

CREATE OR REPLACE FUNCTION audit.if_modified_func()
    RETURNS trigger
    LANGUAGE 'plpgsql'
    COST 100
    VOLATILE NOT LEAKPROOF SECURITY DEFINER
    SET search_path=pg_catalog, public
AS $BODY$
DECLARE
    client_query_row audit.client_query;
    event_row audit.event;
    include_values boolean;
    log_diffs boolean;
    hash_of_client_query varchar(32);
    h_old hstore;
    h_new hstore;
    excluded_cols text[] = ARRAY[]::text[];
BEGIN
    IF TG_WHEN <> 'AFTER' THEN
        RAISE EXCEPTION 'audit.if_modified_func() may only run as an AFTER trigger';
    END IF;
	
	hash_of_client_query = md5(current_query());
	
	client_query_row = ROW(
		nextval('audit.client_query_query_id_seq'),
		current_query(),
		hash_of_client_query
	);
		
    event_row = ROW(
        nextval('audit.event_event_id_seq'), 		  -- event_id
        TG_TABLE_SCHEMA::text,                        -- schema_name
        TG_TABLE_NAME::text,                          -- table_name
        TG_RELID,                                     -- relation OID for much quicker searches
        session_user::text,                           -- session_user_name
        current_timestamp,                            -- action_tstamp_tx
        statement_timestamp(),                        -- action_tstamp_stm
        clock_timestamp(),                            -- action_tstamp_clk
        txid_current(),                               -- transaction ID
        current_setting('application_name'),          -- client application
        inet_client_addr(),                           -- client_addr
        inet_client_port(),                           -- client_port
        substring(TG_OP,1,1),                         -- action
        NULL, NULL,                                   -- row_data, changed_fields
        'f',                                          -- statement_only
		NULL::int                                     -- NULL client_query_id we will get later on
        );

    IF NOT TG_ARGV[0]::boolean IS DISTINCT FROM 'f'::boolean THEN
        event_row.client_query = NULL;
    END IF;

    IF TG_ARGV[1] IS NOT NULL THEN
        excluded_cols = TG_ARGV[1]::text[];
    END IF;
    
    IF (TG_OP = 'UPDATE' AND TG_LEVEL = 'ROW') THEN
        event_row.row_data = hstore(OLD.*) - excluded_cols;
        event_row.changed_fields =  (hstore(NEW.*) - event_row.row_data) - excluded_cols;
        IF event_row.changed_fields = hstore('') THEN
            -- All changed fields are ignored. Skip this update.
            RETURN NULL;
        END IF;
    ELSIF (TG_OP = 'DELETE' AND TG_LEVEL = 'ROW') THEN
        event_row.row_data = hstore(OLD.*) - excluded_cols;
    ELSIF (TG_OP = 'INSERT' AND TG_LEVEL = 'ROW') THEN
        event_row.row_data = hstore(NEW.*) - excluded_cols;
    ELSIF (TG_LEVEL = 'STATEMENT' AND TG_OP IN ('INSERT','UPDATE','DELETE','TRUNCATE')) THEN
        event_row.statement_only = 't';
    ELSE
        RAISE EXCEPTION '[audit.if_modified_func] - Trigger func added as trigger for unhandled case: %, %',TG_OP, TG_LEVEL;
        RETURN NULL;
    END IF;
	IF (SELECT COUNT(*) FROM audit.client_query WHERE client_query_hash = hash_of_client_query) THEN --IF THE QUERY ALREADY EXISTS, JUST LINK TO IT
		SELECT query_id FROM audit.client_query WHERE client_query_hash = hash_of_client_query INTO event_row.query_id;
	ELSE --IF THIS QUERY DOESN'T ALREADY EXIST, WRITE IT
		INSERT INTO audit.client_query VALUES (client_query_row.*) RETURNING query_id INTO event_row.query_id;
	END IF;
	INSERT INTO audit.event VALUES (event_row.*);
    RETURN NULL;
END;
$BODY$;


from audit-trigger.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.