Hi Team,
I am trying to implement deduplication on a rowstore table.
Requirement: If a insert happens twice on same day for same user id, username value should be updated.
MemSQL throws error when I use a WHERE clause with ON DUPLICATE UPDATE KEY feature.
Can you please help me out if this can be achieved in any alternative way.
CREATE TABLE USER (
USER_ID INTEGER,
USERNAME VARCHAR(255),
RECORD_DATE DATE,
CONSTRAINT PK_USER PRIMARY KEY (USER_ID, RECORD_DATE)
);
–Direct inserts
INSERT INTO USER VALUES(1,‘A’,CURRENT_DATE());
INSERT INTO USER VALUES(1,‘B’,CURRENT_DATE())
ON DUPLICATE KEY UPDATE
USERNAME = ‘B’ WHERE USER_ID = 1 AND RECORD_DATE = CURRENT_DATE();
–Insert using Pipeline
–Stored Procedure
DELIMITER //
CREATE OR REPLACE PROCEDURE PROCESS_USERS(GENERIC_BATCH query(GENERIC_JSON json)) AS
BEGIN
INSERT INTO USER(USER_ID,USERNAME,RECORD_DATE)
SELECT GENERIC_JSON::USER_ID, GENERIC_JSON::$USERNAME, CURRENT_DATE() FROM GENERIC_BATCH
ON DUPLICATE KEY UPDATE USERNAME = GENERIC_JSON::$USERNAME
WHERE USER_ID = GENERIC_JSON::USERID AND RECORD_DATE = CURRENT_DATE();
END //
DELIMITER ;
–Pipeline Script
CREATE OR REPLACE PIPELINE TEST_PIPELINE_WITH_SP
AS LOAD DATA KAFKA ‘172.17.0.3:9092/test-topic’
INTO PROCEDURE PROCESS_USERS(GENERIC_JSON <- %) FORMAT JSON;