288 lines
12 KiB
SQL
288 lines
12 KiB
SQL
-- Antihoax Database Schema
|
|
-- SQLite adaptation of PostgreSQL 15+ schema
|
|
|
|
-- Drop existing tables (for clean migration)
|
|
DROP TABLE IF EXISTS source_categories;
|
|
DROP TABLE IF EXISTS reports;
|
|
DROP TABLE IF EXISTS sources;
|
|
DROP TABLE IF EXISTS categories;
|
|
DROP TABLE IF EXISTS users;
|
|
DROP TABLE IF EXISTS api_keys;
|
|
|
|
-- Users table (admins, moderators)
|
|
CREATE TABLE users (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
email VARCHAR(255) UNIQUE NOT NULL,
|
|
password_hash VARCHAR(255) NOT NULL,
|
|
name VARCHAR(100) NOT NULL,
|
|
role VARCHAR(20) DEFAULT 'moderator' CHECK (role IN ('admin', 'moderator')),
|
|
is_active BOOLEAN DEFAULT 1,
|
|
last_login DATETIME NULL,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- Categories table
|
|
CREATE TABLE categories (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
name VARCHAR(100) UNIQUE NOT NULL,
|
|
slug VARCHAR(100) UNIQUE NOT NULL,
|
|
description TEXT,
|
|
color VARCHAR(7) DEFAULT '#6B7280', -- hex color
|
|
priority INTEGER DEFAULT 1 CHECK (priority BETWEEN 1 AND 5),
|
|
icon VARCHAR(50), -- lucide icon name
|
|
is_active BOOLEAN DEFAULT 1,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- Sources table (main problematic websites/pages)
|
|
CREATE TABLE sources (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
url VARCHAR(1000) UNIQUE NOT NULL,
|
|
domain VARCHAR(255) NOT NULL,
|
|
title VARCHAR(500),
|
|
description TEXT,
|
|
type VARCHAR(50) NOT NULL CHECK (type IN ('website', 'facebook_page', 'facebook_group', 'instagram', 'blog', 'news_site', 'youtube', 'tiktok', 'telegram', 'other')),
|
|
status VARCHAR(20) DEFAULT 'pending' CHECK (status IN ('pending', 'verified', 'rejected', 'under_review')),
|
|
risk_level INTEGER DEFAULT 1 CHECK (risk_level BETWEEN 1 AND 5),
|
|
language VARCHAR(5) DEFAULT 'sk' CHECK (language IN ('sk', 'cs', 'en', 'other')),
|
|
evidence_urls TEXT, -- JSON array of proof URLs
|
|
reported_by VARCHAR(255), -- email or name
|
|
verified_by INTEGER REFERENCES users(id),
|
|
rejection_reason TEXT,
|
|
follower_count INTEGER DEFAULT 0,
|
|
last_checked DATETIME,
|
|
metadata TEXT DEFAULT '{}', -- JSON data
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- Junction table for many-to-many relationship between sources and categories
|
|
CREATE TABLE source_categories (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
|
|
category_id INTEGER NOT NULL REFERENCES categories(id) ON DELETE CASCADE,
|
|
confidence_score DECIMAL(3,2) DEFAULT 1.0 CHECK (confidence_score BETWEEN 0 AND 1),
|
|
added_by INTEGER REFERENCES users(id),
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
UNIQUE(source_id, category_id)
|
|
);
|
|
|
|
-- Reports table (user submissions)
|
|
CREATE TABLE reports (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
source_url VARCHAR(1000) NOT NULL,
|
|
source_domain VARCHAR(255) NOT NULL,
|
|
reporter_email VARCHAR(255),
|
|
reporter_name VARCHAR(100),
|
|
category_suggestions TEXT, -- JSON array of category IDs
|
|
description TEXT NOT NULL,
|
|
evidence_urls TEXT, -- JSON array: screenshots, articles, etc.
|
|
priority VARCHAR(20) DEFAULT 'medium' CHECK (priority IN ('low', 'medium', 'high', 'urgent')),
|
|
status VARCHAR(20) DEFAULT 'pending' CHECK (status IN ('pending', 'in_review', 'approved', 'rejected', 'duplicate')),
|
|
assigned_to INTEGER REFERENCES users(id),
|
|
admin_notes TEXT,
|
|
processed_at DATETIME NULL,
|
|
ip_address VARCHAR(45), -- IPv4 or IPv6
|
|
user_agent TEXT,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- API Keys table (for external access)
|
|
CREATE TABLE api_keys (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
key_hash VARCHAR(255) UNIQUE NOT NULL,
|
|
name VARCHAR(100) NOT NULL,
|
|
description TEXT,
|
|
owner_email VARCHAR(255) NOT NULL,
|
|
permissions TEXT DEFAULT '["read"]', -- JSON array
|
|
rate_limit INTEGER DEFAULT 1000, -- requests per hour
|
|
is_active BOOLEAN DEFAULT 1,
|
|
usage_count INTEGER DEFAULT 0,
|
|
last_used DATETIME NULL,
|
|
expires_at DATETIME NULL,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- Indexes for performance
|
|
CREATE INDEX idx_sources_domain ON sources(domain);
|
|
CREATE INDEX idx_sources_status ON sources(status);
|
|
CREATE INDEX idx_sources_risk_level ON sources(risk_level);
|
|
CREATE INDEX idx_sources_type ON sources(type);
|
|
CREATE INDEX idx_sources_created_at ON sources(created_at);
|
|
CREATE INDEX idx_sources_verified_by ON sources(verified_by);
|
|
|
|
CREATE INDEX idx_reports_status ON reports(status);
|
|
CREATE INDEX idx_reports_source_domain ON reports(source_domain);
|
|
CREATE INDEX idx_reports_priority ON reports(priority);
|
|
CREATE INDEX idx_reports_created_at ON reports(created_at);
|
|
CREATE INDEX idx_reports_assigned_to ON reports(assigned_to);
|
|
|
|
CREATE INDEX idx_categories_slug ON categories(slug);
|
|
CREATE INDEX idx_categories_priority ON categories(priority);
|
|
|
|
CREATE INDEX idx_source_categories_source_id ON source_categories(source_id);
|
|
CREATE INDEX idx_source_categories_category_id ON source_categories(category_id);
|
|
|
|
CREATE INDEX idx_api_keys_hash ON api_keys(key_hash);
|
|
CREATE INDEX idx_api_keys_owner ON api_keys(owner_email);
|
|
|
|
-- SQLite FTS indexes (using FTS5)
|
|
CREATE VIRTUAL TABLE sources_fts USING fts5(
|
|
title,
|
|
description,
|
|
content='sources',
|
|
content_rowid='id'
|
|
);
|
|
|
|
CREATE VIRTUAL TABLE reports_fts USING fts5(
|
|
description,
|
|
content='reports',
|
|
content_rowid='id'
|
|
);
|
|
|
|
-- Triggers to keep FTS in sync
|
|
CREATE TRIGGER sources_fts_insert AFTER INSERT ON sources BEGIN
|
|
INSERT INTO sources_fts(rowid, title, description)
|
|
VALUES (new.id, new.title, new.description);
|
|
END;
|
|
|
|
CREATE TRIGGER sources_fts_update AFTER UPDATE ON sources BEGIN
|
|
UPDATE sources_fts SET title = new.title, description = new.description
|
|
WHERE rowid = new.id;
|
|
END;
|
|
|
|
CREATE TRIGGER sources_fts_delete AFTER DELETE ON sources BEGIN
|
|
DELETE FROM sources_fts WHERE rowid = old.id;
|
|
END;
|
|
|
|
CREATE TRIGGER reports_fts_insert AFTER INSERT ON reports BEGIN
|
|
INSERT INTO reports_fts(rowid, description)
|
|
VALUES (new.id, new.description);
|
|
END;
|
|
|
|
CREATE TRIGGER reports_fts_update AFTER UPDATE ON reports BEGIN
|
|
UPDATE reports_fts SET description = new.description
|
|
WHERE rowid = new.id;
|
|
END;
|
|
|
|
CREATE TRIGGER reports_fts_delete AFTER DELETE ON reports BEGIN
|
|
DELETE FROM reports_fts WHERE rowid = old.id;
|
|
END;
|
|
|
|
-- Triggers for updated_at (SQLite version)
|
|
CREATE TRIGGER update_users_updated_at
|
|
AFTER UPDATE ON users FOR EACH ROW
|
|
BEGIN
|
|
UPDATE users SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
|
|
END;
|
|
|
|
CREATE TRIGGER update_sources_updated_at
|
|
AFTER UPDATE ON sources FOR EACH ROW
|
|
BEGIN
|
|
UPDATE sources SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
|
|
END;
|
|
|
|
CREATE TRIGGER update_categories_updated_at
|
|
AFTER UPDATE ON categories FOR EACH ROW
|
|
BEGIN
|
|
UPDATE categories SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
|
|
END;
|
|
|
|
CREATE TRIGGER update_reports_updated_at
|
|
AFTER UPDATE ON reports FOR EACH ROW
|
|
BEGIN
|
|
UPDATE reports SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
|
|
END;
|
|
|
|
CREATE TRIGGER update_api_keys_updated_at
|
|
AFTER UPDATE ON api_keys FOR EACH ROW
|
|
BEGIN
|
|
UPDATE api_keys SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
|
|
END;
|
|
|
|
-- Trigger to auto-populate domain field
|
|
CREATE TRIGGER set_sources_domain
|
|
BEFORE INSERT ON sources FOR EACH ROW
|
|
BEGIN
|
|
UPDATE sources SET domain = CASE
|
|
WHEN NEW.url LIKE 'http%://www.%' THEN substr(NEW.url, instr(NEW.url, '://www.') + 7, instr(substr(NEW.url, instr(NEW.url, '://www.') + 7), '/') - 1)
|
|
WHEN NEW.url LIKE 'http%://%' THEN substr(NEW.url, instr(NEW.url, '://') + 3, instr(substr(NEW.url, instr(NEW.url, '://') + 3), '/') - 1)
|
|
ELSE NEW.url
|
|
END WHERE id = NEW.id;
|
|
END;
|
|
|
|
CREATE TRIGGER set_reports_domain
|
|
BEFORE INSERT ON reports FOR EACH ROW
|
|
BEGIN
|
|
UPDATE reports SET source_domain = CASE
|
|
WHEN NEW.source_url LIKE 'http%://www.%' THEN substr(NEW.source_url, instr(NEW.source_url, '://www.') + 7, instr(substr(NEW.source_url, instr(NEW.source_url, '://www.') + 7), '/') - 1)
|
|
WHEN NEW.source_url LIKE 'http%://%' THEN substr(NEW.source_url, instr(NEW.source_url, '://') + 3, instr(substr(NEW.source_url, instr(NEW.source_url, '://') + 3), '/') - 1)
|
|
ELSE NEW.source_url
|
|
END WHERE id = NEW.id;
|
|
END;
|
|
|
|
-- Insert default categories
|
|
INSERT INTO categories (name, slug, description, color, priority, icon) VALUES
|
|
('Hoax', 'hoax', 'Šírenie nepravdivých informácií a hoaxov', '#EF4444', 5, 'AlertTriangle'),
|
|
('Hate Speech', 'hate-speech', 'Nenávistné prejavy proti skupinám ľudí', '#DC2626', 5, 'MessageSquareX'),
|
|
('Violence', 'violence', 'Povzbudzovanie k násiliu', '#B91C1C', 5, 'Sword'),
|
|
('Racism', 'racism', 'Rasistické a diskriminačné obsahy', '#991B1B', 5, 'Users'),
|
|
('Conspiracy', 'conspiracy', 'Konšpiračné teórie', '#F59E0B', 3, 'Eye'),
|
|
('Propaganda', 'propaganda', 'Politická propaganda a manipulácia', '#D97706', 2, 'Megaphone'),
|
|
('Spam', 'spam', 'Spam a podvodné obsahy', '#6B7280', 1, 'Mail'),
|
|
('Extremism', 'extremism', 'Extrémistické ideológie', '#7C2D12', 5, 'Flame'),
|
|
('Medical Misinformation', 'medical-misinfo', 'Nepravdivé zdravotné informácie', '#EA580C', 4, 'Heart'),
|
|
('Financial Scam', 'financial-scam', 'Finančné podvody a pyramid schemes', '#C2410C', 4, 'DollarSign');
|
|
|
|
-- Insert default admin user (password: admin123 - change in production!)
|
|
INSERT INTO users (email, password_hash, name, role) VALUES
|
|
('admin@antihoax.sk', '$2b$12$LQv3c1yqBWVHxkd0LHAkCOYz6TtxMQJqhN8/LewLON5QhMlPsJiTi', 'System Admin', 'admin');
|
|
|
|
-- Insert some example API key (for testing)
|
|
INSERT INTO api_keys (key_hash, name, description, owner_email, permissions, rate_limit) VALUES
|
|
('$2b$12$example_hash_here', 'Chrome Extension', 'API key for browser extension', 'extension@antihoax.sk', '["read"]', 10000);
|
|
|
|
-- Views for easier querying (SQLite compatible)
|
|
|
|
-- View: Sources with categories
|
|
CREATE VIEW sources_with_categories AS
|
|
SELECT
|
|
s.*,
|
|
GROUP_CONCAT(c.name) as category_names,
|
|
GROUP_CONCAT(c.slug) as category_slugs,
|
|
GROUP_CONCAT(c.color) as category_colors
|
|
FROM sources s
|
|
LEFT JOIN source_categories sc ON s.id = sc.source_id
|
|
LEFT JOIN categories c ON sc.category_id = c.id AND c.is_active = 1
|
|
WHERE s.status = 'verified'
|
|
GROUP BY s.id;
|
|
|
|
-- View: Dashboard statistics
|
|
CREATE VIEW dashboard_stats AS
|
|
SELECT
|
|
(SELECT COUNT(*) FROM sources WHERE status = 'verified') as total_sources,
|
|
(SELECT COUNT(*) FROM sources WHERE status = 'pending') as pending_sources,
|
|
(SELECT COUNT(*) FROM reports WHERE status = 'pending') as pending_reports,
|
|
(SELECT COUNT(*) FROM sources WHERE status = 'verified' AND risk_level >= 4) as high_risk_sources,
|
|
(SELECT COUNT(*) FROM sources WHERE created_at > datetime('now', '-7 days')) as sources_added_week,
|
|
(SELECT COUNT(*) FROM reports WHERE created_at > datetime('now', '-1 day')) as reports_today;
|
|
|
|
-- View: Top risky domains
|
|
CREATE VIEW top_risky_domains AS
|
|
SELECT
|
|
domain,
|
|
COUNT(*) as source_count,
|
|
AVG(risk_level) as avg_risk_level,
|
|
MAX(risk_level) as max_risk_level,
|
|
GROUP_CONCAT(DISTINCT c.name) as categories
|
|
FROM sources s
|
|
LEFT JOIN source_categories sc ON s.id = sc.source_id
|
|
LEFT JOIN categories c ON sc.category_id = c.id
|
|
WHERE s.status = 'verified'
|
|
GROUP BY domain
|
|
HAVING AVG(risk_level) >= 3
|
|
ORDER BY avg_risk_level DESC, source_count DESC; |