-- Antihoax Database Schema -- SQLite adaptation of PostgreSQL 15+ schema -- Drop existing tables (for clean migration) DROP TABLE IF EXISTS source_categories; DROP TABLE IF EXISTS reports; DROP TABLE IF EXISTS sources; DROP TABLE IF EXISTS categories; DROP TABLE IF EXISTS users; DROP TABLE IF EXISTS api_keys; -- Users table (admins, moderators) CREATE TABLE users ( id INTEGER PRIMARY KEY AUTOINCREMENT, email VARCHAR(255) UNIQUE NOT NULL, password_hash VARCHAR(255) NOT NULL, name VARCHAR(100) NOT NULL, role VARCHAR(20) DEFAULT 'moderator' CHECK (role IN ('admin', 'moderator')), is_active BOOLEAN DEFAULT 1, last_login DATETIME NULL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ); -- Categories table CREATE TABLE categories ( id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR(100) UNIQUE NOT NULL, slug VARCHAR(100) UNIQUE NOT NULL, description TEXT, color VARCHAR(7) DEFAULT '#6B7280', -- hex color priority INTEGER DEFAULT 1 CHECK (priority BETWEEN 1 AND 5), icon VARCHAR(50), -- lucide icon name is_active BOOLEAN DEFAULT 1, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ); -- Sources table (main problematic websites/pages) CREATE TABLE sources ( id INTEGER PRIMARY KEY AUTOINCREMENT, url VARCHAR(1000) UNIQUE NOT NULL, domain VARCHAR(255) NOT NULL, title VARCHAR(500), description TEXT, type VARCHAR(50) NOT NULL CHECK (type IN ('website', 'facebook_page', 'facebook_group', 'instagram', 'blog', 'news_site', 'youtube', 'tiktok', 'telegram', 'other')), status VARCHAR(20) DEFAULT 'pending' CHECK (status IN ('pending', 'verified', 'rejected', 'under_review')), risk_level INTEGER DEFAULT 1 CHECK (risk_level BETWEEN 1 AND 5), language VARCHAR(5) DEFAULT 'sk' CHECK (language IN ('sk', 'cs', 'en', 'other')), evidence_urls TEXT, -- JSON array of proof URLs reported_by VARCHAR(255), -- email or name verified_by INTEGER REFERENCES users(id), rejection_reason TEXT, follower_count INTEGER DEFAULT 0, last_checked DATETIME, metadata TEXT DEFAULT '{}', -- JSON data created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ); -- Junction table for many-to-many relationship between sources and categories CREATE TABLE source_categories ( id INTEGER PRIMARY KEY AUTOINCREMENT, source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE, category_id INTEGER NOT NULL REFERENCES categories(id) ON DELETE CASCADE, confidence_score DECIMAL(3,2) DEFAULT 1.0 CHECK (confidence_score BETWEEN 0 AND 1), added_by INTEGER REFERENCES users(id), created_at DATETIME DEFAULT CURRENT_TIMESTAMP, UNIQUE(source_id, category_id) ); -- Reports table (user submissions) CREATE TABLE reports ( id INTEGER PRIMARY KEY AUTOINCREMENT, source_url VARCHAR(1000) NOT NULL, source_domain VARCHAR(255) NOT NULL, reporter_email VARCHAR(255), reporter_name VARCHAR(100), category_suggestions TEXT, -- JSON array of category IDs description TEXT NOT NULL, evidence_urls TEXT, -- JSON array: screenshots, articles, etc. priority VARCHAR(20) DEFAULT 'medium' CHECK (priority IN ('low', 'medium', 'high', 'urgent')), status VARCHAR(20) DEFAULT 'pending' CHECK (status IN ('pending', 'in_review', 'approved', 'rejected', 'duplicate')), assigned_to INTEGER REFERENCES users(id), admin_notes TEXT, processed_at DATETIME NULL, ip_address VARCHAR(45), -- IPv4 or IPv6 user_agent TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ); -- API Keys table (for external access) CREATE TABLE api_keys ( id INTEGER PRIMARY KEY AUTOINCREMENT, key_hash VARCHAR(255) UNIQUE NOT NULL, name VARCHAR(100) NOT NULL, description TEXT, owner_email VARCHAR(255) NOT NULL, permissions TEXT DEFAULT '["read"]', -- JSON array rate_limit INTEGER DEFAULT 1000, -- requests per hour is_active BOOLEAN DEFAULT 1, usage_count INTEGER DEFAULT 0, last_used DATETIME NULL, expires_at DATETIME NULL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ); -- Indexes for performance CREATE INDEX idx_sources_domain ON sources(domain); CREATE INDEX idx_sources_status ON sources(status); CREATE INDEX idx_sources_risk_level ON sources(risk_level); CREATE INDEX idx_sources_type ON sources(type); CREATE INDEX idx_sources_created_at ON sources(created_at); CREATE INDEX idx_sources_verified_by ON sources(verified_by); CREATE INDEX idx_reports_status ON reports(status); CREATE INDEX idx_reports_source_domain ON reports(source_domain); CREATE INDEX idx_reports_priority ON reports(priority); CREATE INDEX idx_reports_created_at ON reports(created_at); CREATE INDEX idx_reports_assigned_to ON reports(assigned_to); CREATE INDEX idx_categories_slug ON categories(slug); CREATE INDEX idx_categories_priority ON categories(priority); CREATE INDEX idx_source_categories_source_id ON source_categories(source_id); CREATE INDEX idx_source_categories_category_id ON source_categories(category_id); CREATE INDEX idx_api_keys_hash ON api_keys(key_hash); CREATE INDEX idx_api_keys_owner ON api_keys(owner_email); -- SQLite FTS indexes (using FTS5) CREATE VIRTUAL TABLE sources_fts USING fts5( title, description, content='sources', content_rowid='id' ); CREATE VIRTUAL TABLE reports_fts USING fts5( description, content='reports', content_rowid='id' ); -- Triggers to keep FTS in sync CREATE TRIGGER sources_fts_insert AFTER INSERT ON sources BEGIN INSERT INTO sources_fts(rowid, title, description) VALUES (new.id, new.title, new.description); END; CREATE TRIGGER sources_fts_update AFTER UPDATE ON sources BEGIN UPDATE sources_fts SET title = new.title, description = new.description WHERE rowid = new.id; END; CREATE TRIGGER sources_fts_delete AFTER DELETE ON sources BEGIN DELETE FROM sources_fts WHERE rowid = old.id; END; CREATE TRIGGER reports_fts_insert AFTER INSERT ON reports BEGIN INSERT INTO reports_fts(rowid, description) VALUES (new.id, new.description); END; CREATE TRIGGER reports_fts_update AFTER UPDATE ON reports BEGIN UPDATE reports_fts SET description = new.description WHERE rowid = new.id; END; CREATE TRIGGER reports_fts_delete AFTER DELETE ON reports BEGIN DELETE FROM reports_fts WHERE rowid = old.id; END; -- Triggers for updated_at (SQLite version) CREATE TRIGGER update_users_updated_at AFTER UPDATE ON users FOR EACH ROW BEGIN UPDATE users SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; END; CREATE TRIGGER update_sources_updated_at AFTER UPDATE ON sources FOR EACH ROW BEGIN UPDATE sources SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; END; CREATE TRIGGER update_categories_updated_at AFTER UPDATE ON categories FOR EACH ROW BEGIN UPDATE categories SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; END; CREATE TRIGGER update_reports_updated_at AFTER UPDATE ON reports FOR EACH ROW BEGIN UPDATE reports SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; END; CREATE TRIGGER update_api_keys_updated_at AFTER UPDATE ON api_keys FOR EACH ROW BEGIN UPDATE api_keys SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; END; -- Trigger to auto-populate domain field CREATE TRIGGER set_sources_domain BEFORE INSERT ON sources FOR EACH ROW BEGIN UPDATE sources SET domain = CASE WHEN NEW.url LIKE 'http%://www.%' THEN substr(NEW.url, instr(NEW.url, '://www.') + 7, instr(substr(NEW.url, instr(NEW.url, '://www.') + 7), '/') - 1) WHEN NEW.url LIKE 'http%://%' THEN substr(NEW.url, instr(NEW.url, '://') + 3, instr(substr(NEW.url, instr(NEW.url, '://') + 3), '/') - 1) ELSE NEW.url END WHERE id = NEW.id; END; CREATE TRIGGER set_reports_domain BEFORE INSERT ON reports FOR EACH ROW BEGIN UPDATE reports SET source_domain = CASE WHEN NEW.source_url LIKE 'http%://www.%' THEN substr(NEW.source_url, instr(NEW.source_url, '://www.') + 7, instr(substr(NEW.source_url, instr(NEW.source_url, '://www.') + 7), '/') - 1) WHEN NEW.source_url LIKE 'http%://%' THEN substr(NEW.source_url, instr(NEW.source_url, '://') + 3, instr(substr(NEW.source_url, instr(NEW.source_url, '://') + 3), '/') - 1) ELSE NEW.source_url END WHERE id = NEW.id; END; -- Insert default categories INSERT INTO categories (name, slug, description, color, priority, icon) VALUES ('Hoax', 'hoax', 'Šírenie nepravdivých informácií a hoaxov', '#EF4444', 5, 'AlertTriangle'), ('Hate Speech', 'hate-speech', 'Nenávistné prejavy proti skupinám ľudí', '#DC2626', 5, 'MessageSquareX'), ('Violence', 'violence', 'Povzbudzovanie k násiliu', '#B91C1C', 5, 'Sword'), ('Racism', 'racism', 'Rasistické a diskriminačné obsahy', '#991B1B', 5, 'Users'), ('Conspiracy', 'conspiracy', 'Konšpiračné teórie', '#F59E0B', 3, 'Eye'), ('Propaganda', 'propaganda', 'Politická propaganda a manipulácia', '#D97706', 2, 'Megaphone'), ('Spam', 'spam', 'Spam a podvodné obsahy', '#6B7280', 1, 'Mail'), ('Extremism', 'extremism', 'Extrémistické ideológie', '#7C2D12', 5, 'Flame'), ('Medical Misinformation', 'medical-misinfo', 'Nepravdivé zdravotné informácie', '#EA580C', 4, 'Heart'), ('Financial Scam', 'financial-scam', 'Finančné podvody a pyramid schemes', '#C2410C', 4, 'DollarSign'); -- Insert default admin user (password: admin123 - change in production!) INSERT INTO users (email, password_hash, name, role) VALUES ('admin@antihoax.sk', '$2b$12$LQv3c1yqBWVHxkd0LHAkCOYz6TtxMQJqhN8/LewLON5QhMlPsJiTi', 'System Admin', 'admin'); -- Insert some example API key (for testing) INSERT INTO api_keys (key_hash, name, description, owner_email, permissions, rate_limit) VALUES ('$2b$12$example_hash_here', 'Chrome Extension', 'API key for browser extension', 'extension@antihoax.sk', '["read"]', 10000); -- Views for easier querying (SQLite compatible) -- View: Sources with categories CREATE VIEW sources_with_categories AS SELECT s.*, GROUP_CONCAT(c.name) as category_names, GROUP_CONCAT(c.slug) as category_slugs, GROUP_CONCAT(c.color) as category_colors FROM sources s LEFT JOIN source_categories sc ON s.id = sc.source_id LEFT JOIN categories c ON sc.category_id = c.id AND c.is_active = 1 WHERE s.status = 'verified' GROUP BY s.id; -- View: Dashboard statistics CREATE VIEW dashboard_stats AS SELECT (SELECT COUNT(*) FROM sources WHERE status = 'verified') as total_sources, (SELECT COUNT(*) FROM sources WHERE status = 'pending') as pending_sources, (SELECT COUNT(*) FROM reports WHERE status = 'pending') as pending_reports, (SELECT COUNT(*) FROM sources WHERE status = 'verified' AND risk_level >= 4) as high_risk_sources, (SELECT COUNT(*) FROM sources WHERE created_at > datetime('now', '-7 days')) as sources_added_week, (SELECT COUNT(*) FROM reports WHERE created_at > datetime('now', '-1 day')) as reports_today; -- View: Top risky domains CREATE VIEW top_risky_domains AS SELECT domain, COUNT(*) as source_count, AVG(risk_level) as avg_risk_level, MAX(risk_level) as max_risk_level, GROUP_CONCAT(DISTINCT c.name) as categories FROM sources s LEFT JOIN source_categories sc ON s.id = sc.source_id LEFT JOIN categories c ON sc.category_id = c.id WHERE s.status = 'verified' GROUP BY domain HAVING AVG(risk_level) >= 3 ORDER BY avg_risk_level DESC, source_count DESC;