diff --git a/database/antihoax.db b/database/antihoax.db new file mode 100644 index 0000000..3075b0a Binary files /dev/null and b/database/antihoax.db differ diff --git a/database/schema.sql b/database/schema.sql new file mode 100644 index 0000000..54e9918 --- /dev/null +++ b/database/schema.sql @@ -0,0 +1,288 @@ +-- Antihoax Database Schema +-- SQLite adaptation of PostgreSQL 15+ schema + +-- Drop existing tables (for clean migration) +DROP TABLE IF EXISTS source_categories; +DROP TABLE IF EXISTS reports; +DROP TABLE IF EXISTS sources; +DROP TABLE IF EXISTS categories; +DROP TABLE IF EXISTS users; +DROP TABLE IF EXISTS api_keys; + +-- Users table (admins, moderators) +CREATE TABLE users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + email VARCHAR(255) UNIQUE NOT NULL, + password_hash VARCHAR(255) NOT NULL, + name VARCHAR(100) NOT NULL, + role VARCHAR(20) DEFAULT 'moderator' CHECK (role IN ('admin', 'moderator')), + is_active BOOLEAN DEFAULT 1, + last_login DATETIME NULL, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP +); + +-- Categories table +CREATE TABLE categories ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name VARCHAR(100) UNIQUE NOT NULL, + slug VARCHAR(100) UNIQUE NOT NULL, + description TEXT, + color VARCHAR(7) DEFAULT '#6B7280', -- hex color + priority INTEGER DEFAULT 1 CHECK (priority BETWEEN 1 AND 5), + icon VARCHAR(50), -- lucide icon name + is_active BOOLEAN DEFAULT 1, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP +); + +-- Sources table (main problematic websites/pages) +CREATE TABLE sources ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + url VARCHAR(1000) UNIQUE NOT NULL, + domain VARCHAR(255) NOT NULL, + title VARCHAR(500), + description TEXT, + type VARCHAR(50) NOT NULL CHECK (type IN ('website', 'facebook_page', 'facebook_group', 'instagram', 'blog', 'news_site', 'youtube', 'tiktok', 'telegram', 'other')), + status VARCHAR(20) DEFAULT 'pending' CHECK (status IN ('pending', 'verified', 'rejected', 'under_review')), + risk_level INTEGER DEFAULT 1 CHECK (risk_level BETWEEN 1 AND 5), + language VARCHAR(5) DEFAULT 'sk' CHECK (language IN ('sk', 'cs', 'en', 'other')), + evidence_urls TEXT, -- JSON array of proof URLs + reported_by VARCHAR(255), -- email or name + verified_by INTEGER REFERENCES users(id), + rejection_reason TEXT, + follower_count INTEGER DEFAULT 0, + last_checked DATETIME, + metadata TEXT DEFAULT '{}', -- JSON data + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP +); + +-- Junction table for many-to-many relationship between sources and categories +CREATE TABLE source_categories ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE, + category_id INTEGER NOT NULL REFERENCES categories(id) ON DELETE CASCADE, + confidence_score DECIMAL(3,2) DEFAULT 1.0 CHECK (confidence_score BETWEEN 0 AND 1), + added_by INTEGER REFERENCES users(id), + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + UNIQUE(source_id, category_id) +); + +-- Reports table (user submissions) +CREATE TABLE reports ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_url VARCHAR(1000) NOT NULL, + source_domain VARCHAR(255) NOT NULL, + reporter_email VARCHAR(255), + reporter_name VARCHAR(100), + category_suggestions TEXT, -- JSON array of category IDs + description TEXT NOT NULL, + evidence_urls TEXT, -- JSON array: screenshots, articles, etc. + priority VARCHAR(20) DEFAULT 'medium' CHECK (priority IN ('low', 'medium', 'high', 'urgent')), + status VARCHAR(20) DEFAULT 'pending' CHECK (status IN ('pending', 'in_review', 'approved', 'rejected', 'duplicate')), + assigned_to INTEGER REFERENCES users(id), + admin_notes TEXT, + processed_at DATETIME NULL, + ip_address VARCHAR(45), -- IPv4 or IPv6 + user_agent TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP +); + +-- API Keys table (for external access) +CREATE TABLE api_keys ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + key_hash VARCHAR(255) UNIQUE NOT NULL, + name VARCHAR(100) NOT NULL, + description TEXT, + owner_email VARCHAR(255) NOT NULL, + permissions TEXT DEFAULT '["read"]', -- JSON array + rate_limit INTEGER DEFAULT 1000, -- requests per hour + is_active BOOLEAN DEFAULT 1, + usage_count INTEGER DEFAULT 0, + last_used DATETIME NULL, + expires_at DATETIME NULL, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP +); + +-- Indexes for performance +CREATE INDEX idx_sources_domain ON sources(domain); +CREATE INDEX idx_sources_status ON sources(status); +CREATE INDEX idx_sources_risk_level ON sources(risk_level); +CREATE INDEX idx_sources_type ON sources(type); +CREATE INDEX idx_sources_created_at ON sources(created_at); +CREATE INDEX idx_sources_verified_by ON sources(verified_by); + +CREATE INDEX idx_reports_status ON reports(status); +CREATE INDEX idx_reports_source_domain ON reports(source_domain); +CREATE INDEX idx_reports_priority ON reports(priority); +CREATE INDEX idx_reports_created_at ON reports(created_at); +CREATE INDEX idx_reports_assigned_to ON reports(assigned_to); + +CREATE INDEX idx_categories_slug ON categories(slug); +CREATE INDEX idx_categories_priority ON categories(priority); + +CREATE INDEX idx_source_categories_source_id ON source_categories(source_id); +CREATE INDEX idx_source_categories_category_id ON source_categories(category_id); + +CREATE INDEX idx_api_keys_hash ON api_keys(key_hash); +CREATE INDEX idx_api_keys_owner ON api_keys(owner_email); + +-- SQLite FTS indexes (using FTS5) +CREATE VIRTUAL TABLE sources_fts USING fts5( + title, + description, + content='sources', + content_rowid='id' +); + +CREATE VIRTUAL TABLE reports_fts USING fts5( + description, + content='reports', + content_rowid='id' +); + +-- Triggers to keep FTS in sync +CREATE TRIGGER sources_fts_insert AFTER INSERT ON sources BEGIN + INSERT INTO sources_fts(rowid, title, description) + VALUES (new.id, new.title, new.description); +END; + +CREATE TRIGGER sources_fts_update AFTER UPDATE ON sources BEGIN + UPDATE sources_fts SET title = new.title, description = new.description + WHERE rowid = new.id; +END; + +CREATE TRIGGER sources_fts_delete AFTER DELETE ON sources BEGIN + DELETE FROM sources_fts WHERE rowid = old.id; +END; + +CREATE TRIGGER reports_fts_insert AFTER INSERT ON reports BEGIN + INSERT INTO reports_fts(rowid, description) + VALUES (new.id, new.description); +END; + +CREATE TRIGGER reports_fts_update AFTER UPDATE ON reports BEGIN + UPDATE reports_fts SET description = new.description + WHERE rowid = new.id; +END; + +CREATE TRIGGER reports_fts_delete AFTER DELETE ON reports BEGIN + DELETE FROM reports_fts WHERE rowid = old.id; +END; + +-- Triggers for updated_at (SQLite version) +CREATE TRIGGER update_users_updated_at + AFTER UPDATE ON users FOR EACH ROW + BEGIN + UPDATE users SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; + END; + +CREATE TRIGGER update_sources_updated_at + AFTER UPDATE ON sources FOR EACH ROW + BEGIN + UPDATE sources SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; + END; + +CREATE TRIGGER update_categories_updated_at + AFTER UPDATE ON categories FOR EACH ROW + BEGIN + UPDATE categories SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; + END; + +CREATE TRIGGER update_reports_updated_at + AFTER UPDATE ON reports FOR EACH ROW + BEGIN + UPDATE reports SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; + END; + +CREATE TRIGGER update_api_keys_updated_at + AFTER UPDATE ON api_keys FOR EACH ROW + BEGIN + UPDATE api_keys SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id; + END; + +-- Trigger to auto-populate domain field +CREATE TRIGGER set_sources_domain + BEFORE INSERT ON sources FOR EACH ROW + BEGIN + UPDATE sources SET domain = CASE + WHEN NEW.url LIKE 'http%://www.%' THEN substr(NEW.url, instr(NEW.url, '://www.') + 7, instr(substr(NEW.url, instr(NEW.url, '://www.') + 7), '/') - 1) + WHEN NEW.url LIKE 'http%://%' THEN substr(NEW.url, instr(NEW.url, '://') + 3, instr(substr(NEW.url, instr(NEW.url, '://') + 3), '/') - 1) + ELSE NEW.url + END WHERE id = NEW.id; + END; + +CREATE TRIGGER set_reports_domain + BEFORE INSERT ON reports FOR EACH ROW + BEGIN + UPDATE reports SET source_domain = CASE + WHEN NEW.source_url LIKE 'http%://www.%' THEN substr(NEW.source_url, instr(NEW.source_url, '://www.') + 7, instr(substr(NEW.source_url, instr(NEW.source_url, '://www.') + 7), '/') - 1) + WHEN NEW.source_url LIKE 'http%://%' THEN substr(NEW.source_url, instr(NEW.source_url, '://') + 3, instr(substr(NEW.source_url, instr(NEW.source_url, '://') + 3), '/') - 1) + ELSE NEW.source_url + END WHERE id = NEW.id; + END; + +-- Insert default categories +INSERT INTO categories (name, slug, description, color, priority, icon) VALUES +('Hoax', 'hoax', 'Šírenie nepravdivých informácií a hoaxov', '#EF4444', 5, 'AlertTriangle'), +('Hate Speech', 'hate-speech', 'Nenávistné prejavy proti skupinám ľudí', '#DC2626', 5, 'MessageSquareX'), +('Violence', 'violence', 'Povzbudzovanie k násiliu', '#B91C1C', 5, 'Sword'), +('Racism', 'racism', 'Rasistické a diskriminačné obsahy', '#991B1B', 5, 'Users'), +('Conspiracy', 'conspiracy', 'Konšpiračné teórie', '#F59E0B', 3, 'Eye'), +('Propaganda', 'propaganda', 'Politická propaganda a manipulácia', '#D97706', 2, 'Megaphone'), +('Spam', 'spam', 'Spam a podvodné obsahy', '#6B7280', 1, 'Mail'), +('Extremism', 'extremism', 'Extrémistické ideológie', '#7C2D12', 5, 'Flame'), +('Medical Misinformation', 'medical-misinfo', 'Nepravdivé zdravotné informácie', '#EA580C', 4, 'Heart'), +('Financial Scam', 'financial-scam', 'Finančné podvody a pyramid schemes', '#C2410C', 4, 'DollarSign'); + +-- Insert default admin user (password: admin123 - change in production!) +INSERT INTO users (email, password_hash, name, role) VALUES +('admin@antihoax.sk', '$2b$12$LQv3c1yqBWVHxkd0LHAkCOYz6TtxMQJqhN8/LewLON5QhMlPsJiTi', 'System Admin', 'admin'); + +-- Insert some example API key (for testing) +INSERT INTO api_keys (key_hash, name, description, owner_email, permissions, rate_limit) VALUES +('$2b$12$example_hash_here', 'Chrome Extension', 'API key for browser extension', 'extension@antihoax.sk', '["read"]', 10000); + +-- Views for easier querying (SQLite compatible) + +-- View: Sources with categories +CREATE VIEW sources_with_categories AS +SELECT + s.*, + GROUP_CONCAT(c.name) as category_names, + GROUP_CONCAT(c.slug) as category_slugs, + GROUP_CONCAT(c.color) as category_colors +FROM sources s +LEFT JOIN source_categories sc ON s.id = sc.source_id +LEFT JOIN categories c ON sc.category_id = c.id AND c.is_active = 1 +WHERE s.status = 'verified' +GROUP BY s.id; + +-- View: Dashboard statistics +CREATE VIEW dashboard_stats AS +SELECT + (SELECT COUNT(*) FROM sources WHERE status = 'verified') as total_sources, + (SELECT COUNT(*) FROM sources WHERE status = 'pending') as pending_sources, + (SELECT COUNT(*) FROM reports WHERE status = 'pending') as pending_reports, + (SELECT COUNT(*) FROM sources WHERE status = 'verified' AND risk_level >= 4) as high_risk_sources, + (SELECT COUNT(*) FROM sources WHERE created_at > datetime('now', '-7 days')) as sources_added_week, + (SELECT COUNT(*) FROM reports WHERE created_at > datetime('now', '-1 day')) as reports_today; + +-- View: Top risky domains +CREATE VIEW top_risky_domains AS +SELECT + domain, + COUNT(*) as source_count, + AVG(risk_level) as avg_risk_level, + MAX(risk_level) as max_risk_level, + GROUP_CONCAT(DISTINCT c.name) as categories +FROM sources s +LEFT JOIN source_categories sc ON s.id = sc.source_id +LEFT JOIN categories c ON sc.category_id = c.id +WHERE s.status = 'verified' +GROUP BY domain +HAVING AVG(risk_level) >= 3 +ORDER BY avg_risk_level DESC, source_count DESC; \ No newline at end of file