add database

This commit is contained in:
2023-05-08 18:52:45 +02:00
parent 1bad3ddd9d
commit 33c381be18
2 changed files with 288 additions and 0 deletions

BIN
database/antihoax.db Normal file

Binary file not shown.

288
database/schema.sql Normal file
View File

@@ -0,0 +1,288 @@
-- Antihoax Database Schema
-- SQLite adaptation of PostgreSQL 15+ schema
-- Drop existing tables (for clean migration)
DROP TABLE IF EXISTS source_categories;
DROP TABLE IF EXISTS reports;
DROP TABLE IF EXISTS sources;
DROP TABLE IF EXISTS categories;
DROP TABLE IF EXISTS users;
DROP TABLE IF EXISTS api_keys;
-- Users table (admins, moderators)
CREATE TABLE users (
id INTEGER PRIMARY KEY AUTOINCREMENT,
email VARCHAR(255) UNIQUE NOT NULL,
password_hash VARCHAR(255) NOT NULL,
name VARCHAR(100) NOT NULL,
role VARCHAR(20) DEFAULT 'moderator' CHECK (role IN ('admin', 'moderator')),
is_active BOOLEAN DEFAULT 1,
last_login DATETIME NULL,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Categories table
CREATE TABLE categories (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name VARCHAR(100) UNIQUE NOT NULL,
slug VARCHAR(100) UNIQUE NOT NULL,
description TEXT,
color VARCHAR(7) DEFAULT '#6B7280', -- hex color
priority INTEGER DEFAULT 1 CHECK (priority BETWEEN 1 AND 5),
icon VARCHAR(50), -- lucide icon name
is_active BOOLEAN DEFAULT 1,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Sources table (main problematic websites/pages)
CREATE TABLE sources (
id INTEGER PRIMARY KEY AUTOINCREMENT,
url VARCHAR(1000) UNIQUE NOT NULL,
domain VARCHAR(255) NOT NULL,
title VARCHAR(500),
description TEXT,
type VARCHAR(50) NOT NULL CHECK (type IN ('website', 'facebook_page', 'facebook_group', 'instagram', 'blog', 'news_site', 'youtube', 'tiktok', 'telegram', 'other')),
status VARCHAR(20) DEFAULT 'pending' CHECK (status IN ('pending', 'verified', 'rejected', 'under_review')),
risk_level INTEGER DEFAULT 1 CHECK (risk_level BETWEEN 1 AND 5),
language VARCHAR(5) DEFAULT 'sk' CHECK (language IN ('sk', 'cs', 'en', 'other')),
evidence_urls TEXT, -- JSON array of proof URLs
reported_by VARCHAR(255), -- email or name
verified_by INTEGER REFERENCES users(id),
rejection_reason TEXT,
follower_count INTEGER DEFAULT 0,
last_checked DATETIME,
metadata TEXT DEFAULT '{}', -- JSON data
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Junction table for many-to-many relationship between sources and categories
CREATE TABLE source_categories (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
category_id INTEGER NOT NULL REFERENCES categories(id) ON DELETE CASCADE,
confidence_score DECIMAL(3,2) DEFAULT 1.0 CHECK (confidence_score BETWEEN 0 AND 1),
added_by INTEGER REFERENCES users(id),
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
UNIQUE(source_id, category_id)
);
-- Reports table (user submissions)
CREATE TABLE reports (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_url VARCHAR(1000) NOT NULL,
source_domain VARCHAR(255) NOT NULL,
reporter_email VARCHAR(255),
reporter_name VARCHAR(100),
category_suggestions TEXT, -- JSON array of category IDs
description TEXT NOT NULL,
evidence_urls TEXT, -- JSON array: screenshots, articles, etc.
priority VARCHAR(20) DEFAULT 'medium' CHECK (priority IN ('low', 'medium', 'high', 'urgent')),
status VARCHAR(20) DEFAULT 'pending' CHECK (status IN ('pending', 'in_review', 'approved', 'rejected', 'duplicate')),
assigned_to INTEGER REFERENCES users(id),
admin_notes TEXT,
processed_at DATETIME NULL,
ip_address VARCHAR(45), -- IPv4 or IPv6
user_agent TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- API Keys table (for external access)
CREATE TABLE api_keys (
id INTEGER PRIMARY KEY AUTOINCREMENT,
key_hash VARCHAR(255) UNIQUE NOT NULL,
name VARCHAR(100) NOT NULL,
description TEXT,
owner_email VARCHAR(255) NOT NULL,
permissions TEXT DEFAULT '["read"]', -- JSON array
rate_limit INTEGER DEFAULT 1000, -- requests per hour
is_active BOOLEAN DEFAULT 1,
usage_count INTEGER DEFAULT 0,
last_used DATETIME NULL,
expires_at DATETIME NULL,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Indexes for performance
CREATE INDEX idx_sources_domain ON sources(domain);
CREATE INDEX idx_sources_status ON sources(status);
CREATE INDEX idx_sources_risk_level ON sources(risk_level);
CREATE INDEX idx_sources_type ON sources(type);
CREATE INDEX idx_sources_created_at ON sources(created_at);
CREATE INDEX idx_sources_verified_by ON sources(verified_by);
CREATE INDEX idx_reports_status ON reports(status);
CREATE INDEX idx_reports_source_domain ON reports(source_domain);
CREATE INDEX idx_reports_priority ON reports(priority);
CREATE INDEX idx_reports_created_at ON reports(created_at);
CREATE INDEX idx_reports_assigned_to ON reports(assigned_to);
CREATE INDEX idx_categories_slug ON categories(slug);
CREATE INDEX idx_categories_priority ON categories(priority);
CREATE INDEX idx_source_categories_source_id ON source_categories(source_id);
CREATE INDEX idx_source_categories_category_id ON source_categories(category_id);
CREATE INDEX idx_api_keys_hash ON api_keys(key_hash);
CREATE INDEX idx_api_keys_owner ON api_keys(owner_email);
-- SQLite FTS indexes (using FTS5)
CREATE VIRTUAL TABLE sources_fts USING fts5(
title,
description,
content='sources',
content_rowid='id'
);
CREATE VIRTUAL TABLE reports_fts USING fts5(
description,
content='reports',
content_rowid='id'
);
-- Triggers to keep FTS in sync
CREATE TRIGGER sources_fts_insert AFTER INSERT ON sources BEGIN
INSERT INTO sources_fts(rowid, title, description)
VALUES (new.id, new.title, new.description);
END;
CREATE TRIGGER sources_fts_update AFTER UPDATE ON sources BEGIN
UPDATE sources_fts SET title = new.title, description = new.description
WHERE rowid = new.id;
END;
CREATE TRIGGER sources_fts_delete AFTER DELETE ON sources BEGIN
DELETE FROM sources_fts WHERE rowid = old.id;
END;
CREATE TRIGGER reports_fts_insert AFTER INSERT ON reports BEGIN
INSERT INTO reports_fts(rowid, description)
VALUES (new.id, new.description);
END;
CREATE TRIGGER reports_fts_update AFTER UPDATE ON reports BEGIN
UPDATE reports_fts SET description = new.description
WHERE rowid = new.id;
END;
CREATE TRIGGER reports_fts_delete AFTER DELETE ON reports BEGIN
DELETE FROM reports_fts WHERE rowid = old.id;
END;
-- Triggers for updated_at (SQLite version)
CREATE TRIGGER update_users_updated_at
AFTER UPDATE ON users FOR EACH ROW
BEGIN
UPDATE users SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
END;
CREATE TRIGGER update_sources_updated_at
AFTER UPDATE ON sources FOR EACH ROW
BEGIN
UPDATE sources SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
END;
CREATE TRIGGER update_categories_updated_at
AFTER UPDATE ON categories FOR EACH ROW
BEGIN
UPDATE categories SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
END;
CREATE TRIGGER update_reports_updated_at
AFTER UPDATE ON reports FOR EACH ROW
BEGIN
UPDATE reports SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
END;
CREATE TRIGGER update_api_keys_updated_at
AFTER UPDATE ON api_keys FOR EACH ROW
BEGIN
UPDATE api_keys SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
END;
-- Trigger to auto-populate domain field
CREATE TRIGGER set_sources_domain
BEFORE INSERT ON sources FOR EACH ROW
BEGIN
UPDATE sources SET domain = CASE
WHEN NEW.url LIKE 'http%://www.%' THEN substr(NEW.url, instr(NEW.url, '://www.') + 7, instr(substr(NEW.url, instr(NEW.url, '://www.') + 7), '/') - 1)
WHEN NEW.url LIKE 'http%://%' THEN substr(NEW.url, instr(NEW.url, '://') + 3, instr(substr(NEW.url, instr(NEW.url, '://') + 3), '/') - 1)
ELSE NEW.url
END WHERE id = NEW.id;
END;
CREATE TRIGGER set_reports_domain
BEFORE INSERT ON reports FOR EACH ROW
BEGIN
UPDATE reports SET source_domain = CASE
WHEN NEW.source_url LIKE 'http%://www.%' THEN substr(NEW.source_url, instr(NEW.source_url, '://www.') + 7, instr(substr(NEW.source_url, instr(NEW.source_url, '://www.') + 7), '/') - 1)
WHEN NEW.source_url LIKE 'http%://%' THEN substr(NEW.source_url, instr(NEW.source_url, '://') + 3, instr(substr(NEW.source_url, instr(NEW.source_url, '://') + 3), '/') - 1)
ELSE NEW.source_url
END WHERE id = NEW.id;
END;
-- Insert default categories
INSERT INTO categories (name, slug, description, color, priority, icon) VALUES
('Hoax', 'hoax', 'Šírenie nepravdivých informácií a hoaxov', '#EF4444', 5, 'AlertTriangle'),
('Hate Speech', 'hate-speech', 'Nenávistné prejavy proti skupinám ľudí', '#DC2626', 5, 'MessageSquareX'),
('Violence', 'violence', 'Povzbudzovanie k násiliu', '#B91C1C', 5, 'Sword'),
('Racism', 'racism', 'Rasistické a diskriminačné obsahy', '#991B1B', 5, 'Users'),
('Conspiracy', 'conspiracy', 'Konšpiračné teórie', '#F59E0B', 3, 'Eye'),
('Propaganda', 'propaganda', 'Politická propaganda a manipulácia', '#D97706', 2, 'Megaphone'),
('Spam', 'spam', 'Spam a podvodné obsahy', '#6B7280', 1, 'Mail'),
('Extremism', 'extremism', 'Extrémistické ideológie', '#7C2D12', 5, 'Flame'),
('Medical Misinformation', 'medical-misinfo', 'Nepravdivé zdravotné informácie', '#EA580C', 4, 'Heart'),
('Financial Scam', 'financial-scam', 'Finančné podvody a pyramid schemes', '#C2410C', 4, 'DollarSign');
-- Insert default admin user (password: admin123 - change in production!)
INSERT INTO users (email, password_hash, name, role) VALUES
('admin@antihoax.sk', '$2b$12$LQv3c1yqBWVHxkd0LHAkCOYz6TtxMQJqhN8/LewLON5QhMlPsJiTi', 'System Admin', 'admin');
-- Insert some example API key (for testing)
INSERT INTO api_keys (key_hash, name, description, owner_email, permissions, rate_limit) VALUES
('$2b$12$example_hash_here', 'Chrome Extension', 'API key for browser extension', 'extension@antihoax.sk', '["read"]', 10000);
-- Views for easier querying (SQLite compatible)
-- View: Sources with categories
CREATE VIEW sources_with_categories AS
SELECT
s.*,
GROUP_CONCAT(c.name) as category_names,
GROUP_CONCAT(c.slug) as category_slugs,
GROUP_CONCAT(c.color) as category_colors
FROM sources s
LEFT JOIN source_categories sc ON s.id = sc.source_id
LEFT JOIN categories c ON sc.category_id = c.id AND c.is_active = 1
WHERE s.status = 'verified'
GROUP BY s.id;
-- View: Dashboard statistics
CREATE VIEW dashboard_stats AS
SELECT
(SELECT COUNT(*) FROM sources WHERE status = 'verified') as total_sources,
(SELECT COUNT(*) FROM sources WHERE status = 'pending') as pending_sources,
(SELECT COUNT(*) FROM reports WHERE status = 'pending') as pending_reports,
(SELECT COUNT(*) FROM sources WHERE status = 'verified' AND risk_level >= 4) as high_risk_sources,
(SELECT COUNT(*) FROM sources WHERE created_at > datetime('now', '-7 days')) as sources_added_week,
(SELECT COUNT(*) FROM reports WHERE created_at > datetime('now', '-1 day')) as reports_today;
-- View: Top risky domains
CREATE VIEW top_risky_domains AS
SELECT
domain,
COUNT(*) as source_count,
AVG(risk_level) as avg_risk_level,
MAX(risk_level) as max_risk_level,
GROUP_CONCAT(DISTINCT c.name) as categories
FROM sources s
LEFT JOIN source_categories sc ON s.id = sc.source_id
LEFT JOIN categories c ON sc.category_id = c.id
WHERE s.status = 'verified'
GROUP BY domain
HAVING AVG(risk_level) >= 3
ORDER BY avg_risk_level DESC, source_count DESC;