#+title: IaC AWS for ml-survey
#+author: Marcus Kammer

* About
This org file is part of the [[https://code.siemens.com/marcus.kammer/ml-survey][ml-survey]] repository. It is meant to be the infrastructure-as-code documentation.

* Cloudinit
:PROPERTIES:
:header-args:yaml: :tangle cloudinit.yml
:END:

** Introduction
This document explains the structure and content of our ~cloudinit.yml~ file, which is used to initialize our AWS EC2 instance. The code blocks in this file can be tangled to create the final ~cloudinit.yml~ file.

** Cloud-Config Header
Every cloud-init file should start with "#cloud-config". This tells cloud-init that the file is a cloud-config file.

#+BEGIN_SRC yaml
#cloud-config
#+END_SRC

** Locale and Keyboard Settings
Set the system locale and keyboard layout.

#+BEGIN_SRC yaml
# US English locale with a US keyboard layout.
locale: en_US.UTF-8
keyboard:
  layout: us
#+END_SRC

** Timezone Setting
Set the system timezone.

#+BEGIN_SRC yaml
timezone: Europe/Berlin
#+END_SRC

** Group Creation
Create any necessary system groups.

#+BEGIN_SRC yaml
# Group that the nginxuser account below is placed in.
groups:
  - nginxgroup
#+END_SRC

** User Creation and Configuration
Create and configure users. Here we're creating two users: a system user for Nginx and a regular user for administration.

#+BEGIN_SRC yaml
users:
  # System account without a login shell or sudo rights; the nginx.conf
  # written later in this document runs the server as this user.
  - name: nginxuser
    system: true
    shell: /usr/sbin/nologin
    groups: nginxgroup
    sudo: null
  # Administrative account with passwordless sudo. The sshd_config written
  # later in this document only admits this user (AllowUsers cl) and only
  # with public keys, so the keys listed here are the sole way to log in.
  - name: cl
    groups: users, admin
    sudo: ALL=(ALL) NOPASSWD:ALL
    shell: /bin/bash
    ssh_authorized_keys:
      - ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIA+46Y3AHPLJgz8KK61doqH3jBX2TL3TJvZsJrB9Km03 visua@xps-8930
      - ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMIHJ5qpMIKL7N3nC0GG1O4ygtkqOlQuZReoik6xGBxn marcus@XPS-13-9380.local
      - ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIB6xSH5nE0uy0C0kglpp4EqrbbW2CrBeAIj+X6Sf2pd0 XPS-8930-Ubuntu_22
#+END_SRC

** Package Installation
Install necessary packages.
#+BEGIN_SRC yaml
# Packages installed on first boot; the package index is refreshed and
# installed packages are upgraded as part of the same run.
packages:
  - detachtty
  - fail2ban
  - ufw
  - unattended-upgrades
  - sbcl
  - mosh
  - tmux
  - git
  - nginx
  - certbot
  - python3-certbot-nginx
  - build-essential
  - libzstd-dev
  - libsqlite3-dev
  - sqlite3
  - curl
  - wget
package_update: true
package_upgrade: true
#+END_SRC

** File Writing
Write configuration files and scripts to the instance.

#+BEGIN_SRC yaml
# Each of the following blocks appends one list item to this key when the
# document is tangled, so their order and indentation must stay consistent.
write_files:
#+END_SRC

*** Automatic Upgrades Configuration
#+BEGIN_SRC yaml
- path: /etc/apt/apt.conf.d/20auto-upgrades
  content: |
    APT::Periodic::Update-Package-Lists "1";
    APT::Periodic::Download-Upgradeable-Packages "1";
    APT::Periodic::AutocleanInterval "7";
    APT::Periodic::Unattended-Upgrade "1";
#+END_SRC

*** SSH Configuration
#+BEGIN_SRC yaml
# Replaces the stock sshd_config: public-key authentication only, no root
# login, and only the user "cl" is admitted.
- path: /etc/ssh/sshd_config
  content: |
    Include /etc/ssh/sshd_config.d/*.conf
    MaxAuthTries 3
    AuthorizedKeysFile .ssh/authorized_keys
    PasswordAuthentication no
    AuthenticationMethods publickey
    PubkeyAuthentication yes
    PermitRootLogin no
    KbdInteractiveAuthentication no
    UsePAM yes
    AllowAgentForwarding no
    AllowTcpForwarding yes
    X11Forwarding no
    PrintMotd no
    KexAlgorithms curve25519-sha256@libssh.org
    Ciphers chacha20-poly1305@openssh.com
    MACs hmac-sha2-512-etm@openssh.com
    AcceptEnv LANG LC_*
    Subsystem sftp /usr/lib/openssh/sftp-server
    AllowUsers cl
#+END_SRC

*** Fail2Ban Configuration
#+BEGIN_SRC yaml
- path: /etc/fail2ban/jail.local
  content: |
    [DEFAULT]
    bantime = 3600
    findtime = 600
    maxretry = 3
    banaction = ufw

    [sshd]
    enabled = true
    port = 22
    logpath = /var/log/auth.log

    [sshd-ddos]
    filter = sshd
    enabled = true
    port = ssh
    logpath = /var/log/auth.log
    maxretry = 5
    bantime = 600

    [nginx-http-auth]
    enabled = true
    action = ufw
    logpath = /var/log/nginx/error.log
    maxretry = 6
    bantime = 3600
    findtime = 600
#+END_SRC

*** Nginx Configuration
#+BEGIN_SRC yaml
# Main nginx configuration. The access log is written as CSV so that
# access_logs.sql (below) can import it into SQLite.
- path: /etc/nginx/nginx.conf
  content: |
    user nginxuser;
    worker_processes auto;
    pid /run/nginx.pid;
    include /etc/nginx/modules-enabled/*.conf;
    events {
      worker_connections 768;
    }
    http {
      sendfile on;
      tcp_nopush on;
      types_hash_max_size 2048;
      include /etc/nginx/mime.types;
      default_type application/octet-stream;
      ssl_protocols TLSv1.2 TLSv1.3;
      ssl_prefer_server_ciphers on;
      log_format csv '$time_iso8601,$remote_addr,$remote_user,"$request",$status,$body_bytes_sent,$http_referer,"$http_user_agent"';
      access_log /var/log/nginx/access.csv csv;
      error_log /var/log/nginx/error.log;
      gzip on;
      server_tokens off;
      include /etc/nginx/conf.d/*.conf;
      include /etc/nginx/sites-enabled/*;
    }
#+END_SRC

*** Nginx Reverse Proxy Configuration
#+BEGIN_SRC yaml
# Redirects HTTP to HTTPS and proxies HTTPS traffic to the application
# listening on localhost:8080.
- path: /etc/nginx/sites-available/reverse-proxy.conf
  content: |
    server {
      listen 80;
      server_name survey.metalisp.dev;
      return 301 https://$host$request_uri;
    }
    server {
      listen 443 ssl;
      server_name survey.metalisp.dev;
      ssl_certificate /etc/letsencrypt/live/survey.metalisp.dev/fullchain.pem;
      ssl_certificate_key /etc/letsencrypt/live/survey.metalisp.dev/privkey.pem;
      include /etc/letsencrypt/options-ssl-nginx.conf;
      ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;
      location / {
        proxy_pass http://localhost:8080;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
      }
    }
#+END_SRC

*** Git Configuration Script
#+BEGIN_SRC yaml
- path: /home/cl/setup_git.sh
  owner: 'cl:cl'
  permissions: '0755'
  defer: True
  content: |
    #!/bin/bash
    git config --global user.email "marcus.kammer@mailbox.org"
    git config --global user.name "Marcus Kammer"
    git config --global init.defaultBranch main
    git config --global pull.rebase true
#+END_SRC

*** Repository Setup Script
#+BEGIN_SRC yaml
- path: /home/cl/setup_repos.sh
  owner: 'cl:cl'
  permissions: '0755'
  defer: True
  content: |
    #!/bin/bash
    git clone https://github.com/slime/slime.git ~/slime && cd ~/slime && git checkout v2.30
#+END_SRC

*** User Setup Script
#+BEGIN_SRC yaml
# Entry point invoked from runcmd as user cl; chains the two scripts above,
# generates an SSH key pair and creates the web root directory.
- path: /home/cl/setup_user_all.sh
  owner: 'cl:cl'
  permissions: '0755'
  defer: True
  content: |
    #!/bin/bash
    /bin/bash /home/cl/setup_git.sh
    /bin/bash /home/cl/setup_repos.sh
    ssh-keygen -t ed25519 -C 'survey.metalisp' -f ~/.ssh/id_ed25519 -N ''
    mkdir -p ~/www/survey/docs/
#+END_SRC

*** OpenAI Bot Blocking Script
#+BEGIN_SRC yaml
# Downloads the published GPTBot address ranges and adds a ufw deny rule
# for ports 80 and 443 per range.
- path: /home/cl/openai_block_access.sh
  owner: 'cl:cl'
  permissions: '0755'
  defer: True
  content: |
    #!/bin/bash
    file="/tmp/out.txt.$$"
    wget -q -O "$file" https://openai.com/gptbot-ranges.txt 2>/dev/null
    while IFS= read -r cidr
    do
      sudo ufw deny proto tcp from $cidr to any port 80
      sudo ufw deny proto tcp from $cidr to any port 443
    done < "$file"
    [ -f "$file" ] && rm -f "$file"
#+END_SRC

*** Tmux Configuration
#+BEGIN_SRC yaml
- path: /home/cl/.tmux.conf
  owner: 'cl:cl'
  permissions: '0755'
  defer: True
  content: |
    set -g default-terminal "screen-256color"
    unbind C-b
    set -g prefix C-a
    bind C-a send-prefix
    set -g mouse on
    bind -n M-Left select-pane -L
    bind -n M-Right select-pane -R
    bind -n M-Up select-pane -U
    bind -n M-Down select-pane -D
    bind -n M-h resize-pane -L 2
    bind -n M-j resize-pane -D 2
    bind -n M-k resize-pane -U 2
    bind -n M-l resize-pane -R 2
    bind | split-window -h
    bind - split-window -v
    bind r source-file ~/.tmux.conf
    unbind ^A
    bind ^A select-pane -t :.+
    set -g history-limit 50000
    set -g status-bg colour235
    set -g status-fg colour137
    set -g status-interval 5
    set -g status-left "#[fg=colour81]#H "
    set -g status-right "#[fg=colour137]#(date '+%Y-%m-%d %H:%M')"
    setw -g window-status-current-style fg=colour125,bg=colour235,bold
    set -g pane-border-style fg=colour238
    set -g pane-active-border-style fg=colour81
    set -g message-style fg=colour166,bg=colour235
    setw -g window-status-current-format "#[bold,fg=colour81]#I:#W#F"
    setw -g window-status-format "#[fg=colour137]#I:#W#F"
#+END_SRC

*** SQL Access Logs Script
#+BEGIN_SRC yaml
# SQLite script; the column order mirrors the "csv" log_format defined in
# nginx.conf above.
- path: /home/cl/access_logs.sql
  owner: 'cl:cl'
  defer: True
  content: |
    .mode csv
    CREATE TABLE IF NOT EXISTS access_logs (
      timestamp TEXT,
      ip_address TEXT,
      remote_user TEXT,
      request TEXT,
      status_code INTEGER,
      body_bytes_sent INTEGER,
      http_referer TEXT,
      http_user_agent TEXT
    );
    .import '/var/log/nginx/access.csv' access_logs
#+END_SRC

*** Pi-hole UFW Setup Script
#+BEGIN_SRC yaml
- path: /home/cl/pihole_set_ufw.sh
  owner: 'cl:cl'
  permissions: '0755'
  defer: True
  content: |
    #!/bin/bash
    sudo ufw allow 80/tcp
    sudo ufw allow 53/tcp
    sudo ufw allow 53/udp
    sudo ufw allow 67/tcp
    sudo ufw allow 67/udp
    sudo ufw allow 546:547/udp
#+END_SRC

*** SBCL System-wide Configuration
#+BEGIN_SRC yaml
# System-wide SBCL init file. When the second command-line argument names an
# existing file it is loaded as a script (with "#!" lines skipped) and SBCL
# quits; otherwise a debugger hook is installed that prints the condition
# and aborts.
- path: /etc/sbclrc
  content: |
    (let ((script (and (second *posix-argv*) (probe-file (second *posix-argv*)))))
      (when script
        (set-dispatch-macro-character #\# #\!
          (lambda (stream char arg)
            (declare (ignore char arg))
            (read-line stream)))
        (setf *invoke-debugger-hook*
          (lambda (condition hook)
            (declare (ignore hook))
            (format *error-output* "Error: ~A~%" condition)
            (quit)))
        (load script)
        (quit)))

    (defun print-condition-hook (condition hook)
      (declare (ignore hook))
      (princ condition)
      (clear-input)
      (abort))

    *debugger-hook*

    (setf *debugger-hook* #'print-condition-hook)
#+END_SRC

*** SBCL User Configuration
#+BEGIN_SRC yaml
- path: /home/cl/.sbclrc
  owner: 'cl:cl'
  defer: True
  content: |
    (sb-ext:set-sbcl-source-location #P"~/sbcl/")
#+END_SRC

*** SBCL Setup Script
#+BEGIN_SRC yaml
# Builds SBCL 2.4.7 from source using the packaged SBCL as the bootstrap
# compiler, installs it, then removes the packaged version.
- path: /home/cl/lisp_01_setup_sbcl.sh
  owner: 'cl:cl'
  permissions: '0755'
  defer: True
  content: |
    #!/bin/bash
    set -e
    sudo apt update
    sudo apt install -y sbcl git libzstd-dev
    git clone --branch sbcl-2.4.7 git://git.code.sf.net/p/sbcl/sbcl ~/sbcl
    # Enter the checkout by the same path it was cloned to, so the build
    # does not depend on the directory the script was started from.
    cd ~/sbcl
    sh make.sh --fancy
    sudo sh install.sh
    sudo apt remove sbcl -y
    cd ~/ && sbcl --version
#+END_SRC

*** Quicklisp Setup Script
#+BEGIN_SRC yaml
- path: /home/cl/lisp_02_setup_quicklisp.sh
  owner: 'cl:cl'
  permissions: '0755'
  defer: True
  content: |
    #!/bin/bash
    sudo apt install -y libev4 libsqlite3-dev
    curl https://beta.quicklisp.org/quicklisp.lisp -o ~/quicklisp.lisp
    # Load the installer from the path it was downloaded to, independent of
    # the current working directory.
    sbcl --noinform --load ~/quicklisp.lisp --eval "(quicklisp-quickstart:install)" --eval "(ql-util:without-prompting (ql:add-to-init-file))" --non-interactive
    sbcl --noinform --eval "(ql:quickload '(:hunchentoot :spinneret))" --non-interactive
#+END_SRC

*** Swank Faster Loading Script
#+BEGIN_SRC yaml
# Preloads contribs and libraries, then dumps a core image named
# "sbcl.core-for-slime".
- path: /home/cl/lisp_03_load_swank_faster.lisp
  owner: 'cl:cl'
  defer: True
  content: |
    (mapc 'require '(sb-bsd-sockets sb-posix sb-introspect sb-cltl2 asdf))
    (ql:quickload '(:hunchentoot :spinneret))
    (save-lisp-and-die "sbcl.core-for-slime")
#+END_SRC

*** Script to Run Swank Faster Loading
#+BEGIN_SRC yaml
- path: /home/cl/lisp_03_load_swank_faster.sh
  owner: 'cl:cl'
  permissions: '0755'
  defer: True
  content: |
    #!/bin/bash
    sbcl --noinform --noprint --load ~/lisp_03_load_swank_faster.lisp
#+END_SRC

*** Emacs Build Script
#+BEGIN_SRC yaml
# Clones the Emacs configuration, installs build dependencies and builds
# the emacs-29 branch without image/GUI libraries.
- path: /home/cl/emacs_build.sh
  owner: cl:cl
  permissions: '0755'
  defer: True
  content: |
    #!/bin/bash
    git clone git@git.sr.ht:~marcuskammer/emacs.d-lisp ~/.emacs.d/
    sudo apt update
    sudo apt install -y build-essential git autoconf texinfo libncurses-dev libgnutls28-dev libjansson-dev libgccjit-13-dev pkg-config zlib1g-dev libtree-sitter-dev libxml2-dev
    # Enter the checkout by the same path it was cloned to, so the build
    # does not depend on the directory the script was started from.
    git clone git://git.sv.gnu.org/emacs.git ~/emacs-src && cd ~/emacs-src
    git checkout emacs-29
    ./autogen.sh
    ./configure --without-xpm --without-jpeg --without-png --without-gif --without-tiff --without-xpm --without-rsvg --without-webp --without-lcms2 --without-cairo --without-gpm --with-json --with-native-compilation --with-tree-sitter
    make -j$(nproc)
    sudo make install
#+END_SRC

*** Emacs Service Configuration
#+BEGIN_SRC yaml
# systemd user unit that runs Emacs as a daemon for user cl.
- path: /home/cl/.config/systemd/user/emacs.service
  owner: cl:cl
  defer: True
  content: |
    [Unit]
    Description=Emacs text editor
    Documentation=info:emacs man:emacs(1) https://gnu.org/software/emacs/

    [Service]
    Type=forking
    ExecStart=emacs --daemon
    ExecStop=emacsclient --eval "(kill-emacs)"
    Environment=SSH_AUTH_SOCK=%t/keyring/ssh
    Restart=on-failure

    [Install]
    WantedBy=default.target
#+END_SRC

** Run Commands
Execute commands after the instance has been set up.
#+BEGIN_SRC yaml
runcmd:
  # The target file name must match the ssl_dhparam directive in
  # reverse-proxy.conf (/etc/letsencrypt/ssl-dhparams.pem).
  - curl https://ssl-config.mozilla.org/ffdhe2048.txt > /etc/letsencrypt/ssl-dhparams.pem
  # Enable the reverse proxy site and drop the distribution default site.
  - ln -s /etc/nginx/sites-available/reverse-proxy.conf /etc/nginx/sites-enabled/
  - rm /etc/nginx/sites-enabled/default
  # NOTE(review): reverse-proxy.conf references certificate files and
  # options-ssl-nginx.conf under /etc/letsencrypt, and no certbot run is
  # visible before this reload - confirm the reload succeeds on first boot.
  - systemctl reload nginx
  # Firewall: deny inbound by default, then open HTTP/HTTPS, SSH and mosh.
  - ufw allow 'Nginx Full'
  - ufw default deny incoming
  - ufw default allow outgoing
  - ufw allow 22/tcp
  - ufw allow mosh
  - ufw enable
  - systemctl enable fail2ban && systemctl start fail2ban
  # Pick up the sshd_config written by write_files.
  - systemctl restart sshd
  # Per-user setup (git config, repository clones, SSH key) runs as cl.
  - sudo -u cl /bin/bash /home/cl/setup_user_all.sh
#+END_SRC

** Conclusion
This concludes the documentation for our ~cloudinit.yml~ file. To generate the actual YAML file from this Org document, you can use the following Emacs command:

~C-c C-v t~

Or evaluate the following Emacs Lisp source block:

#+BEGIN_SRC emacs-lisp :results silent
(org-babel-tangle)
#+END_SRC

This will create the ~cloudinit.yml~ file with all the code blocks in the correct order and with proper indentation. Remember to review the generated YAML file to ensure all indentations are correct, as YAML is sensitive to indentation.

* Terraform and AWS
:PROPERTIES:
:header-args:hcl: :tangle main.tf :mkdirp yes
:END:

** Introduction
This tutorial will guide you through creating a ~main.tf~ file for setting up basic AWS infrastructure using Terraform. We'll explain each resource, why it's necessary, and the order in which they should be created. The code blocks in this file can be tangled to create the final ~main.tf~ file.
#+name: tf-graph
#+begin_src powershell :results output
terraform graph
#+end_src

#+begin_src dot :var g=tf-graph :file tf-graph.png :exports results
$g
#+end_src

#+RESULTS:
[[file:tf-graph.png]]

#+name: tf-plan
#+begin_src powershell :results output :exports none
terraform plan
#+end_src

#+name: tf-destroy
#+begin_src powershell :results output :exports none
terraform destroy -auto-approve
#+end_src

** Define Global Variables
#+begin_src hcl :tangle variables.tf
# Operating system of the machine that runs Terraform; defaults to "windows".
variable "host_os" {
  type    = string
  default = "windows"
}
#+end_src

#+begin_src hcl :tangle terraform.tfvars
# Same value as the default declared in variables.tf.
host_os = "windows"
#+end_src

** Virtual Private Cloud (VPC)
We start with creating a VPC, which is a virtual network dedicated to your AWS account. It's the foundation for all other resources.

#+BEGIN_SRC hcl
# The subnet, internet gateway, route table and security group defined
# later in this document all attach to this VPC through its id.
resource "aws_vpc" "mlsurvey_vpc" {
  cidr_block           = "10.0.0.0/16"
  enable_dns_hostnames = true
  enable_dns_support   = true

  tags = {
    Name = "ml-survey-vpc"
  }
}
#+END_SRC

This VPC:
- Has a CIDR block of 10.0.0.0/16, allowing for up to 65,536 IP addresses.
- Enables DNS hostnames and support, which are necessary for EC2 instances to have DNS names.

** Subnet
Next, we create a subnet within our VPC. Subnets allow you to partition your network to group resources together.

#+BEGIN_SRC hcl
# Public subnet: instances launched here receive a public IP automatically.
resource "aws_subnet" "mlsurvey_public_subnet" {
  vpc_id                  = aws_vpc.mlsurvey_vpc.id
  cidr_block              = "10.0.1.0/24"
  map_public_ip_on_launch = true
  availability_zone       = "eu-central-1a"

  tags = {
    Name = "ml-survey-public"
  }
}
#+END_SRC

This subnet:
- Is associated with our VPC.
- Has a CIDR block of 10.0.1.0/24, allowing for up to 256 IP addresses.
- Automatically assigns public IP addresses to instances launched in it.
- Is located in the eu-central-1a availability zone.

** Internet Gateway
An Internet Gateway allows communication between your VPC and the internet.
#+BEGIN_SRC hcl
# Gateway attached to the VPC; the default route below points at it.
resource "aws_internet_gateway" "mlsurvey_internet_gateway" {
  vpc_id = aws_vpc.mlsurvey_vpc.id

  tags = {
    Name = "ml-survey-igw"
  }
}
#+END_SRC

This Internet Gateway is attached to our VPC, enabling internet access for resources within the VPC.

** Route Table
A route table contains a set of rules (routes) that determine where network traffic is directed.

#+BEGIN_SRC hcl
# Declared without inline routes; the route is added by the separate
# aws_route resource below.
resource "aws_route_table" "mlsurvey_public_rt" {
  vpc_id = aws_vpc.mlsurvey_vpc.id

  tags = {
    Name = "ml-survey-rt"
  }
}
#+END_SRC

This route table is associated with our VPC and will contain the rules for routing traffic.

** Route
We add a route to our route table to direct internet-bound traffic to the Internet Gateway.

#+BEGIN_SRC hcl
# Default route: every destination (0.0.0.0/0) goes to the internet gateway.
resource "aws_route" "mlsurvey_default_route" {
  route_table_id         = aws_route_table.mlsurvey_public_rt.id
  destination_cidr_block = "0.0.0.0/0"
  gateway_id             = aws_internet_gateway.mlsurvey_internet_gateway.id
}
#+END_SRC

This route sends all traffic (0.0.0.0/0) to the Internet Gateway, allowing resources in our VPC to access the internet.

** Route Table Association
We associate our route table with the subnet to apply the routing rules.

#+BEGIN_SRC hcl
# Binds the public route table to the public subnet.
resource "aws_route_table_association" "mlsurvey_public_assoc" {
  subnet_id      = aws_subnet.mlsurvey_public_subnet.id
  route_table_id = aws_route_table.mlsurvey_public_rt.id
}
#+END_SRC

This association ensures that the routing rules apply to resources in our subnet.

** Security Group
A security group acts as a virtual firewall for your instance to control inbound and outbound traffic.

#+BEGIN_SRC hcl
# Both rules use protocol "-1" with ports 0-0 and 0.0.0.0/0, i.e. all
# inbound and all outbound traffic is allowed, as the text below states.
# NOTE(review): with ingress fully open, the ufw rules applied by
# cloud-init appear to be the only inbound filter - confirm this is intended.
resource "aws_security_group" "mlsurvey_sg" {
  name        = "ml-survey-sg"
  description = "ml-survey security group"
  vpc_id      = aws_vpc.mlsurvey_vpc.id

  ingress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}
#+END_SRC

This security group allows all inbound and outbound traffic.
In a production environment, you would typically restrict this for better security.

** Key Pair
A key pair is used to securely SSH into your EC2 instances.

#+BEGIN_SRC hcl
# Registers the local public key file under the name "ml-survey-key".
resource "aws_key_pair" "mlsurvey_auth" {
  key_name   = "ml-survey-key"
  public_key = file("~/.ssh/ml-survey-key.pub")
}
#+END_SRC

This resource uploads your public key to AWS, allowing you to use the corresponding private key to SSH into instances.

** EC2 Instance
Finally, we create an EC2 instance, which is a virtual server in Amazon's Elastic Compute Cloud (EC2) for running applications.

#+BEGIN_SRC hcl
# NOTE(review): data.aws_ami.server_ami and data.cloudinit_config.config are
# not declared in this part of the document - confirm they are defined
# elsewhere in the Terraform configuration.
resource "aws_instance" "dev_node" {
  instance_type = "t2.micro"
  ami           = data.aws_ami.server_ami.id
  # NOTE(review): relies on the key pair's id being usable as the key name;
  # aws_key_pair.mlsurvey_auth.key_name would state the intent directly.
  key_name               = aws_key_pair.mlsurvey_auth.id
  vpc_security_group_ids = [aws_security_group.mlsurvey_sg.id]
  subnet_id              = aws_subnet.mlsurvey_public_subnet.id
  user_data              = data.cloudinit_config.config.rendered
  # Replace the instance whenever the rendered user data changes.
  user_data_replace_on_change = true

  tags = {
    Name = "dev-node"
  }
}
#+END_SRC

This EC2 instance:
- Uses the t2.micro instance type.
- Uses the AMI specified in the ~aws_ami~ data source.
- Uses the key pair we created for SSH access.
- Is placed in our VPC and subnet.
- Has the security group we created applied to it.
- Uses the cloud-init configuration we specified.

** Output
Lastly, we add an output to display the public IP of our instance.

#+BEGIN_SRC hcl
# Public IP address of the dev_node instance.
output "dev_node_public_ip" {
  value = aws_instance.dev_node.public_ip
}
#+END_SRC

This output will be displayed after Terraform applies the configuration, making it easy to find the IP address of your new instance.

** Conclusion
By tangling all these code blocks, you'll have a complete ~main.tf~ file that sets up a basic AWS infrastructure. The resources are created in a logical order, with each building upon the previous ones to create a fully functional network and compute environment in AWS.
#+name: tf-apply
#+begin_src powershell :results output :exports none
terraform apply -auto-approve
#+end_src

* Web App
Write a tiny web application to test if the infrastructure is successful.

#+begin_src lisp
;; QL:QUICKLOAD is a function, so its argument is evaluated. A keyword
;; evaluates to itself, whereas an unquoted #:hunchentoot would be looked
;; up as a variable and signal an unbound-variable error.
(ql:quickload :hunchentoot)

;; Package for the sample application; only the Hunchentoot symbols that
;; are needed are imported.
(defpackage sample-web-app
  (:use #:cl)
  (:import-from #:hunchentoot
                #:define-easy-handler
                #:start
                #:stop)
  (:documentation "Sample web app package."))

(in-package #:sample-web-app)
#+end_src