Replace telegraf with prometheus exporters

Still missing SMART and ping, but those can come later.

Swapped because Prometheus's polling (pull) model doesn't play especially well with Telegraf, which leads to gaps in the data.
This commit is contained in:
Jake Howard 2023-01-31 09:11:09 +00:00
parent 0322291557
commit ddc91059b5
Signed by: jake
GPG Key ID: 57AFB45680EDD477
10 changed files with 89 additions and 201 deletions

View File

@ -2,7 +2,8 @@ collections:
- ansible.posix
- community.general
- community.docker
- amazon.aws # Dependency of rossmcdonald.telegraf
- name: https://github.com/prometheus-community/ansible
type: git
roles:
- src: geerlingguy.docker
@ -13,8 +14,6 @@ roles:
- src: ironicbadger.proxmox_nag_removal
version: 1.0.1
- src: chmduquesne.iptables_persistent
- src: rossmcdonald.telegraf
version: v1.2.0
- src: geerlingguy.gitlab
version: 3.2.0
- src: dokku_bot.ansible_dokku

View File

@ -0,0 +1,3 @@
node_exporter_version: 1.5.0
node_exporter_web_listen_address: "{{ private_ip }}:9100"
node_exporter_enabled_collectors: [] # Override the role's default list, which enables the systemd collector

View File

@ -85,9 +85,10 @@
become: true
- zfs
- pve_nebula_route
- telegraf
- role: ironicbadger.snapraid
become: true
- role: prometheus.prometheus.node_exporter
become: true
- hosts: forrest
roles:

View File

@ -38,6 +38,16 @@ services:
volumes:
- ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
proxmox_exporter:
image: prompve/prometheus-pve-exporter:latest
restart: unless-stopped
user: "{{ docker_user.id }}"
environment:
- PVE_USER=prometheus@pve
- PVE_TOKEN_NAME=prometheus
- PVE_TOKEN_VALUE={{ prometheus_api_token }}
- PVE_VERIFY_SSL=false
networks:
grafana:
external: true

View File

@ -5,14 +5,18 @@ alerting:
- alertmanager:9093
scrape_configs:
- job_name: telegraf
metric_relabel_configs:
- source_labels: [__name__]
regex: go_.+
action: drop
- job_name: pve
static_configs:
- targets:
- "{{ pve_hosts.pve.ip }}:9273"
- "{{ pve_hosts.pve.ip }}"
metrics_path: /pve
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: proxmox_exporter:9221
- job_name: traefik
metric_relabel_configs:
@ -102,3 +106,13 @@ scrape_configs:
metrics_path: /projects/{{ healthchecks_project_uuid }}/metrics/{{ healthcheck_api_token }}
static_configs:
- targets: [healthchecks.io]
- job_name: node
metrics_path: /metrics
static_configs:
- targets:
- "{{ pve_hosts.pve.ip }}:9100"
metric_relabel_configs:
- source_labels: [__name__]
regex: go_.+
action: drop

View File

@ -9,3 +9,4 @@ healthcheck_api_token: "{{ vault_healthcheck_api_token }}"
alertmanager_from_address: "{{ vault_alertmanager_from_address }}"
alertmanager_smtp_password: "{{ vault_alertmanager_smtp_password }}"
alertmanager_to_address: "{{ vault_alertmanager_to_address }}"
prometheus_api_token: "{{ vault_prometheus_api_token }}"

View File

@ -1,49 +1,52 @@
$ANSIBLE_VAULT;1.1;AES256
61666339653630373931366533656437386337663739313532663339356562656466373461303837
3132343434633835333035303863346532363137656565380a646239326534346135333737653337
31626663636661396462353765653830396132306531393965306439316437623866306166656430
3664373263643666350a353139653633663233623064353532313136393865333763353233343132
61363864666534636630623639346361656361633235313535323030633434656136626131656237
62633433613537363431336664313166306539616431626131653337363536633536386561373630
32323165393538636437653361363766636139646166646538666462333337333837383966643263
31646562316433323962353439353636393965393962633665306232653737353233346632326233
36333863623933313238663434643737363638666662323238643238326165396433653462663934
62663538366633386533316432323535656563346435353665333430623434616634393565633962
65383262356333643437636531663034303933313534393965633739633031393139386633376134
35653164313830643065613439663462306131383135366363653834386230363639366233643866
30626638306666653065353237663434633333376337343133656237666662343163656164353935
39386262303664653164356431643466306463343564633064376532663133393432626333393236
39306463663963383234363664643265393434363138373062643165313332643833663464343766
33393162336535303136653230633135636266653863646331346630386131303131333636613938
66386331643966313236356634623938363835323439343234613164616132616137353237356162
66333964363361353436396634353735663963643239356164333434373166623731643038656136
32376339316434373836333532373664323762636634336361616437306132356637616162303231
30356263366137363337316631643536643539653237636362386366656434353861343239306431
33326163336536636233643434633461626430313536376632623334303938316364316162386365
38393138356538623235663539393166306366396364306335656534366663616336666637323138
32343035373063306462636163616364616332333263663737653235613230313465363466616565
36373831383364333837303433663839353363663730333665663138386635626530323633626661
62633066633361366562383737646565613566653866663136326135383533303962326637316132
39313832303538363432646432616433303966626133653461363530623966363133646530353931
62646334646336633162633763613333663937636262633138323932313632653630393162363439
37343739313066343831623265353164386239373430623735343739373737373130643631343664
31626633303764633831646565653732326363336431353433353365633433626665353733663336
32643132666565373332323136386630633561656632613335613965343439633831336336306464
62303234646233633366383430366431336430356463643630623566373333646532333663623466
63326364393166393461653366373634626333623836303466636136663331383239336432363661
62383836653562633765333034306161326539396330663831363135326235343535393663363638
36616130663538376534616161393930633462316539643561323665313339646234306232656662
30356139303465313561636433646162346136303632653230353437383063396438616536376533
30363362613635336162336537333932306666666632336664613631623830616538633131663365
36656164623139643333333237376333656632306436313865656337616439343434383231616561
32323232306437353034643735356532653233306131356632393963616337303835663632653837
63656433633061313034656565373061643132623163626230386561323466353534323734326437
34336439363563373637336139376261376439306166656337663366383463313938333731306664
34393062653536346464373637343232666465336564613235323931373465323864386362336561
37633837336136373030346533633562343439316264333462623763666162373938326437353638
63343062313732356338623731626232626265366466623437383734313630393030366330353864
63613766373438366564663761623766353636303066343665353437643830626364663031336537
36663739303232363162646464663339623939363733393835383366636463643633396362363137
62613138656563666661363630633935346433356235303936356431343637663938646161343266
61313861346562396439383262396435313064643264353536326238623461653965626334653866
36646239313666396338
36623535313964653161353330663436356239613837653837393939373034353031646535333535
6439313832316239616233306632373934616134616466380a316361363263373938636161666535
31613461333637373732626233623434316335353964353433643635653566613933393361336139
3864373963396264320a376634346331373762313733323961386566646338633936303631303566
66616534326430653266396635353932623661363533356537636662636537656434363562646230
30613831336561376639393466373739373138313931333163353061633465623362666564313631
66623235353531613737643937613430323934376433393836346339626137616561313062663234
63363736326439623661376132613136383465393761653236663631613339653066356436653630
66623865303735616335373231643233386639323838353534613337316161633765396234366533
33616631663530643764373937346262633734366339303837393737666665363465333239343933
35613962396534336232623833303034643639323931633966396439383463396261313862626335
31323434613838353961336136613966636635646632393839663664376632373834313265643338
30663132633362323831313231333164643665386535323231646262656631383631393539616639
34343563353064303833383236626136666264316236316537333965313162616637323966363335
32353936663162316564306337353861396634353935353935306135343665316262643831396537
61393266383538666563363261646534636632303332343662636631316663343930303766623638
35376565343638316339623061396536643636313966383633346231633631353032356661386132
66623439336338616666626431303635373833666137326234653161336434346133636261363662
39313732303736386137656664303365363234336265643064306562643435633838373864353862
33366635333630373162656630666232333563623066333461653963363961623435646631373561
64643738346138366566303233326663383835386132663034313461383161616164636332396332
37663131386135393833373461663432666264363065666630646164633134303439663435616235
35656234313761376532306264393637653433623863383830323935316332383338623134323366
31336665386137323132363962363335623635336131373930353635353663333366363266303138
35626262613261636561373730626635303836623561643436646430653365663432323938393863
63633331663462323163646237386262376337313330323036613434383165616530643362616131
63616562353964316634646434653138333266646633616631653663663838306163616633643234
61333230373237613436343662363434303766383336376232353066313231666330613761643366
36326638326439653966643430313366376661633636366565393461623438323366373333663633
61633763623631333665363333646433656166633364303836623566333336343761613435353138
37366165613263653564386334303030623333646164303662363065333831376334656537613130
33373864663237383064653461616165653834393063663332643235316139333539623463343161
38636564626466633631393938653066373764663935353763626133623762306164383831663061
34333065326666373337663931313763383739383763333235333939376133363236643136346233
62643833376631643036613963643939333133343036613332313866373032646332363231313139
61373365653665343066636162356336373833393363373866343436323639623435383831363335
30333033326638363930613030356664333233633339666366643062353634333161343838666231
32346332663538653937623136653438636463323463376263303962353562313833373937303066
65303037323030653434313164393766633134306435633263363335636561356264376665363639
35613731373437386566663266656266343639326334303239613862353963323436633836383766
35323930633039396535616265643234303639393035363865643236623838333337626135343665
36373038666332376663333565623362303631663830336131343438353764653831633433363436
36333839303433623966363561313564303037393165383732323763353232653564346138666438
30653836626139356133346538616135313034633966373036303461393562363336386633626365
33393565643730383634346238356462313435366538636234656237613864656165656439363061
32626235323362333239373631383830653035383164646364343461376562636564343063353139
61306535333466653937303635353962376162376431336563316130343530636431623537633332
65373333376338353930316561636530343062653964323463653632653332376432343237656465
63333437613064313438353134333566303033313339323162643061363836643931343135396130
32623435653533326563616263323938343332306362383034663139653965626231336637383939
313534343431303739396263303737303365

View File

@ -1,122 +0,0 @@
# Global tags can be specified here in key="value" format.
[global_tags]
# dc = "us-east-1" # will tag all metrics with dc=us-east-1
# rack = "1a"
## Environment variables can be used as tags, and throughout the config file
# user = "$USER"
# Configuration for telegraf agent
[agent]
## Default data collection interval for all inputs
interval = "60s"
## Rounds collection interval to 'interval'
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true
## Telegraf will send metrics to outputs in batches of at most
## metric_batch_size metrics.
## This controls the size of writes that Telegraf sends to output plugins.
metric_batch_size = 1000
## Maximum number of unwritten metrics per output. Increasing this value
## allows for longer periods of output downtime without dropping metrics at the
## cost of higher maximum memory usage.
metric_buffer_limit = 10000
## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
## This can be used to avoid many plugins querying things like sysfs at the
## same time, which can have a measurable effect on the system.
collection_jitter = "0s"
## Default flushing interval for all outputs. Maximum flush_interval will be
## flush_interval + flush_jitter
flush_interval = "10s"
## Jitter the flush interval by a random amount. This is primarily to avoid
## large write spikes for users running a large number of telegraf instances.
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "0s"
## By default or when set to "0s", precision will be set to the same
## timestamp order as the collection interval, with the maximum being 1s.
## ie, when interval = "10s", precision will be "1s"
## when interval = "250ms", precision will be "1ms"
## Precision will NOT be used for service inputs. It is up to each individual
## service input to set the timestamp at the appropriate precision.
## Valid time units are "ns", "us" (or "µs"), "ms", "s".
precision = ""
## Log at debug level.
# debug = false
## Log only error level messages.
# quiet = false
## Log target controls the destination for logs and can be one of "file",
## "stderr" or, on Windows, "eventlog". When set to "file", the output file
## is determined by the "logfile" setting.
# logtarget = "file"
## Name of the file to be logged to when using the "file" logtarget. If set to
## the empty string then logs are written to stderr.
# logfile = ""
## The logfile will be rotated after the time interval specified. When set
## to 0 no time based rotation is performed. Logs are rotated only when
## written to, if there is no log activity rotation may be delayed.
# logfile_rotation_interval = "0d"
## The logfile will be rotated when it becomes larger than the specified
## size. When set to 0 no size based rotation is performed.
# logfile_rotation_max_size = "0MB"
## Maximum number of rotated archives to keep, any older logs are deleted.
## If set to -1, no archives are removed.
# logfile_rotation_max_archives = 5
## Override default hostname, if empty use os.Hostname()
hostname = "{{ ansible_hostname }}"
## If set to true, do no set the "host" tag in the telegraf agent.
omit_hostname = false
###############################################################################
# OUTPUT PLUGINS #
###############################################################################
[[outputs.prometheus_client]]
listen = "{{ private_ip }}:9273"
metric_version = 2
###############################################################################
# INPUT PLUGINS #
###############################################################################
[[inputs.ping]]
urls = ["192.168.1.1", "9.9.9.9"]
ping_interval = 60.0
[[inputs.smart]]
use_sudo = true
[[inputs.proxmox]]
base_url = "https://{{ pve_hosts.pve.ip }}:8006/api2/json/"
api_token = "telegraf@pve!telegraf={{ proxmox_telegraf_token }}"
node_name = "pve"
insecure_skip_verify = true
[[inputs.disk]]
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
[[inputs.system]]
[[inputs.sensors]]
[[inputs.nvidia_smi]]
[[inputs.exec]]
command = "speedtest --json"
name_override = "speedtest"
timeout = "2m"
interval = "10m"
data_format = "json"

View File

@ -1,10 +0,0 @@
- name: Install and configure telegraf
import_role:
name: rossmcdonald.telegraf
become: true
- name: Let telegraf do smart stats
lineinfile:
path: /etc/sudoers
line: "{{ telegraf_runas_user }} ALL=(ALL) NOPASSWD: /usr/sbin/smartctl"
become: true

View File

@ -1,11 +0,0 @@
telegraf_configuration_template: files/telegraf.conf
telegraf_aws_tags: false
proxmox_telegraf_token: !vault |
$ANSIBLE_VAULT;1.1;AES256
34646261333165343031323566643738353363393864363035303037653838383038363162346164
6239313461393366373534636539613639623061393231640a613837343731373462666662356563
34343934313165623433646335383064333136343237353030353532653631633635366135336134
3931336436656561350a376137636666633937636134663139326630623761386435653435653338
36643232396361313436666533373737626365633662383239623561373061313636366231636330
3539303065366366323137336432613165336462363330363735