Compare commits
556 Commits
62dd71b860
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
21b38d2a30 | ||
|
|
99d9d193ac | ||
|
|
f47cd7ebf2 | ||
|
|
dbf67e7298 | ||
|
|
67df61e044 | ||
|
|
843e782340 | ||
|
|
541ad16e06 | ||
|
|
a605d38750 | ||
|
|
3334f0a1d2 | ||
|
|
2871bdcc92 | ||
|
|
c03b5b80a1 | ||
|
|
7e7bd06bca | ||
|
|
2e2d069530 | ||
|
|
c2efb7fbf7 | ||
|
|
724054296e | ||
|
|
f772e868e6 | ||
|
|
c970120b9f | ||
|
|
0ee763ba51 | ||
|
|
cc076b1923 | ||
|
|
1ddae94930 | ||
|
|
5043e1d4a1 | ||
|
|
940bf18aba | ||
|
|
433155ae0c | ||
|
|
73e06d8164 | ||
|
|
25bbd759d0 | ||
|
|
d90cd5ef2d | ||
|
|
4abcc610d5 | ||
|
|
6b7b5ae1ff | ||
|
|
07a2fd6090 | ||
|
|
5238c58dd5 | ||
|
|
5eb07f5047 | ||
|
|
bc8bd590a6 | ||
|
|
9c115f892e | ||
|
|
cddf056925 | ||
|
|
435153da6f | ||
|
|
cc17cdd020 | ||
|
|
1427d58e00 | ||
|
|
47c303afa0 | ||
|
|
664acf2614 | ||
|
|
463c6348b3 | ||
|
|
7837272a46 | ||
|
|
2535741474 | ||
|
|
32c8786d06 | ||
|
|
82e2593332 | ||
|
|
da3895c31d | ||
|
|
83a10de497 | ||
|
|
9031533077 | ||
|
|
b4c6e45d35 | ||
|
|
7066795c3c | ||
|
|
6e4977ea3b | ||
|
|
1809574fe6 | ||
|
|
858975f03f | ||
|
|
30db609aff | ||
|
|
45b5f473c9 | ||
|
|
71688dea16 | ||
|
|
b63b9aa4bb | ||
|
|
7565cdcf2f | ||
|
|
b7d390adf4 | ||
|
|
29769480be | ||
|
|
657281461d | ||
|
|
af53eca7f6 | ||
|
|
4f6e7ea4dc | ||
| 96fc55b932 | |||
|
|
cd92036f91 | ||
|
|
2f7c6aa005 | ||
|
|
f945d10d48 | ||
|
|
ddb18c4f17 | ||
|
|
f1aa1ea19f | ||
|
|
a3c0e9aa7f | ||
|
|
5216dab043 | ||
|
|
5864553fed | ||
|
|
140ea88460 | ||
|
|
581dc1ad13 | ||
|
|
e198c13e8a | ||
|
|
1e78439ddd | ||
|
|
1a307da6b2 | ||
|
|
885f2be16b | ||
|
|
945ecd78cf | ||
|
|
3f69e546e4 | ||
|
|
340d954fed | ||
|
|
484a55f4f4 | ||
|
|
cc5d88d708 | ||
|
|
046f08fe87 | ||
|
|
56bddcc747 | ||
|
|
71f3b70b86 | ||
|
|
5a579415a1 | ||
|
|
1ff30905f7 | ||
|
|
afdaee628b | ||
|
|
80dafe685b | ||
|
|
198811b752 | ||
|
|
8a64a9e04c | ||
|
|
f291d7c24d | ||
|
|
9b9b56043c | ||
|
|
4985348827 | ||
|
|
e98d790874 | ||
|
|
2bad9c3e48 | ||
|
|
6f658b6648 | ||
|
|
b95e80a24a | ||
|
|
6fbcf10ee4 | ||
|
|
2f75b2865b | ||
|
|
2e51deb511 | ||
|
|
20aefd5bf6 | ||
|
|
f6657f811b | ||
|
|
7300424a49 | ||
|
|
1ae5a1a27e | ||
|
|
896b7e6e91 | ||
|
|
0499a54ebc | ||
|
|
ddc0b686c3 | ||
|
|
cf84d80de7 | ||
|
|
2ebe4989bb | ||
|
|
551a7f12b5 | ||
|
|
ec51aef802 | ||
|
|
e0be6a069f | ||
|
|
0e512a3c0c | ||
|
|
f6b76b2d5e | ||
|
|
8e9ad47077 | ||
|
|
c5b6f2bbad | ||
| 83c3ac3ef3 | |||
| 7dd7317cb8 | |||
| 2654271494 | |||
|
|
888f589934 | ||
|
|
9aad2f3871 | ||
|
|
cbaac2bfa5 | ||
|
|
7529a9ce99 | ||
|
|
09309de982 | ||
|
|
56c41814fc | ||
|
|
68704e15b4 | ||
|
|
510206c752 | ||
|
|
58e9695b4c | ||
|
|
f27a0044f1 | ||
|
|
5c9323cfed | ||
|
|
2dcbd5a772 | ||
|
|
f9b5f235cc | ||
|
|
0b419db9f1 | ||
|
|
5f6f9e523d | ||
|
|
35319dc666 | ||
|
|
3c2409ed6e | ||
|
|
ca401363ec | ||
|
|
b5ee9e1d1f | ||
|
|
75a41929c4 | ||
|
|
d58c8cde2e | ||
|
|
64608a7677 | ||
|
|
48ce75bf38 | ||
|
|
0bbe5d6623 | ||
|
|
e1ac896a6e | ||
|
|
58009d7c23 | ||
|
|
b799d55835 | ||
|
|
166568edea | ||
|
|
f049a0a6a0 | ||
|
|
f8e382c217 | ||
|
|
c7e5c7fa2d | ||
|
|
0995ab35c4 | ||
|
|
480a53c80c | ||
|
|
d3ce5e861b | ||
|
|
e5c8fff0f9 | ||
|
|
21db92ff00 | ||
|
|
165c9f10e3 | ||
|
|
ade1733418 | ||
|
|
0cf64b2928 | ||
|
|
0fc9c8cb4c | ||
|
|
fe4a6dbf24 | ||
|
|
9cfe3985d0 | ||
|
|
18da187960 | ||
|
|
9c1bd24f16 | ||
|
|
177673ba62 | ||
|
|
77f5c82dfe | ||
|
|
663a6624a7 | ||
|
|
cc3cd610b2 | ||
|
|
b6239bdb6b | ||
|
|
0ae27ad9ed | ||
|
|
e00848dc65 | ||
|
|
f31975e0ef | ||
|
|
2c0cf7dc9c | ||
|
|
fb7b15f539 | ||
|
|
1d7009d69c | ||
|
|
99a91a57be | ||
|
|
427988bcc8 | ||
|
|
a208f2eec7 | ||
|
|
13f218d522 | ||
|
|
900fba5af6 | ||
|
|
b3d1dd377d | ||
|
|
e36c82c4db | ||
|
|
d192f6b57c | ||
|
|
fe1681e6e8 | ||
|
|
571f85cd0f | ||
|
|
25d2a3014a | ||
|
|
1a97e2146e | ||
|
|
d1150e5dd8 | ||
|
|
b0995d84bc | ||
|
|
9756a20223 | ||
|
|
1b4b522233 | ||
|
|
48217e0034 | ||
|
|
c3ecff9d45 | ||
|
|
07099357af | ||
|
|
ed0e616109 | ||
|
|
382e1801a7 | ||
|
|
2312a7304d | ||
|
|
47d5611462 | ||
|
|
9043dc00b0 | ||
|
|
a141e99a07 | ||
|
|
15d00f039c | ||
|
|
064c302073 | ||
|
|
35748ea7a1 | ||
|
|
e558494f8d | ||
|
|
1f0ab002d6 | ||
|
|
242ef1f0af | ||
|
|
c6aef5ab35 | ||
|
|
007597715a | ||
|
|
b6e54db6ec | ||
|
|
e9f523f2b8 | ||
|
|
653f983a08 | ||
|
|
459cdfe427 | ||
|
|
652346dcd4 | ||
|
|
5304c8ee01 | ||
|
|
2c82f29aef | ||
|
|
4371372a26 | ||
|
|
f8dccaae2b | ||
|
|
9ecc9ee72a | ||
|
|
9c54313ff1 | ||
|
|
e5eb48b0fa | ||
|
|
b655de3975 | ||
|
|
4e19f925c6 | ||
|
|
8a7f9cb370 | ||
|
|
b5ecd39100 | ||
|
|
629a009b36 | ||
|
|
ffdaeabc9f | ||
|
|
703bd412ed | ||
|
|
4d4c59efe3 | ||
|
|
837e5d46f5 | ||
|
|
0a71bca7b8 | ||
|
|
b7b6bd2a96 | ||
|
|
d33c039a17 | ||
|
|
6d5ce60608 | ||
|
|
d595746830 | ||
|
|
5a7c0ce4bc | ||
|
|
3a649f40cd | ||
|
|
b1bdb88ea4 | ||
|
|
0e4166bd5f | ||
|
|
42fb6c8b8c | ||
|
|
1579f10a41 | ||
|
|
063a4a5532 | ||
|
|
98a7b7819f | ||
|
|
e96c3cd0cf | ||
|
|
b7c0a225f5 | ||
|
|
f487e6caef | ||
|
|
bb06c4c689 | ||
|
|
5c48b780b2 | ||
|
|
4f5a11f715 | ||
|
|
cc193a1075 | ||
|
|
08efdfa9c5 | ||
|
|
00c7c0cd71 | ||
|
|
d067490f71 | ||
|
|
52ff385b04 | ||
|
|
6052975750 | ||
|
|
0434299d53 | ||
|
|
97f25b4c7e | ||
|
|
6591f2fde3 | ||
|
|
24464c0772 | ||
|
|
e4ccce1e3b | ||
|
|
76352c0d6f | ||
|
|
e716dbf8ca | ||
|
|
76129d407e | ||
|
|
9b1240274d | ||
|
|
a79eafeaf4 | ||
|
|
9b851c4622 | ||
|
|
d3e86b9d77 | ||
|
|
7f9cfc7f18 | ||
|
|
06fa7d832f | ||
|
|
d580b6e90c | ||
|
|
ff95187707 | ||
|
|
1a376eb25f | ||
|
|
58ec67aef9 | ||
|
|
2835d08418 | ||
|
|
79fa4c097c | ||
|
|
c2eab71a31 | ||
|
|
88b003d4f0 | ||
|
|
e6dcad1e07 | ||
|
|
eda74b7339 | ||
|
|
e470fc0dab | ||
|
|
32c52aa22e | ||
|
|
cfc619505a | ||
|
|
e0496fdba2 | ||
|
|
f096365e05 | ||
|
|
36cb93ecdd | ||
|
|
9960fd8c36 | ||
|
|
4d37dff9f8 | ||
|
|
7677df33e5 | ||
|
|
0f6bafae8e | ||
|
|
377968eb53 | ||
|
|
e483e52eee | ||
|
|
ba4e2bb68f | ||
|
|
989dde23eb | ||
|
|
3c3d90c45b | ||
|
|
5bd0e09df3 | ||
|
|
b8d4b59f40 | ||
|
|
850c030642 | ||
|
|
4acf0aeeff | ||
|
|
0bad014811 | ||
|
|
c2252a0e72 | ||
|
|
b41f34c090 | ||
|
|
6fa8e3aa30 | ||
|
|
031fe725b5 | ||
|
|
2f9b9c9b0f | ||
|
|
817b61058a | ||
|
|
e4492b10e1 | ||
|
|
6f78d0a513 | ||
|
|
1c4a98c0da | ||
|
|
be45ba2d59 | ||
|
|
41df042e98 | ||
|
|
06c6f53bbc | ||
|
|
98cbf8f3fc | ||
|
|
a694491140 | ||
|
|
a9a6b465d4 | ||
|
|
d32208d403 | ||
|
|
6c1cbc289c | ||
|
|
0f635576a3 | ||
|
|
56faabcdf1 | ||
|
|
b55221e90a | ||
|
|
95f90f43dc | ||
|
|
8283d531f6 | ||
|
|
d5adaaab72 | ||
|
|
5684479938 | ||
|
|
a6e7458adb | ||
|
|
87bada1fc7 | ||
|
|
dfacedb0ca | ||
|
|
36571013c1 | ||
|
|
9bda4d8f8d | ||
|
|
10e2b69974 | ||
|
|
e955302fe8 | ||
|
|
97a6b2e010 | ||
|
|
7436a37b99 | ||
|
|
9046070529 | ||
|
|
fb54f9cbd2 | ||
|
|
90083f886a | ||
|
|
74bfabf618 | ||
|
|
b7d201d743 | ||
| 181a479037 | |||
|
|
849265a1c6 | ||
|
|
8a6744d3e9 | ||
|
|
88804aca2c | ||
|
|
0cd0a27452 | ||
|
|
9f28c69709 | ||
|
|
b20f08b3d0 | ||
|
|
35fea645b6 | ||
|
|
2bc214e324 | ||
|
|
837fcbf926 | ||
|
|
e3b656f159 | ||
|
|
be703eb71d | ||
|
|
207ae246af | ||
|
|
69fe80353c | ||
|
|
99b739d946 | ||
|
|
c70fa130ab | ||
|
|
efd8396045 | ||
|
|
dd2a5536ab | ||
|
|
e1321a4002 | ||
|
|
da2819332c | ||
|
|
55b2a00458 | ||
|
|
6e8d890442 | ||
|
|
5b1b3f215a | ||
|
|
82e82350f9 | ||
|
|
e95c21d0cb | ||
|
|
70bf59daca | ||
|
|
c0b8c9a1ad | ||
|
|
414f7204bf | ||
|
|
23d02ba6a0 | ||
|
|
e8de8d88ad | ||
|
|
f037d8c922 | ||
|
|
468132d1dd | ||
|
|
c443fc606a | ||
|
|
05f420d162 | ||
|
|
10e132cd50 | ||
|
|
35f17a7eeb | ||
|
|
e861e0199c | ||
|
|
1b6e6ce40c | ||
|
|
0037309e4f | ||
|
|
3e81572477 | ||
|
|
23f3c3990c | ||
|
|
436a0e4d4c | ||
|
|
a74785f64d | ||
|
|
588e0b723a | ||
|
|
c87c77c1cf | ||
|
|
b16ea8b185 | ||
|
|
4a63149338 | ||
|
|
a2b2ccbab7 | ||
|
|
52a08a8769 | ||
|
|
3d0a4d289b | ||
|
|
037a27d405 | ||
|
|
e7ce1a73d0 | ||
|
|
46867cc659 | ||
|
|
efa8390108 | ||
|
|
e590682f8f | ||
|
|
83837ada8f | ||
|
|
f8c1ba4988 | ||
|
|
ae6473635d | ||
|
|
6b5aefd4c2 | ||
|
|
1ea0258393 | ||
|
|
09b49f096c | ||
|
|
18cacb33ee | ||
|
|
d850d00bab | ||
|
|
579b5f1a04 | ||
| ec460faf02 | |||
|
|
1ebc2fa71e | ||
|
|
d88bede097 | ||
|
|
bcde6678b8 | ||
|
|
5edf7eb23a | ||
|
|
1ed2d3a611 | ||
|
|
f75ee9f352 | ||
|
|
9f109b20fd | ||
|
|
5ebc729b82 | ||
|
|
f4c2cb120b | ||
|
|
8689643e11 | ||
|
|
0191ca4b13 | ||
|
|
3963ea5591 | ||
|
|
816096f4d1 | ||
|
|
d42a6ca6a8 | ||
|
|
ef8c60c2b5 | ||
|
|
f48fc750f2 | ||
|
|
334e815c25 | ||
|
|
7e91459cd6 | ||
|
|
269a63af1f | ||
|
|
8d8bae4e18 | ||
|
|
891dcaef32 | ||
|
|
54e4217e21 | ||
|
|
167d0ebd42 | ||
|
|
019e79a362 | ||
|
|
ac2a943feb | ||
|
|
18e6dde67a | ||
|
|
5ddd89f883 | ||
|
|
38083d7c3f | ||
|
|
197c60126c | ||
|
|
31ee974830 | ||
|
|
51bc796bec | ||
|
|
c6c3dd9cfe | ||
|
|
82c29f46a5 | ||
|
|
83a8912da6 | ||
|
|
1a8b9eb41b | ||
|
|
b2066cdb68 | ||
|
|
1260cbe674 | ||
|
|
0aa1776b57 | ||
|
|
2942025a54 | ||
| 99c3cab84a | |||
| c861482c9e | |||
|
|
39a134a0db | ||
|
|
94e941b026 | ||
| 5373ac6541 | |||
|
|
c9c93ac565 | ||
| ca78aa3962 | |||
| b763155a60 | |||
|
|
aa9e93369f | ||
|
|
b0ba08e572 | ||
|
|
2c82b50ea2 | ||
|
|
7e79ff4d98 | ||
|
|
424894a3e2 | ||
|
|
d74079da63 | ||
|
|
3f036da03d | ||
|
|
8bf45d5456 | ||
|
|
f1abca3a45 | ||
|
|
144915563c | ||
|
|
c79a6234af | ||
|
|
63669bd1d7 | ||
|
|
840a71df94 | ||
|
|
1ab21bc019 | ||
|
|
118ace7cc3 | ||
|
|
e334dfacd3 | ||
|
|
77d1718451 | ||
|
|
841793d7b9 | ||
|
|
c1b34f592b | ||
|
|
d3dd8882bd | ||
|
|
6b48bc63bf | ||
|
|
466aceb920 | ||
|
|
6f1feaa4b0 | ||
|
|
bf178ba141 | ||
|
|
15c0a8273c | ||
|
|
657dc2d407 | ||
|
|
f8cd3f3ee4 | ||
|
|
89db8bd1c5 | ||
|
|
17d2be5638 | ||
|
|
07d0386bf2 | ||
|
|
983b698266 | ||
|
|
e84338fc9a | ||
|
|
55f4cab948 | ||
|
|
891c7f87e3 | ||
|
|
1c74ab8541 | ||
|
|
92a74e7b8d | ||
|
|
c53f642838 | ||
|
|
7c0e94a425 | ||
|
|
7b79d3aa64 | ||
|
|
44e91ccdb5 | ||
|
|
59354fae18 | ||
|
|
f829929b07 | ||
|
|
45028de1db | ||
|
|
930ac20d11 | ||
|
|
f80bc006c1 | ||
|
|
1ff256dce0 | ||
|
|
e7a9042677 | ||
|
|
56a7b6de7d | ||
|
|
530bc32040 | ||
|
|
5103dc91be | ||
|
|
a80c376950 | ||
|
|
59e76bdfb6 | ||
|
|
087dcee5df | ||
|
|
cacedd3f16 | ||
|
|
7358555d56 | ||
|
|
609a86dd03 | ||
|
|
1dd1f10c0e | ||
|
|
0c5f1b5740 | ||
|
|
e7fbf5a7b2 | ||
|
|
3c903fc8dc | ||
|
|
87b8a71205 | ||
|
|
ea4c56e7f6 | ||
|
|
a3c35c7df9 | ||
|
|
94b5db0f5b | ||
|
|
642c040116 | ||
|
|
380ccb102b | ||
|
|
b8565af039 | ||
|
|
46b8f63fd1 | ||
|
|
0c9d12d8e0 | ||
|
|
000e9d2847 | ||
|
|
4922748599 | ||
|
|
262ee91684 | ||
|
|
2224f7d902 | ||
|
|
ffdfd6cd9a | ||
|
|
116038262a | ||
|
|
77a23c270b | ||
|
|
e71edcdd5e | ||
|
|
a9ad0eb841 | ||
|
|
c4cee9718c | ||
|
|
d40833b96a | ||
|
|
57e1d09bc6 | ||
|
|
9292bd5f5f | ||
|
|
a3429a609e | ||
|
|
51feacec1e | ||
|
|
806a817c07 | ||
|
|
89c9b53edd | ||
|
|
07dbfb1391 | ||
|
|
a2d55f7075 | ||
|
|
6d3956935d | ||
|
|
a0a0635ddd | ||
|
|
f1c5a95f12 | ||
|
|
5d9f6735cc | ||
|
|
4f9ee57421 | ||
|
|
ef9bc5a614 | ||
|
|
7f233460aa | ||
|
|
fb7d6db375 | ||
|
|
73309c7e63 | ||
|
|
43f145157d | ||
|
|
c2ce508565 | ||
|
|
a7f53c8993 | ||
|
|
bfb5a7a895 | ||
|
|
20b8d4ccaf | ||
|
|
0194549f25 | ||
|
|
d293dafb99 | ||
|
|
67a834153e | ||
|
|
769752a327 | ||
|
|
e8d6cc5b5d | ||
|
|
b14551de4e |
@@ -5,7 +5,7 @@ paths:
|
|||||||
|
|
||||||
# App Module Key Classes
|
# App Module Key Classes
|
||||||
|
|
||||||
`cameleer-server-app/src/main/java/com/cameleer/server/app/`
|
`cameleer-server-app/src/main/java/io/cameleer/server/app/`
|
||||||
|
|
||||||
## URL taxonomy
|
## URL taxonomy
|
||||||
|
|
||||||
@@ -23,10 +23,12 @@ These paths intentionally stay flat (no `/environments/{envSlug}` prefix). Every
|
|||||||
| `/api/v1/agents/register`, `/refresh`, `/{id}/heartbeat`, `/{id}/events` (SSE), `/{id}/deregister`, `/{id}/commands`, `/{id}/commands/{id}/ack`, `/{id}/replay` | Agent self-service; JWT-bound. |
|
| `/api/v1/agents/register`, `/refresh`, `/{id}/heartbeat`, `/{id}/events` (SSE), `/{id}/deregister`, `/{id}/commands`, `/{id}/commands/{id}/ack`, `/{id}/replay` | Agent self-service; JWT-bound. |
|
||||||
| `/api/v1/agents/commands`, `/api/v1/agents/groups/{group}/commands` | Operator fan-out; target scope is explicit in query params. |
|
| `/api/v1/agents/commands`, `/api/v1/agents/groups/{group}/commands` | Operator fan-out; target scope is explicit in query params. |
|
||||||
| `/api/v1/agents/config` | Agent-authoritative config read; JWT → registry → (app, env). |
|
| `/api/v1/agents/config` | Agent-authoritative config read; JWT → registry → (app, env). |
|
||||||
| `/api/v1/admin/{users,roles,groups,oidc,license,audit,rbac/stats,claim-mappings,thresholds,sensitive-keys,usage,clickhouse,database,environments}` | Truly cross-env admin. Env CRUD URLs use `{envSlug}`, not UUID. |
|
| `/api/v1/admin/{users,roles,groups,oidc,license,audit,rbac/stats,claim-mappings,thresholds,sensitive-keys,usage,clickhouse,database,environments,outbound-connections}` | Truly cross-env admin. Env CRUD URLs use `{envSlug}`, not UUID. |
|
||||||
| `/api/v1/catalog`, `/api/v1/catalog/{applicationId}` | Cross-env discovery is the purpose. Env is an optional filter via `?environment=`. |
|
| `/api/v1/catalog`, `/api/v1/catalog/{applicationId}` | Cross-env discovery is the purpose. Env is an optional filter via `?environment=`. |
|
||||||
| `/api/v1/executions/{execId}`, `/processors/**` | Exchange IDs are globally unique; permalinks. |
|
| `/api/v1/executions/{execId}`, `/processors/**` | Exchange IDs are globally unique; permalinks. |
|
||||||
| `/api/v1/diagrams/{contentHash}/render`, `POST /api/v1/diagrams/render` | Content-addressed or stateless. |
|
| `/api/v1/diagrams/{contentHash}/render`, `POST /api/v1/diagrams/render` | Content-addressed or stateless. |
|
||||||
|
| `/api/v1/artifacts/{appVersionId}` | Init-container artifact pull. HMAC-signed URL is the auth — no JWT context. |
|
||||||
|
| `/api/v1/alerts/notifications/{id}/retry` | Notification IDs are globally unique; no env routing needed. |
|
||||||
| `/api/v1/auth/**` | Pre-auth; no env context exists. |
|
| `/api/v1/auth/**` | Pre-auth; no env context exists. |
|
||||||
| `/api/v1/health`, `/prometheus`, `/api-docs/**`, `/swagger-ui/**` | Server metadata. |
|
| `/api/v1/health`, `/prometheus`, `/api-docs/**`, `/swagger-ui/**` | Server metadata. |
|
||||||
|
|
||||||
@@ -34,26 +36,44 @@ These paths intentionally stay flat (no `/environments/{envSlug}` prefix). Every
|
|||||||
|
|
||||||
ClickHouse is shared across tenants. Every ClickHouse query must filter by `tenant_id` (from `CAMELEER_SERVER_TENANT_ID` env var, resolved via `TenantContext`/config) in addition to `environment`. New controllers added under `/environments/{envSlug}/...` must preserve this — the env filter from the path does not replace the tenant filter.
|
ClickHouse is shared across tenants. Every ClickHouse query must filter by `tenant_id` (from `CAMELEER_SERVER_TENANT_ID` env var, resolved via `TenantContext`/config) in addition to `environment`. New controllers added under `/environments/{envSlug}/...` must preserve this — the env filter from the path does not replace the tenant filter.
|
||||||
|
|
||||||
|
## User ID conventions
|
||||||
|
|
||||||
|
`users.user_id` stores the **bare** identifier:
|
||||||
|
- Local users: `<username>` (e.g. `admin`, `alice`)
|
||||||
|
- OIDC users: `oidc:<sub>` (e.g. `oidc:c7a93b…`)
|
||||||
|
|
||||||
|
JWT subjects carry a `user:` namespace prefix (`user:admin`, `user:oidc:<sub>`) so `JwtAuthenticationFilter` can distinguish user tokens from agent tokens. All three write paths upsert the **bare** form:
|
||||||
|
|
||||||
|
- `UiAuthController.login` — computes `userId = request.username()`, signs with `subject = "user:" + userId`.
|
||||||
|
- `OidcAuthController.callback` — `userId = "oidc:" + oidcUser.subject()`, signs with `subject = "user:" + userId`.
|
||||||
|
- `UserAdminController.createUser` — `userId = request.username()`.
|
||||||
|
|
||||||
|
Env-scoped read-path controllers (`AlertController`, `AlertRuleController`, `AlertSilenceController`, `OutboundConnectionAdminController`) strip `"user:"` from `SecurityContextHolder.authentication.name` before using it as an FK. All FKs to `users(user_id)` (e.g. `alert_rules.created_by`, `outbound_connections.created_by`, `alert_reads.user_id`, `user_roles.user_id`, `user_groups.user_id`) therefore reference the bare form. If you add a new controller that needs the acting user id for an FK insert, follow the same strip pattern.
|
||||||
|
|
||||||
## controller/ — REST endpoints
|
## controller/ — REST endpoints
|
||||||
|
|
||||||
### Env-scoped (user-facing data & config)
|
### Env-scoped (user-facing data & config)
|
||||||
|
|
||||||
- `AppController` — `/api/v1/environments/{envSlug}/apps`. GET list / POST create / GET `{appSlug}` / DELETE `{appSlug}` / GET `{appSlug}/versions` / POST `{appSlug}/versions` (JAR upload) / PUT `{appSlug}/container-config`. App slug uniqueness is per-env (`(env, app_slug)` is the natural key). `CreateAppRequest` body has no env (path), validates slug regex.
|
- `AppController` — `/api/v1/environments/{envSlug}/apps`. GET list / POST create / GET `{appSlug}` / DELETE `{appSlug}` / GET `{appSlug}/versions` / POST `{appSlug}/versions` (JAR upload) / PUT `{appSlug}/container-config` / GET `{appSlug}/dirty-state` (returns `DirtyStateResponse{dirty, lastSuccessfulDeploymentId, differences}` — compares current JAR+config against last RUNNING deployment snapshot; dirty=true when no snapshot exists). App slug uniqueness is per-env (`(env, app_slug)` is the natural key). `CreateAppRequest` body has no env (path), validates slug regex. Injects `DirtyStateCalculator` bean (registered in `RuntimeBeanConfig`, requires `ObjectMapper` with `JavaTimeModule`).
|
||||||
- `DeploymentController` — `/api/v1/environments/{envSlug}/apps/{appSlug}/deployments`. GET list / POST create (body `{ appVersionId }`) / POST `{id}/stop` / POST `{id}/promote` (body `{ targetEnvironment: slug }` — target app slug must exist in target env) / GET `{id}/logs`.
|
- `DeploymentController` — `/api/v1/environments/{envSlug}/apps/{appSlug}/deployments`. GET list / POST create (body `{ appVersionId }`) / POST `{id}/stop` / POST `{id}/promote` (body `{ targetEnvironment: slug }` — target app slug must exist in target env) / GET `{id}/logs`. All lifecycle ops (`POST /` deploy, `POST /{id}/stop`, `POST /{id}/promote`) audited under `AuditCategory.DEPLOYMENT`. Action codes: `deploy_app`, `stop_deployment`, `promote_deployment`. Acting user resolved via the `user:` prefix-strip convention; both SUCCESS and FAILURE branches write audit rows. `created_by` (TEXT, nullable) populated from `SecurityContextHolder` and surfaced on the `Deployment` DTO.
|
||||||
- `ApplicationConfigController` — `/api/v1/environments/{envSlug}`. GET `/config` (list), GET/PUT `/apps/{appSlug}/config`, GET `/apps/{appSlug}/processor-routes`, POST `/apps/{appSlug}/config/test-expression`. PUT also pushes `CONFIG_UPDATE` to LIVE agents in this env.
|
- `ApplicationConfigController` — `/api/v1/environments/{envSlug}`. GET `/config` (list), GET/PUT `/apps/{appSlug}/config`, GET `/apps/{appSlug}/processor-routes`, POST `/apps/{appSlug}/config/test-expression`. PUT accepts `?apply=staged|live` (default `live`). `live` saves to DB and pushes `CONFIG_UPDATE` SSE to live agents in this env (existing behavior); `staged` saves to DB only, skipping the SSE push — used by the unified app deployment page. Audit action is `stage_app_config` for staged writes, `update_app_config` for live. Invalid `apply` values return 400.
|
||||||
- `AppSettingsController` — `/api/v1/environments/{envSlug}`. GET `/app-settings` (list), GET/PUT/DELETE `/apps/{appSlug}/settings`. ADMIN/OPERATOR only.
|
- `AppSettingsController` — `/api/v1/environments/{envSlug}`. GET `/app-settings` (list), GET/PUT/DELETE `/apps/{appSlug}/settings`. ADMIN/OPERATOR only.
|
||||||
- `SearchController` — `/api/v1/environments/{envSlug}`. GET `/executions`, POST `/executions/search`, GET `/stats`, `/stats/timeseries`, `/stats/timeseries/by-app`, `/stats/timeseries/by-route`, `/stats/punchcard`, `/attributes/keys`, `/errors/top`.
|
- `SearchController` — `/api/v1/environments/{envSlug}`. GET `/executions`, POST `/executions/search`, GET `/stats`, `/stats/timeseries`, `/stats/timeseries/by-app`, `/stats/timeseries/by-route`, `/stats/punchcard`, `/attributes/keys`, `/errors/top`. GET `/executions` accepts repeat `attr` query params: `attr=order` (key-exists), `attr=order:47` (exact), `attr=order:4*` (wildcard — `*` maps to SQL LIKE `%`). First `:` splits key/value; later colons stay in the value. Invalid keys → 400. POST `/executions/search` accepts the same filters via `SearchRequest.attributeFilters` in the body.
|
||||||
- `LogQueryController` — GET `/api/v1/environments/{envSlug}/logs` (filters: source, application, agentId, exchangeId, level, logger, q, time range).
|
- `LogQueryController` — GET `/api/v1/environments/{envSlug}/logs` (filters: source (multi, comma-split, OR-joined), level (multi, comma-split, OR-joined), application, agentId, exchangeId, logger, q, time range, instanceIds (multi, comma-split, AND-joined as WHERE instance_id IN (...) — used by the Checkpoint detail drawer to scope logs to a deployment's replicas); sort asc/desc). Cursor-paginated, returns `{ data, nextCursor, hasMore, levelCounts }`; cursor is base64url of `"{timestampIso}|{insert_id_uuid}"` — same-millisecond tiebreak via the `insert_id` UUID column on `logs`.
|
||||||
- `RouteCatalogController` — GET `/api/v1/environments/{envSlug}/routes` (merged route catalog from registry + ClickHouse; env filter unconditional).
|
- `RouteCatalogController` — GET `/api/v1/environments/{envSlug}/routes` (merged route catalog from registry + ClickHouse; env filter unconditional).
|
||||||
- `RouteMetricsController` — GET `/api/v1/environments/{envSlug}/routes/metrics`, GET `/api/v1/environments/{envSlug}/routes/metrics/processors`.
|
- `RouteMetricsController` — GET `/api/v1/environments/{envSlug}/routes/metrics`, GET `/api/v1/environments/{envSlug}/routes/metrics/processors`.
|
||||||
- `AgentListController` — GET `/api/v1/environments/{envSlug}/agents` (registered agents with runtime metrics, filtered to env).
|
- `AgentListController` — GET `/api/v1/environments/{envSlug}/agents` (registered agents with runtime metrics, filtered to env).
|
||||||
- `AgentEventsController` — GET `/api/v1/environments/{envSlug}/agents/events` (lifecycle events).
|
- `AgentEventsController` — GET `/api/v1/environments/{envSlug}/agents/events` (lifecycle events; cursor-paginated, returns `{ data, nextCursor, hasMore }`; order `(timestamp DESC, insert_id DESC)`; cursor is base64url of `"{timestampIso}|{insert_id_uuid}"` — `insert_id` is a stable UUID column used as a same-millisecond tiebreak).
|
||||||
- `AgentMetricsController` — GET `/api/v1/environments/{envSlug}/agents/{agentId}/metrics` (JVM/Camel metrics). Rejects cross-env agents (404) as defence-in-depth.
|
- `AgentMetricsController` — GET `/api/v1/environments/{envSlug}/agents/{agentId}/metrics` (JVM/Camel metrics). Rejects cross-env agents (404) as defence-in-depth.
|
||||||
- `DiagramRenderController` — GET `/api/v1/environments/{envSlug}/apps/{appSlug}/routes/{routeId}/diagram` (env-scoped lookup). Also GET `/api/v1/diagrams/{contentHash}/render` (flat — content hashes are globally unique).
|
- `DiagramRenderController` — GET `/api/v1/environments/{envSlug}/apps/{appSlug}/routes/{routeId}/diagram` returns the most recent diagram for (app, env, route) via `DiagramStore.findLatestContentHashForAppRoute`. Registry-independent — routes whose publishing agents were removed still resolve. Also GET `/api/v1/diagrams/{contentHash}/render` (flat — content hashes are globally unique), the point-in-time path consumed by the exchange viewer via `ExecutionDetail.diagramContentHash`.
|
||||||
|
- `AlertRuleController` — `/api/v1/environments/{envSlug}/alerts/rules`. GET list / POST create / GET `{id}` / PUT `{id}` / DELETE `{id}` / POST `{id}/enable` / POST `{id}/disable` / POST `{id}/render-preview` / POST `{id}/test-evaluate`. OPERATOR+ for mutations, VIEWER+ for reads. CRITICAL: attribute keys in `ExchangeMatchCondition.filter.attributes` are validated at rule-save time against `^[a-zA-Z0-9._-]+$` — they are later inlined into ClickHouse SQL. `AgentLifecycleCondition` is allowlist-only — the `AgentLifecycleEventType` enum (REGISTERED / RE_REGISTERED / DEREGISTERED / WENT_STALE / WENT_DEAD / RECOVERED) plus the record compact ctor (non-empty `eventTypes`, `withinSeconds ≥ 1`) do the validation; custom agent-emitted event types are tracked in backlog issue #145. Webhook validation: verifies `outboundConnectionId` exists and `isAllowedInEnvironment`. Null notification templates default to `""` (NOT NULL constraint). Audit: `ALERT_RULE_CHANGE`.
|
||||||
|
- `AlertController` — `/api/v1/environments/{envSlug}/alerts`. GET list (inbox filtered by userId/groupIds/roleNames via `InAppInboxQuery`; optional multi-value `state`, `severity`, tri-state `acked`, tri-state `read` query params; soft-deleted rows always excluded) / GET `/unread-count` / GET `{id}` / POST `{id}/ack` / POST `{id}/read` / POST `/bulk-read` / POST `/bulk-ack` (VIEWER+) / DELETE `{id}` (OPERATOR+, soft-delete) / POST `/bulk-delete` (OPERATOR+) / POST `{id}/restore` (OPERATOR+, clears `deleted_at`). `requireLiveInstance` helper returns 404 on soft-deleted rows; `restore` explicitly fetches regardless of `deleted_at`. `BulkIdsRequest` is the shared body for bulk-read/ack/delete (`{ instanceIds }`). `AlertDto` includes `readAt`; `deletedAt` is intentionally NOT on the wire. Inbox SQL: `? = ANY(target_user_ids) OR target_group_ids && ? OR target_role_names && ?` — requires at least one matching target (no broadcast concept).
|
||||||
|
- `AlertSilenceController` — `/api/v1/environments/{envSlug}/alerts/silences`. GET list / POST create / DELETE `{id}`. 422 if `endsAt <= startsAt`. OPERATOR+ for mutations, VIEWER+ for list. Audit: `ALERT_SILENCE_CHANGE`.
|
||||||
|
- `AlertNotificationController` — Dual-path (no class-level prefix). GET `/api/v1/environments/{envSlug}/alerts/{alertId}/notifications` (VIEWER+); POST `/api/v1/alerts/notifications/{id}/retry` (OPERATOR+, flat — notification IDs globally unique). Retry resets attempts to 0 and sets `nextAttemptAt = now`.
|
||||||
|
|
||||||
### Env admin (env-slug-parameterized, not env-scoped data)
|
### Env admin (env-slug-parameterized, not env-scoped data)
|
||||||
|
|
||||||
- `EnvironmentAdminController` — `/api/v1/admin/environments`. GET list / POST create / GET `{envSlug}` / PUT `{envSlug}` / DELETE `{envSlug}` / PUT `{envSlug}/default-container-config` / PUT `{envSlug}/jar-retention`. Slug immutable — PUT body has no slug field; any slug supplied is dropped by Jackson. Slug validated on POST.
|
- `EnvironmentAdminController` — `/api/v1/admin/environments`. GET list / POST create / GET `{envSlug}` / PUT `{envSlug}` / DELETE `{envSlug}` / PUT `{envSlug}/default-container-config` / PUT `{envSlug}/jar-retention`. Slug immutable — PUT body has no slug field; any slug supplied is dropped by Jackson. Slug validated on POST. `UpdateEnvironmentRequest` carries `color` (nullable); unknown values rejected with 400 via `EnvironmentColor.isValid`. Null/absent color preserves the existing value.
|
||||||
|
|
||||||
### Agent-only (JWT-authoritative, intentionally flat)
|
### Agent-only (JWT-authoritative, intentionally flat)
|
||||||
|
|
||||||
@@ -66,8 +86,7 @@ ClickHouse is shared across tenants. Every ClickHouse query must filter by `tena
|
|||||||
|
|
||||||
- `LogIngestionController` — POST `/api/v1/data/logs` (accepts `List<LogEntry>`; WARNs on missing identity, unregistered agents, empty payloads, buffer-full drops).
|
- `LogIngestionController` — POST `/api/v1/data/logs` (accepts `List<LogEntry>`; WARNs on missing identity, unregistered agents, empty payloads, buffer-full drops).
|
||||||
- `EventIngestionController` — POST `/api/v1/data/events`.
|
- `EventIngestionController` — POST `/api/v1/data/events`.
|
||||||
- `ChunkIngestionController` — POST `/api/v1/ingestion/chunk/{executions|metrics|diagrams}`.
|
- `ChunkIngestionController` — POST `/api/v1/data/executions`. Accepts a single `ExecutionChunk` or an array (fields include `exchangeId`, `applicationId`, `instanceId`, `routeId`, `status`, `startTime`, `endTime`, `durationMs`, `chunkSeq`, `final`, `processors: FlatProcessorRecord[]`). The accumulator merges non-final chunks by exchangeId and emits the merged envelope on the final chunk or on stale timeout. Legacy `ExecutionController` / `RouteExecution` shape is retired.
|
||||||
- `ExecutionController` — POST `/api/v1/data/executions` (legacy ingestion path when ClickHouse disabled).
|
|
||||||
- `MetricsController` — POST `/api/v1/data/metrics`.
|
- `MetricsController` — POST `/api/v1/data/metrics`.
|
||||||
- `DiagramController` — POST `/api/v1/data/diagrams` (resolves applicationId + environment from the agent registry keyed on JWT subject; stamps both on the stored `TaggedDiagram`).
|
- `DiagramController` — POST `/api/v1/data/diagrams` (resolves applicationId + environment from the agent registry keyed on JWT subject; stamps both on the stored `TaggedDiagram`).
|
||||||
|
|
||||||
@@ -81,28 +100,38 @@ ClickHouse is shared across tenants. Every ClickHouse query must filter by `tena
|
|||||||
- `RoleAdminController` — CRUD `/api/v1/admin/roles`.
|
- `RoleAdminController` — CRUD `/api/v1/admin/roles`.
|
||||||
- `GroupAdminController` — CRUD `/api/v1/admin/groups`.
|
- `GroupAdminController` — CRUD `/api/v1/admin/groups`.
|
||||||
- `OidcConfigAdminController` — GET/POST `/api/v1/admin/oidc`, POST `/test`.
|
- `OidcConfigAdminController` — GET/POST `/api/v1/admin/oidc`, POST `/test`.
|
||||||
|
- `OutboundConnectionAdminController` — `/api/v1/admin/outbound-connections`. GET list / POST create / GET `{id}` / PUT `{id}` / DELETE `{id}` / POST `{id}/test` / GET `{id}/usage`. RBAC: list/get/usage ADMIN|OPERATOR; mutations + test ADMIN.
|
||||||
- `SensitiveKeysAdminController` — GET/PUT `/api/v1/admin/sensitive-keys`. GET returns 200 or 204 if not configured. PUT accepts `{ keys: [...] }` with optional `?pushToAgents=true`. Fan-out iterates every distinct `(application, environment)` slice — intentional global baseline + per-env overrides.
|
- `SensitiveKeysAdminController` — GET/PUT `/api/v1/admin/sensitive-keys`. GET returns 200 or 204 if not configured. PUT accepts `{ keys: [...] }` with optional `?pushToAgents=true`. Fan-out iterates every distinct `(application, environment)` slice — intentional global baseline + per-env overrides.
|
||||||
- `ClaimMappingAdminController` — CRUD `/api/v1/admin/claim-mappings`, POST `/test`.
|
- `ClaimMappingAdminController` — CRUD `/api/v1/admin/claim-mappings`, POST `/test`.
|
||||||
- `LicenseAdminController` — GET/POST `/api/v1/admin/license`.
|
- `LicenseAdminController` — GET/POST `/api/v1/admin/license`. ADMIN only. GET returns `{state, invalidReason, envelope, lastValidatedAt?}` — the raw token is deliberately omitted; only the parsed `LicenseInfo` envelope is exposed. POST delegates to `LicenseService.install(token, userId, "api")` (acting userId resolved via the `user:` prefix-strip convention) — install/replace/reject all flow through `LicenseService` so audit, persistence, and `LicenseChangedEvent` publishing are uniform.
|
||||||
|
- `LicenseUsageController` — GET `/api/v1/admin/license/usage`. Returns license `state`, `expiresAt`/`daysRemaining`/`gracePeriodDays`/`tenantId`/`label`/`lastValidatedAt`, the `LicenseMessageRenderer.forState(...)` message, and a `limits[]` array (`{key, current, cap, source}`) covering every effective-limits key. `source` is `"license"` when the cap came from the license override map, `"default"` otherwise. `max_agents` reads from `AgentRegistryService.liveCount()`; all other counts come from `LicenseUsageReader.snapshot()`.
|
||||||
- `ThresholdAdminController` — CRUD `/api/v1/admin/thresholds`.
|
- `ThresholdAdminController` — CRUD `/api/v1/admin/thresholds`.
|
||||||
- `AuditLogController` — GET `/api/v1/admin/audit`.
|
- `AuditLogController` — GET `/api/v1/admin/audit`.
|
||||||
- `RbacStatsController` — GET `/api/v1/admin/rbac/stats`.
|
- `RbacStatsController` — GET `/api/v1/admin/rbac/stats`.
|
||||||
- `UsageAnalyticsController` — GET `/api/v1/admin/usage` (ClickHouse `usage_events`).
|
- `UsageAnalyticsController` — GET `/api/v1/admin/usage` (ClickHouse `usage_events`).
|
||||||
- `ClickHouseAdminController` — GET `/api/v1/admin/clickhouse/**` (conditional on `infrastructureendpoints` flag).
|
- `ClickHouseAdminController` — GET `/api/v1/admin/clickhouse/**` (conditional on `infrastructureendpoints` flag).
|
||||||
- `DatabaseAdminController` — GET `/api/v1/admin/database/**` (conditional on `infrastructureendpoints` flag).
|
- `DatabaseAdminController` — GET `/api/v1/admin/database/**` (conditional on `infrastructureendpoints` flag).
|
||||||
|
- `ServerMetricsAdminController` — `/api/v1/admin/server-metrics/**`. GET `/catalog`, GET `/instances`, POST `/query`. Generic read API over the `server_metrics` ClickHouse table so SaaS dashboards don't need direct CH access. Delegates to `ServerMetricsQueryStore` (impl `ClickHouseServerMetricsQueryStore`). Visibility matches ClickHouse/Database admin: `@ConditionalOnProperty(infrastructureendpoints, matchIfMissing=true)` + class-level `@PreAuthorize("hasRole('ADMIN')")`. Validation: metric/tag regex `^[a-zA-Z0-9._]+$`, statistic regex `^[a-z_]+$`, `to - from ≤ 31 days`, stepSeconds ∈ [10, 3600], response capped at 500 series. `IllegalArgumentException` → 400. `/query` supports `raw` + `delta` modes (delta does per-`server_instance_id` positive-clipped differences, then aggregates across instances). Derived `statistic=mean` for timers computes `sum(total|total_time)/sum(count)` per bucket.
|
||||||
|
|
||||||
|
### Auth (flat)
|
||||||
|
|
||||||
|
- `UiAuthController` — `/api/v1/auth` (login, refresh, me, logout). Local username/password against env-var admin or DB BCrypt hash. Lockout after 5 failed attempts. `POST /logout` is permitAll — controller resolves the user from the access token if present, bumps `users.token_revoked_before = now().plusMillis(1)` to invalidate all outstanding refresh + access tokens (enforced by `JwtAuthenticationFilter`), audits `AuditCategory.AUTH / logout`, returns 204. Best-effort: 204 also when called without a token so the SPA's logout never fails on already-expired sessions. The +1ms guards against same-millisecond races (JWT `iat` is ms-quantised, filter check is strict `isBefore`).
|
||||||
|
- `OidcAuthController` — `/api/v1/auth/oidc` (config, callback). Code → token exchange. Roles via custom JWT claim, claim mapping rules, or default roles.
|
||||||
|
- `AuthCapabilitiesController` — `GET /api/v1/auth/capabilities` (unauthenticated). Reports `{oidc:{enabled, providerName, primary}, localAccounts:{enabled, adminRecoveryOnly}}` so the SPA renders the login page deterministically. `oidc.primary == oidc.enabled`; `localAccounts.adminRecoveryOnly == oidc.primary`. `providerName` is best-effort label via `OidcProviderNameDeriver` (Logto / Keycloak / Auth0 / Okta / Single Sign-On). The SPA hides the local form behind `?local` when `adminRecoveryOnly` is true.
|
||||||
|
|
||||||
### Other (flat)
|
### Other (flat)
|
||||||
|
|
||||||
- `DetailController` — GET `/api/v1/executions/{executionId}` + processor snapshot endpoints.
|
- `DetailController` — GET `/api/v1/executions/{executionId}` + processor snapshot endpoints.
|
||||||
- `MetricsController` — exposes `/api/v1/metrics` and `/api/v1/prometheus` (server-side Prometheus scrape endpoint).
|
- `MetricsController` — exposes `/api/v1/metrics` and `/api/v1/prometheus` (server-side Prometheus scrape endpoint).
|
||||||
|
- `ArtifactDownloadController` — GET `/api/v1/artifacts/{appVersionId}?exp&sig`. HMAC-signed URL is the auth (permitAll'd in `SecurityConfig`); validates via `ArtifactDownloadTokenSigner`. Streams the artifact via `ArtifactStore.get(coords)` with content type `application/java-archive`. Hit by the `cameleer-runtime-loader` init container at deploy time. 401 on bad sig, 404 on missing version, 200 on success.
|
||||||
|
|
||||||
## runtime/ — Docker orchestration
|
## runtime/ — Docker orchestration
|
||||||
|
|
||||||
- `DockerRuntimeOrchestrator` — implements RuntimeOrchestrator; Docker Java client (zerodep transport), container lifecycle
|
- `DockerRuntimeOrchestrator` — implements RuntimeOrchestrator; Docker Java client (zerodep transport), container lifecycle. **`startContainer` is a 2-phase op**: per-replica named volume → `cameleer-runtime-loader` init container fetches the JAR via signed URL → main container starts with the volume mounted RO at `/app/jars`. Both containers get `cap_drop ALL`, `no-new-privileges`, `apparmor=docker-default`, readonly rootfs, pids=512, `/tmp` tmpfs (no `noexec`), and `userns_mode=host:1000:65536`. Volume cleanup deterministic via `removeContainer` deriving the volume name from the inspected container.
|
||||||
- `DeploymentExecutor` — @Async staged deploy: PRE_FLIGHT -> PULL_IMAGE -> CREATE_NETWORK -> START_REPLICAS -> HEALTH_CHECK -> SWAP_TRAFFIC -> COMPLETE. Container names are `{tenantId}-{envSlug}-{appSlug}-{replicaIndex}` (globally unique on Docker daemon). Sets per-replica `CAMELEER_AGENT_INSTANCEID` env var to `{envSlug}-{appSlug}-{replicaIndex}`.
|
- `DeploymentExecutor` — @Async staged deploy: PRE_FLIGHT -> PULL_IMAGE -> CREATE_NETWORK -> START_REPLICAS -> HEALTH_CHECK -> SWAP_TRAFFIC -> COMPLETE. Pulls both `baseImage` and `loaderImage` at PULL_IMAGE. Generates per-deploy signed download URLs via `ArtifactDownloadTokenSigner.sign(appVersionId, ttl)` — passes URL + appVersionId + jarSizeBytes + loaderImage into `ContainerRequest`. The host filesystem is no longer involved at deploy time. Container names are `{tenantId}-{envSlug}-{appSlug}-{replicaIndex}-{generation}`, where `generation` is the first 8 chars of the deployment UUID — old and new replicas coexist during a blue/green swap. Per-replica `CAMELEER_AGENT_INSTANCEID` env var is `{envSlug}-{appSlug}-{replicaIndex}-{generation}`. Branches on `DeploymentStrategy.fromWire(config.deploymentStrategy())`: **blue-green** (default) starts all N → waits for all healthy → stops old (partial health = FAILED, preserves old untouched); **rolling** replaces replicas one at a time with rollback only for in-flight new containers (already-replaced old stay stopped; un-replaced old keep serving). DEGRADED is now only set by `DockerEventMonitor` post-deploy, never by the executor. **License compute caps**: at PRE_FLIGHT (after `ConfigMerger.resolve`, before image pull / container creation) the executor consults `LicenseUsageReader.computeUsage()` (PG aggregate over non-stopped deployments) and runs three `LicenseEnforcer.assertWithinCap(...)` checks for `max_total_cpu_millis`, `max_total_memory_mb`, and `max_total_replicas`. A `LicenseCapExceededException` propagates to the surrounding `try/catch` which marks the deployment FAILED with the cap message in `deployments.error_message`.
|
||||||
- `DockerNetworkManager` — ensures bridge networks (cameleer-traefik, cameleer-env-{slug}), connects containers
|
- `DockerNetworkManager` — ensures bridge networks (cameleer-traefik, cameleer-env-{slug}), connects containers
|
||||||
- `DockerEventMonitor` — persistent Docker event stream listener (die, oom, start, stop), updates deployment status
|
- `DockerEventMonitor` — persistent Docker event stream listener (die, oom, start, stop), updates deployment status
|
||||||
- `TraefikLabelBuilder` — generates Traefik Docker labels for path-based or subdomain routing. Also emits `cameleer.replica` and `cameleer.instance-id` labels per container for labels-first identity.
|
- `TraefikLabelBuilder` — generates Traefik Docker labels for path-based or subdomain routing. Per-container identity labels: `cameleer.replica` (index), `cameleer.generation` (deployment-scoped 8-char id — for Prometheus/Grafana deploy-boundary annotations), `cameleer.instance-id` (`{envSlug}-{appSlug}-{replicaIndex}-{generation}`). Router/service label keys are generation-agnostic so load balancing spans old + new replicas during a blue/green overlap.
|
||||||
- `PrometheusLabelBuilder` — generates Prometheus Docker labels (`prometheus.scrape/path/port`) per runtime type for `docker_sd_configs` auto-discovery
|
- `PrometheusLabelBuilder` — generates Prometheus Docker labels (`prometheus.scrape/path/port`) per runtime type for `docker_sd_configs` auto-discovery
|
||||||
- `ContainerLogForwarder` — streams Docker container stdout/stderr to ClickHouse with `source='container'`. One follow-stream thread per container, batches lines every 2s/50 lines via `ClickHouseLogStore.insertBufferedBatch()`. 60-second max capture timeout.
|
- `ContainerLogForwarder` — streams Docker container stdout/stderr to ClickHouse with `source='container'`. One follow-stream thread per container, batches lines every 2s/50 lines via `ClickHouseLogStore.insertBufferedBatch()`. 60-second max capture timeout.
|
||||||
- `DisabledRuntimeOrchestrator` — no-op when runtime not enabled
|
- `DisabledRuntimeOrchestrator` — no-op when runtime not enabled
|
||||||
@@ -110,15 +139,22 @@ ClickHouse is shared across tenants. Every ClickHouse query must filter by `tena
|
|||||||
## metrics/ — Prometheus observability
|
## metrics/ — Prometheus observability
|
||||||
|
|
||||||
- `ServerMetrics` — centralized business metrics: gauges (agents by state, SSE connections, buffer depths), counters (ingestion drops, agent transitions, deployment outcomes, auth failures), timers (flush duration, deployment duration). Exposed via `/api/v1/prometheus`.
|
- `ServerMetrics` — centralized business metrics: gauges (agents by state, SSE connections, buffer depths), counters (ingestion drops, agent transitions, deployment outcomes, auth failures), timers (flush duration, deployment duration). Exposed via `/api/v1/prometheus`.
|
||||||
|
- `ServerInstanceIdConfig` — `@Configuration`, exposes `@Bean("serverInstanceId") String`. Resolution precedence: `cameleer.server.instance-id` property → `HOSTNAME` env → `InetAddress.getLocalHost()` → random UUID. Fixed at boot; rotates across restarts so counters restart cleanly.
|
||||||
|
- `ServerMetricsSnapshotScheduler` — `@Scheduled(fixedDelayString = "${cameleer.server.self-metrics.interval-ms:60000}")`. Walks `MeterRegistry.getMeters()` each tick, emits one `ServerMetricSample` per `Measurement` (Timer/DistributionSummary produce multiple rows per meter — one per Micrometer `Statistic`). Skips non-finite values; logs and swallows store failures. Disabled via `cameleer.server.self-metrics.enabled=false` (`@ConditionalOnProperty`). Write-only — no query endpoint yet; inspect via `/api/v1/admin/clickhouse/query`.
|
||||||
|
|
||||||
## storage/ — PostgreSQL repositories (JdbcTemplate)
|
## storage/ — PostgreSQL repositories (JdbcTemplate)
|
||||||
|
|
||||||
- `PostgresAppRepository`, `PostgresAppVersionRepository`, `PostgresEnvironmentRepository`
|
- `PostgresAppRepository`, `PostgresAppVersionRepository`, `PostgresEnvironmentRepository`
|
||||||
- `PostgresDeploymentRepository` — includes JSONB replica_states, deploy_stage, findByContainerId
|
- `PostgresDeploymentRepository` — includes JSONB replica_states, deploy_stage, findByContainerId. Also carries `deployed_config_snapshot` JSONB (Flyway V3) populated by `DeploymentExecutor` via `saveDeployedConfigSnapshot(UUID, DeploymentConfigSnapshot)` on successful RUNNING transition. Consumed by `DirtyStateCalculator` for the `/apps/{slug}/dirty-state` endpoint and by the UI for checkpoint restore.
|
||||||
- `PostgresUserRepository`, `PostgresRoleRepository`, `PostgresGroupRepository`
|
- `PostgresUserRepository`, `PostgresRoleRepository`, `PostgresGroupRepository`
|
||||||
- `PostgresAuditRepository`, `PostgresOidcConfigRepository`, `PostgresClaimMappingRepository`, `PostgresSensitiveKeysRepository`
|
- `PostgresAuditRepository`, `PostgresOidcConfigRepository`, `PostgresClaimMappingRepository`, `PostgresSensitiveKeysRepository`
|
||||||
- `PostgresAppSettingsRepository`, `PostgresApplicationConfigRepository`, `PostgresThresholdRepository`. Both `app_settings` and `application_config` are env-scoped (PK `(app_id, environment)` / `(application, environment)`); finders take `(app, env)` — no env-agnostic variants.
|
- `PostgresAppSettingsRepository`, `PostgresApplicationConfigRepository`, `PostgresThresholdRepository`. Both `app_settings` and `application_config` are env-scoped (PK `(app_id, environment)` / `(application, environment)`); finders take `(app, env)` — no env-agnostic variants.
|
||||||
|
|
||||||
|
## storage/ — Artifact storage (concrete impls)
|
||||||
|
|
||||||
|
- `FilesystemArtifactStore` — implements `ArtifactStore` interface from `cameleer-server-core`. Persists JAR bytes under `{cameleer.server.runtime.jarstoragepath}/{appId}/v{version}/app.jar` (preserves the legacy layout — historical `app_versions.jar_path` rows resolve identically). `put` writes via `<target>.tmp` + `Files.move(ATOMIC_MOVE)` so concurrent readers never see a torn file. `delete` sweeps empty parent dirs and tolerates `DirectoryNotEmptyException` from concurrent sibling-version uploads. `size(coords)` returns the actual on-disk byte count — used by `ArtifactDownloadController` for authoritative `Content-Length` instead of trusting `AppVersion.jarSizeBytes`.
|
||||||
|
- `ArtifactDownloadTokenSigner` — HMAC-SHA256 URL signer/verifier. Key derived deterministically from JWT secret via HMAC(secret, "cameleer-artifact-token-v1"). Sign produces `{exp, sig}` tuple where `sig = base64url-no-pad(HMAC-SHA256(key, "{uuid}:{exp}"))`. `verify` is constant-time via `MessageDigest.isEqual`. Used by `DeploymentExecutor` to mint download URLs and by `ArtifactDownloadController` to verify them. Rejects null/blank secret at construction.
|
||||||
|
|
||||||
## storage/ — ClickHouse stores
|
## storage/ — ClickHouse stores
|
||||||
|
|
||||||
- `ClickHouseExecutionStore`, `ClickHouseMetricsStore`, `ClickHouseMetricsQueryStore`
|
- `ClickHouseExecutionStore`, `ClickHouseMetricsStore`, `ClickHouseMetricsQueryStore`
|
||||||
@@ -126,6 +162,8 @@ ClickHouse is shared across tenants. Every ClickHouse query must filter by `tena
|
|||||||
- `ClickHouseDiagramStore`, `ClickHouseAgentEventRepository`
|
- `ClickHouseDiagramStore`, `ClickHouseAgentEventRepository`
|
||||||
- `ClickHouseUsageTracker` — usage_events for billing
|
- `ClickHouseUsageTracker` — usage_events for billing
|
||||||
- `ClickHouseRouteCatalogStore` — persistent route catalog with first_seen cache, warm-loaded on startup
|
- `ClickHouseRouteCatalogStore` — persistent route catalog with first_seen cache, warm-loaded on startup
|
||||||
|
- `ClickHouseServerMetricsStore` — periodic dumps of the server's own Micrometer registry into the `server_metrics` table. Tenant-stamped (bound at the scheduler, not the bean); no `environment` column (server straddles envs). Batch-insert via `JdbcTemplate.batchUpdate` with `Map(String, String)` tag binding. Written by `ServerMetricsSnapshotScheduler`.
|
||||||
|
- `ClickHouseServerMetricsQueryStore` — read side of `server_metrics` for dashboards. Implements `ServerMetricsQueryStore`. `catalog(from,to)` returns name+type+statistics+tagKeys, `listInstances(from,to)` returns server_instance_ids with first/last seen, `query(request)` builds bucketed time-series with `raw` or `delta` mode and supports a derived `mean` statistic for timers. All identifier inputs regex-validated; tenant_id always bound; max range 31 days; series count capped at 500. Exposed via `ServerMetricsAdminController`.
|
||||||
|
|
||||||
## search/ — ClickHouse search and log stores
|
## search/ — ClickHouse search and log stores
|
||||||
|
|
||||||
@@ -134,10 +172,12 @@ ClickHouse is shared across tenants. Every ClickHouse query must filter by `tena
|
|||||||
|
|
||||||
## security/ — Spring Security
|
## security/ — Spring Security
|
||||||
|
|
||||||
- `SecurityConfig` — WebSecurityFilterChain, JWT filter, CORS, OIDC conditional
|
- `SecurityConfig` — WebSecurityFilterChain, JWT filter, CORS, OIDC conditional. `/api/v1/admin/outbound-connections/**` GETs permit OPERATOR in addition to ADMIN (defense-in-depth at controller level); mutations remain ADMIN-only. Alerting matchers: GET `/environments/*/alerts/**` VIEWER+; POST/PUT/DELETE rules and silences OPERATOR+; ack/read/bulk-read VIEWER+; POST `/alerts/notifications/*/retry` OPERATOR+.
|
||||||
- `JwtAuthenticationFilter` — OncePerRequestFilter, validates Bearer tokens
|
- `JwtAuthenticationFilter` — OncePerRequestFilter, validates Bearer tokens. Tries internal HMAC first; on failure (and when `oidcDecoder != null`) falls back to external-IdP validation. Resource-server path delegates to `OidcAccountSyncService.ensureProvisioned(jwt)` to upsert the user into `users` on first contact — without it, every later FK-to-`users(user_id)` insert (`deployments.created_by`, `alert_rules.created_by`, …) would 500 with a foreign-key violation. Sets `principal.name` to the bare `oidc:<sub>` so the env-scoped strip-`"user:"` convention is a no-op (still produces the correct FK value).
|
||||||
|
- `OidcAccountSyncService` — provisions OIDC users into `users` from the `JwtAuthenticationFilter` resource-server path. `ensureProvisioned(Jwt)` short-circuits when the user exists; otherwise reads `OidcConfigRepository` (defaults `autoSignup=true` when no row — i.e., OIDC configured purely via env var), enforces the `max_users` cap via `LicenseEnforcer`, then upserts `UserInfo(userId="oidc:<sub>", provider="oidc:<issuer-host>", email, displayName, createdAt)`. Returns `Optional.empty()` on refusal so the filter falls through to anonymous (Spring → 401), never throws.
|
||||||
- `JwtServiceImpl` — HMAC-SHA256 JWT (Nimbus JOSE)
|
- `JwtServiceImpl` — HMAC-SHA256 JWT (Nimbus JOSE)
|
||||||
- `OidcAuthController` — /api/v1/auth/oidc (login-uri, token-exchange, logout)
|
- `UiAuthController` — `/api/v1/auth` (login, refresh, me, logout). Upserts `users.user_id = request.username()` (bare); signs JWTs with `subject = "user:" + userId`. `refresh`/`me`/`logout` strip the `"user:"` prefix from incoming subjects via `stripSubjectPrefix()` before any DB/RBAC lookup. `logout` revokes outstanding tokens by writing `users.token_revoked_before` and audits under `AuditCategory.AUTH / logout`.
|
||||||
|
- `OidcAuthController` — `/api/v1/auth/oidc` (login-uri, token-exchange, logout). Upserts `users.user_id = "oidc:" + oidcUser.subject()` (no `user:` prefix); signs JWTs with `subject = "user:oidc:" + oidcUser.subject()`. `applyClaimMappings` + `getSystemRoleNames` calls all use the bare `oidc:<sub>` form.
|
||||||
- `OidcTokenExchanger` — code -> tokens, role extraction from access_token then id_token
|
- `OidcTokenExchanger` — code -> tokens, role extraction from access_token then id_token
|
||||||
- `OidcProviderHelper` — OIDC discovery, JWK source cache
|
- `OidcProviderHelper` — OIDC discovery, JWK source cache
|
||||||
|
|
||||||
@@ -151,10 +191,52 @@ ClickHouse is shared across tenants. Every ClickHouse query must filter by `tena
|
|||||||
|
|
||||||
- `JarRetentionJob` — @Scheduled 03:00 daily, per-environment retention, skips deployed versions
|
- `JarRetentionJob` — @Scheduled 03:00 daily, per-environment retention, skips deployed versions
|
||||||
|
|
||||||
|
## alerting/eval/ — Rule evaluation
|
||||||
|
|
||||||
|
- `AlertEvaluatorJob` — @Scheduled tick driver; per-rule claim/release via `AlertRuleRepository`, dispatches to per-kind `ConditionEvaluator`, persists advanced cursor on release via `AlertRule.withEvalState`.
|
||||||
|
- `BatchResultApplier` — `@Component` that wraps a single rule's tick outcome (`EvalResult.Batch` = `firings` + `nextEvalState`) in one `@Transactional` boundary: instance upserts + notification enqueues + cursor advance commit atomically or roll back together. This is the exactly-once-per-exchange guarantee for `PER_EXCHANGE` fire mode.
|
||||||
|
- `ConditionEvaluator` — interface; per-kind implementations: `ExchangeMatchEvaluator`, `AgentLifecycleEvaluator`, `AgentStateEvaluator`, `DeploymentStateEvaluator`, `JvmMetricEvaluator`, `LogPatternEvaluator`, `RouteMetricEvaluator`.
|
||||||
|
- `AlertStateTransitions` — PER_EXCHANGE vs rule-level FSM helpers (fire/resolve/ack).
|
||||||
|
- `PerKindCircuitBreaker` — trips noisy per-kind evaluators; `TickCache` — per-tick shared lookups (apps, envs, silences).
|
||||||
|
|
||||||
|
## http/ — Outbound HTTP client implementation
|
||||||
|
|
||||||
|
- `SslContextBuilder` — composes SSL context from `OutboundHttpProperties` + `OutboundHttpRequestContext`. Supports SYSTEM_DEFAULT (JDK roots + configured CA extras), TRUST_ALL (short-circuit no-op TrustManager), TRUST_PATHS (JDK roots + system extras + per-request extras). Throws `IllegalArgumentException("CA file not found: ...")` on missing PEM.
|
||||||
|
- `ApacheOutboundHttpClientFactory` — Apache HttpClient 5 impl of `OutboundHttpClientFactory`. Memoizes clients per `CacheKey(trustAll, caPaths, mode, connectTimeout, readTimeout)`. Applies `NoopHostnameVerifier` when trust-all is active.
|
||||||
|
- `config/OutboundHttpConfig` — `@ConfigurationProperties("cameleer.server.outbound-http")`. Exposes beans: `OutboundHttpProperties`, `SslContextBuilder`, `OutboundHttpClientFactory`. `@PostConstruct` logs WARN on trust-all and throws if configured CA paths don't exist.
|
||||||
|
|
||||||
|
## outbound/ — Admin-managed outbound connections (implementation)
|
||||||
|
|
||||||
|
- `crypto/SecretCipher` — AES-GCM symmetric cipher with key derived via HMAC-SHA256(jwtSecret, "cameleer-outbound-secret-v1"). Ciphertext format: base64(IV(12 bytes) || GCM output with 128-bit tag). `encrypt` throws `IllegalStateException`; `decrypt` throws `IllegalArgumentException` on tamper/wrong-key/malformed.
|
||||||
|
- `storage/PostgresOutboundConnectionRepository` — JdbcTemplate impl. `save()` upserts by id; JSONB serialization via ObjectMapper; UUID arrays via `ConnectionCallback`. Reads `created_by`/`updated_by` as String (= users.user_id TEXT).
|
||||||
|
- `OutboundConnectionServiceImpl` — service layer. Tenant bound at construction via `cameleer.server.tenant.id` property. Uniqueness check via `findByName`. Narrowing-envs guard: rejects update that removes envs while rules reference the connection (rulesReferencing stubbed in Plan 01, wired in Plan 02). Delete guard: rejects if referenced by rules.
|
||||||
|
- `controller/OutboundConnectionAdminController` — REST controller. Class-level `@PreAuthorize("hasRole('ADMIN')")` defaults; GETs relaxed to ADMIN|OPERATOR. Resolves acting user id via the user-id convention (strip `"user:"` from `authentication.name` → matches `users.user_id` FK). Audit via `AuditCategory.OUTBOUND_CONNECTION_CHANGE`.
|
||||||
|
- `dto/OutboundConnectionRequest` — Bean Validation: `@NotBlank` name, `@Pattern("^https://.+")` url, `@NotNull` method/tlsTrustMode/auth. Compact ctor throws `IllegalArgumentException` if TRUST_PATHS with empty paths list.
|
||||||
|
- `dto/OutboundConnectionDto` — response DTO. `hmacSecretSet: boolean` instead of the ciphertext; `authKind: OutboundAuthKind` instead of the full auth config.
|
||||||
|
- `dto/OutboundConnectionTestResult` — result of POST `/{id}/test`: status, latencyMs, responseSnippet (first 512 chars), tlsProtocol/cipherSuite/peerCertSubject (protocol is "TLS" stub; enriched in Plan 02 follow-up), error (nullable).
|
||||||
|
- `config/OutboundBeanConfig` — registers `OutboundConnectionRepository`, `SecretCipher`, `OutboundConnectionService` beans.
|
||||||
|
|
||||||
|
## license/ — License enforcement & lifecycle
|
||||||
|
|
||||||
|
- `LicenseService` — install / replace / revalidate mediator. `install(token, installedBy, source)` validates via `LicenseValidator`, on failure marks the gate INVALID + audits `reject_license` + publishes `LicenseChangedEvent` and rethrows; on success persists via `LicenseRepository.upsert(...)`, mutates `LicenseGate`, audits `install_license` or `replace_license` (detects existing row), and publishes `LicenseChangedEvent`. `loadInitial(envToken, fileToken)` boot precedence env > file > DB; ABSENT publishes a `LicenseChangedEvent(ABSENT, null)`. `revalidate()` re-runs validation against the persisted token, on success bumps `last_validated_at`; on failure marks INVALID and audits `revalidate_license` FAILURE. `getTenantId()` exposes the tenant for downstream lookups.
|
||||||
|
- `LicenseRepository` — interface in `app/license`. `Optional<LicenseRecord> findByTenantId(String)`, `void upsert(LicenseRecord)`, `int touchValidated(String tenantId, Instant)`, `int delete(String)`.
|
||||||
|
- `LicenseRecord` — record persisted in PG `license` table: `(String tenantId, String token, UUID licenseId, Instant installedAt, String installedBy, Instant expiresAt, Instant lastValidatedAt)`.
|
||||||
|
- `PostgresLicenseRepository` — JdbcTemplate impl of `LicenseRepository`. Targets PG `license` table (V5). Upsert via `INSERT ... ON CONFLICT (tenant_id) DO UPDATE`.
|
||||||
|
- `LicenseChangedEvent` — Spring application event: `(LicenseState state, LicenseInfo current)`. Published on every install / replace / revalidate / boot-time ABSENT path so downstream listeners (retention policy, metrics, etc.) react uniformly.
|
||||||
|
- `LicenseEnforcer` — `@Component`. `assertWithinCap(String limitKey, long currentUsage, long requestedDelta)` consults `LicenseGate.getEffectiveLimits()`. On overflow increments `cameleer_license_cap_rejections_total{limit=...}`, emits an `AuditCategory.LICENSE / cap_exceeded` audit row when `AuditService` is wired (try/catch + log.warn so audit-write failures don't suppress the 403), and throws `LicenseCapExceededException`. Unknown limit keys propagate `IllegalArgumentException` from `LicenseLimits.get(...)` (programmer error, not a 403).
|
||||||
|
- `LicenseUsageReader` — `@Component` over PG. `snapshot()` returns a `Map<String,Long>` of (max_environments, max_apps, max_users, max_outbound_connections, max_alert_rules, max_total_cpu_millis, max_total_memory_mb, max_total_replicas) from PG row counts and a SUM over non-stopped deployments' `deployed_config_snapshot.containerConfig` (replicas × cpuLimit / memoryLimitMb). `computeUsage()` returns the typed `ComputeUsage(cpuMillis, memoryMb, replicas)` tuple consumed by `DeploymentExecutor` PRE_FLIGHT cap checks. `agentCount(int)` echoes a registry-supplied live count (registry is in-memory; not stored in PG).
|
||||||
|
- `LicenseCapExceededException` — typed `RuntimeException(limitKey, current, cap)` with accessors. Mapped to HTTP 403 by `LicenseExceptionAdvice`.
|
||||||
|
- `LicenseExceptionAdvice` — `@ControllerAdvice` mapping `LicenseCapExceededException` → 403 with body `{error:"license cap reached", limit, current, cap, state, message}` where `message` is `LicenseMessageRenderer.forCap(state, info, limit, current, cap, invalidReason)`.
|
||||||
|
- `LicenseMessageRenderer` — pure formatter (utility class, no DI). `forCap(state, info, limit, current, cap[, invalidReason])` per-state human messages for cap-rejection responses; `forState(state, info[, invalidReason])` shorter state-only messages for the `/usage` endpoint and metrics surfaces.
|
||||||
|
- `RetentionPolicyApplier` — `@EventListener(LicenseChangedEvent.class) @Async`. For each environment × table in the static `SPECS` list (`executions`, `processor_executions`, `logs`, `agent_metrics`, `agent_events`) computes `effective = min(licenseCap, env.configuredRetentionDays)` and emits `ALTER TABLE <t> MODIFY TTL toDateTime(<col>) + INTERVAL <n> DAY DELETE WHERE environment = '<slug>'`. ClickHouse failures are logged and swallowed (best-effort; never propagates to the originating license install/revalidate). `route_diagrams` (no TTL clause) and `server_metrics` (no environment column) are intentionally excluded.
|
||||||
|
- `LicenseRevalidationJob` — `@Component`. `@Scheduled(cron = "0 0 3 * * *")` daily revalidation; `@EventListener(ApplicationReadyEvent.class) @Async` 60-second post-startup tick to catch ABSENT→ACTIVE when a license was inserted between server starts. Both paths call `LicenseService.revalidate()` and swallow scheduler-thread crashes.
|
||||||
|
- `LicenseMetrics` — `@Component`. Registers Micrometer gauges: `cameleer_license_state{state=...}` (one-hot per `LicenseState`), `cameleer_license_days_remaining` (negative when ABSENT/INVALID), `cameleer_license_last_validated_age_seconds` (0 when no DB row). Refreshed eagerly on `LicenseChangedEvent` via `@EventListener` and lazily every 60s via `@Scheduled(fixedDelay = 60_000)`.
|
||||||
|
|
||||||
## config/ — Spring beans
|
## config/ — Spring beans
|
||||||
|
|
||||||
- `RuntimeOrchestratorAutoConfig` — conditional Docker/Disabled orchestrator + NetworkManager + EventMonitor
|
- `RuntimeOrchestratorAutoConfig` — conditional Docker/Disabled orchestrator + NetworkManager + EventMonitor
|
||||||
- `RuntimeBeanConfig` — DeploymentExecutor, AppService, EnvironmentService
|
- `RuntimeBeanConfig` — DeploymentExecutor, AppService, EnvironmentService. Wires `CreateGuard` instances per service from `LicenseEnforcer.assertWithinCap(...)` so creation paths (Environment, App, Agent) consult license caps without core depending on the app module.
|
||||||
- `SecurityBeanConfig` — JwtService, Ed25519, BootstrapTokenValidator
|
- `SecurityBeanConfig` — JwtService, Ed25519, BootstrapTokenValidator
|
||||||
- `StorageBeanConfig` — all repositories
|
- `StorageBeanConfig` — all repositories
|
||||||
- `ClickHouseConfig` — ClickHouse JdbcTemplate, schema initializer
|
- `ClickHouseConfig` — ClickHouse JdbcTemplate, schema initializer
|
||||||
|
- `LicenseBeanConfig` — license bean topology in dependency order: `LicenseGate` → `LicenseValidator` (when `cameleer.server.license.publickey` is unset, an always-failing override is returned so any loaded token still routes through `install()` and is audited as INVALID, never silently dropped) → `LicenseService` → `LicenseBootLoader` (`@PostConstruct` drives `loadInitial(envToken, fileToken)` once the context is ready; resolution order env var > license file > persisted DB row).
|
||||||
|
|||||||
@@ -8,11 +8,15 @@ paths:
|
|||||||
|
|
||||||
# CI/CD & Deployment
|
# CI/CD & Deployment
|
||||||
|
|
||||||
- CI workflow: `.gitea/workflows/ci.yml` — build -> docker -> deploy on push to main or feature branches
|
- CI workflow: `.gitea/workflows/ci.yml` — build -> docker -> deploy on push to main or feature branches. `paths-ignore` skips the whole pipeline for docs-only / `.planning/` / `.claude/` / `*.md` changes (push and PR triggers).
|
||||||
- Build step skips integration tests (`-DskipITs`) — Testcontainers needs Docker daemon
|
- Build step skips integration tests (`-DskipITs`) — Testcontainers needs Docker daemon
|
||||||
|
- Build caches (parallel `actions/cache@v4` steps in the `build` job): `~/.m2/repository` (key on all `pom.xml`), `~/.npm` (key on `ui/package-lock.json`), `ui/node_modules/.vite` (key on `ui/package-lock.json` + `ui/vite.config.ts`). UI install uses `npm ci --prefer-offline --no-audit --fund=false` so the npm cache is the primary source.
|
||||||
|
- Maven build performance (set in `pom.xml` and `cameleer-server-app/pom.xml`): `useIncrementalCompilation=true` on the compiler plugin; Surefire uses `forkCount=1C` + `reuseForks=true` (one JVM per CPU core, reused across test classes); Failsafe keeps `forkCount=1` + `reuseForks=true`. Unit tests must not rely on per-class JVM isolation.
|
||||||
|
- UI build script (`ui/package.json`): `build` is `vite build` only — the type-check pass was split out into `npm run typecheck` (run separately when you want a full `tsc --noEmit` sweep).
|
||||||
- Docker: multi-stage build (`Dockerfile`), `$BUILDPLATFORM` for native Maven on ARM64 runner, amd64 runtime. `docker-entrypoint.sh` imports `/certs/ca.pem` into JVM truststore before starting the app (supports custom CAs for OIDC discovery without `CAMELEER_SERVER_SECURITY_OIDCTLSSKIPVERIFY`).
|
- Docker: multi-stage build (`Dockerfile`), `$BUILDPLATFORM` for native Maven on ARM64 runner, amd64 runtime. `docker-entrypoint.sh` imports `/certs/ca.pem` into JVM truststore before starting the app (supports custom CAs for OIDC discovery without `CAMELEER_SERVER_SECURITY_OIDCTLSSKIPVERIFY`).
|
||||||
- `REGISTRY_TOKEN` build arg required for `cameleer-common` dependency resolution
|
- `REGISTRY_TOKEN` build arg required for `cameleer-common` dependency resolution
|
||||||
- Registry: `gitea.siegeln.net/cameleer/cameleer-server` (container images)
|
- Registry: `gitea.siegeln.net/cameleer/cameleer-server` (container images — CI push target, internal hostname). The same registry is reachable as `registry.cameleer.io/cameleer/cameleer-server` for customer pulls; the server's compiled-in image defaults target the public alias. See `CLAUDE.md` § Registry naming.
|
||||||
|
- `cameleer-runtime-loader` image (init container that fetches the deployable JAR before the runtime container starts) is built and pushed by **cameleer-saas** CI (`docker/runtime-loader/` in that repo) — it lives alongside the other sidecar/infra images (runtime-base, postgres, clickhouse, traefik, logto). cameleer-server **consumes** the image via `DockerRuntimeOrchestrator` but does not build it. Cross-repo contract is regression-tested by `LoaderHardeningIT` here, which pulls the published `:latest` and asserts exit 0 under the orchestrator's hardening contract.
|
||||||
- K8s manifests in `deploy/` — Kustomize base + overlays (main/feature), shared infra (PostgreSQL, ClickHouse, Logto) as top-level manifests
|
- K8s manifests in `deploy/` — Kustomize base + overlays (main/feature), shared infra (PostgreSQL, ClickHouse, Logto) as top-level manifests
|
||||||
- Deployment target: k3s at 192.168.50.86, namespace `cameleer` (main), `cam-<slug>` (feature branches)
|
- Deployment target: k3s at 192.168.50.86, namespace `cameleer` (main), `cam-<slug>` (feature branches)
|
||||||
- Feature branches: isolated namespace, PG schema; Traefik Ingress at `<slug>-api.cameleer.siegeln.net`
|
- Feature branches: isolated namespace, PG schema; Traefik Ingress at `<slug>-api.cameleer.siegeln.net`
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ paths:
|
|||||||
|
|
||||||
# Core Module Key Classes
|
# Core Module Key Classes
|
||||||
|
|
||||||
`cameleer-server-core/src/main/java/com/cameleer/server/core/`
|
`cameleer-server-core/src/main/java/io/cameleer/server/core/`
|
||||||
|
|
||||||
## agent/ — Agent lifecycle and commands
|
## agent/ — Agent lifecycle and commands
|
||||||
|
|
||||||
@@ -17,7 +17,8 @@ paths:
|
|||||||
- `CommandType` — enum for command types (config-update, deep-trace, replay, route-control, etc.)
|
- `CommandType` — enum for command types (config-update, deep-trace, replay, route-control, etc.)
|
||||||
- `CommandStatus` — enum for command acknowledgement states
|
- `CommandStatus` — enum for command acknowledgement states
|
||||||
- `CommandReply` — record: command execution result from agent
|
- `CommandReply` — record: command execution result from agent
|
||||||
- `AgentEventRecord`, `AgentEventRepository` — event persistence
|
- `AgentEventRecord`, `AgentEventRepository` — event persistence. `AgentEventRepository.queryPage(...)` is cursor-paginated (`AgentEventPage{data, nextCursor, hasMore}`); the legacy non-paginated `query(...)` path is gone. `AgentEventRepository.findInWindow(env, appSlug, agentId, eventTypes, from, to, limit)` returns matching events ordered by `(timestamp ASC, insert_id ASC)` — consumed by `AgentLifecycleEvaluator`.
|
||||||
|
- `AgentEventPage` — record: `(List<AgentEventRecord> data, String nextCursor, boolean hasMore)` returned by `AgentEventRepository.queryPage`
|
||||||
- `AgentEventListener` — callback interface for agent events
|
- `AgentEventListener` — callback interface for agent events
|
||||||
- `RouteStateRegistry` — tracks per-agent route states
|
- `RouteStateRegistry` — tracks per-agent route states
|
||||||
|
|
||||||
@@ -25,33 +26,52 @@ paths:
|
|||||||
|
|
||||||
- `App` — record: id, environmentId, slug, displayName, containerConfig (JSONB)
|
- `App` — record: id, environmentId, slug, displayName, containerConfig (JSONB)
|
||||||
- `AppVersion` — record: id, appId, version, jarPath, detectedRuntimeType, detectedMainClass
|
- `AppVersion` — record: id, appId, version, jarPath, detectedRuntimeType, detectedMainClass
|
||||||
- `Environment` — record: id, slug, jarRetentionCount
|
- `Environment` — record: id, slug, displayName, production, enabled, defaultContainerConfig, jarRetentionCount, color, createdAt, executionRetentionDays, logRetentionDays, metricRetentionDays. `color` is one of the 8 preset palette values validated by `EnvironmentColor.VALUES` and CHECK-constrained in PostgreSQL (V2 migration). The 3 retention day fields (V5) are `int`-typed (not nullable, since unlimited has no use-case), default to 1 day per the V5 `NOT NULL DEFAULT 1`, validated >= 1 in the canonical constructor.
|
||||||
- `Deployment` — record: id, appId, appVersionId, environmentId, status, targetState, deploymentStrategy, replicaStates (JSONB), deployStage, containerId, containerName
|
- `EnvironmentColor` — constants: `DEFAULT = "slate"`, `VALUES = {slate,red,amber,green,teal,blue,purple,pink}`, `isValid(String)`.
|
||||||
- `DeploymentStatus` — enum: STOPPED, STARTING, RUNNING, DEGRADED, STOPPING, FAILED
|
- `Deployment` — record: id, appId, appVersionId, environmentId, status, targetState, deploymentStrategy, replicaStates (JSONB), deployStage, containerId, containerName, createdBy (String, user_id reference; nullable for pre-V4 historical rows)
|
||||||
|
- `DeploymentStatus` — enum: STOPPED, STARTING, RUNNING, DEGRADED, STOPPING, FAILED. `DEGRADED` is reserved for post-deploy drift (a replica died after RUNNING); `DeploymentExecutor` now marks partial-healthy deploys FAILED, not DEGRADED.
|
||||||
- `DeployStage` — enum: PRE_FLIGHT, PULL_IMAGE, CREATE_NETWORK, START_REPLICAS, HEALTH_CHECK, SWAP_TRAFFIC, COMPLETE
|
- `DeployStage` — enum: PRE_FLIGHT, PULL_IMAGE, CREATE_NETWORK, START_REPLICAS, HEALTH_CHECK, SWAP_TRAFFIC, COMPLETE
|
||||||
- `DeploymentService` — createDeployment (deletes terminal deployments first), markRunning, markFailed, markStopped
|
- `DeploymentStrategy` — enum: BLUE_GREEN, ROLLING. Stored on `ResolvedContainerConfig.deploymentStrategy` as kebab-case string (`"blue-green"` / `"rolling"`). `fromWire(String)` is the only conversion entry point; unknown/null inputs fall back to BLUE_GREEN so the executor dispatch site never null-checks or throws.
|
||||||
|
- `DeploymentService` — createDeployment (calls `deleteFailedByAppAndEnvironment` first so FAILED rows don't pile up; STOPPED rows are preserved as restorable checkpoints), markRunning, markFailed, markStopped
|
||||||
- `RuntimeType` — enum: AUTO, SPRING_BOOT, QUARKUS, PLAIN_JAVA, NATIVE
|
- `RuntimeType` — enum: AUTO, SPRING_BOOT, QUARKUS, PLAIN_JAVA, NATIVE
|
||||||
- `RuntimeDetector` — probes JAR files at upload time: detects runtime from manifest Main-Class (Spring Boot loader, Quarkus entry point, plain Java) or native binary (non-ZIP magic bytes)
|
- `RuntimeDetector` — probes JAR files at upload time: detects runtime from manifest Main-Class (Spring Boot loader, Quarkus entry point, plain Java) or native binary (non-ZIP magic bytes)
|
||||||
- `ContainerRequest` — record: 20 fields for Docker container creation (includes runtimeType, customArgs, mainClass)
|
- `ContainerRequest` — record: 21 fields for Docker container creation. Replaces the legacy `jarPath`/`jarVolumeName`/`jarVolumeMountPath` triple with `appVersionId` (UUID), `artifactDownloadUrl` (signed), `artifactExpectedSize` (bytes), and `loaderImage`. The orchestrator's loader init-container fetches the JAR from the URL into a per-replica named volume; the main container reads it from `/app/jars/app.jar`.
|
||||||
- `ContainerStatus` — record: state, running, exitCode, error
|
- `ContainerStatus` — record: state, running, exitCode, error
|
||||||
- `ResolvedContainerConfig` — record: typed config with memoryLimitMb, memoryReserveMb, cpuRequest, cpuLimit, appPort, exposedPorts, customEnvVars, stripPathPrefix, sslOffloading, routingMode, routingDomain, serverUrl, replicas, deploymentStrategy, routeControlEnabled, replayEnabled, runtimeType, customArgs, extraNetworks
|
- `ResolvedContainerConfig` — record: typed config with memoryLimitMb, memoryReserveMb, cpuRequest, cpuLimit, appPort, exposedPorts, customEnvVars, stripPathPrefix, sslOffloading, routingMode, routingDomain, serverUrl, replicas, deploymentStrategy, routeControlEnabled, replayEnabled, runtimeType, customArgs, extraNetworks, externalRouting (default `true`; when `false`, `TraefikLabelBuilder` strips all `traefik.*` labels so the container is not publicly routed), certResolver (server-wide, sourced from `CAMELEER_SERVER_RUNTIME_CERTRESOLVER`; when blank the `tls.certresolver` label is omitted — use for dev installs with a static TLS store)
|
||||||
- `RoutingMode` — enum for routing strategies
|
- `RoutingMode` — enum for routing strategies
|
||||||
- `ConfigMerger` — pure function: resolve(globalDefaults, envConfig, appConfig) -> ResolvedContainerConfig
|
- `ConfigMerger` — pure function: resolve(globalDefaults, envConfig, appConfig) -> ResolvedContainerConfig
|
||||||
- `RuntimeOrchestrator` — interface: startContainer, stopContainer, getContainerStatus, getLogs, startLogCapture, stopLogCapture
|
- `RuntimeOrchestrator` — interface: startContainer, stopContainer, getContainerStatus, getLogs, startLogCapture, stopLogCapture
|
||||||
- `AppRepository`, `AppVersionRepository`, `EnvironmentRepository`, `DeploymentRepository` — repository interfaces
|
- `AppRepository`, `AppVersionRepository`, `EnvironmentRepository`, `DeploymentRepository` — repository interfaces
|
||||||
- `AppService`, `EnvironmentService` — domain services
|
- `AppService`, `EnvironmentService` — domain services
|
||||||
|
- `CreateGuard` — `@FunctionalInterface`. `void check(long current)` — implementations throw to abort creation. `NOOP` constant is the default. Consulted by `EnvironmentService.create`, `AppService.createApp`, and `AgentRegistryService.register` so license caps can be enforced from the app module without leaking Spring or app-only types into core. Wired in `LicenseBeanConfig` to a `LicenseEnforcer.assertWithinCap(...)` call per limit key.
|
||||||
|
|
||||||
|
## license/ — License domain (signed-token tier system)
|
||||||
|
|
||||||
|
The pure license **contract types** live in the separate `cameleer-license-api` module under package `io.cameleer.license` (no Spring, no server-runtime deps) so consumers like `cameleer-license-minter` and `cameleer-saas` can use them without inheriting server internals. Server-core only contains the runtime state holder (`LicenseGate`).
|
||||||
|
|
||||||
|
Contract types in `cameleer-license-api` (package `io.cameleer.license`):
|
||||||
|
- `LicenseInfo` — record: `(UUID licenseId, String tenantId, String label, Map<String,Integer> limits, Instant issuedAt, Instant expiresAt, int gracePeriodDays)`. `isExpired()` true once `now > expiresAt + gracePeriodDays`; `isAfterRawExpiry()` true once `now > expiresAt`. Constructed via `LicenseValidator`; canonical ctor null-checks all required fields and rejects blank tenantId / negative grace.
|
||||||
|
- `LicenseLimits` — typed limits container backed by `Map<String,Integer>`. `defaultsOnly()` returns the `DefaultTierLimits.DEFAULTS` view; `mergeOverDefaults(overrides)` produces the license-overrides UNION default tier. `get(String key)` returns the cap; throws `IllegalArgumentException` for unknown keys (programmer error). `isDefaultSourced(key, license)` reports whether a key fell through to the default tier.
|
||||||
|
- `DefaultTierLimits` — immutable `LinkedHashMap` of constants for the no-license fallback tier: `max_environments=1, max_apps=3, max_agents=5, max_users=3, max_outbound_connections=1, max_alert_rules=2, max_total_cpu_millis=2000, max_total_memory_mb=2048, max_total_replicas=5, max_execution_retention_days=1, max_log_retention_days=1, max_metric_retention_days=1, max_jar_retention_count=3`.
|
||||||
|
- `LicenseValidator` — verifies signed token. Constructor `(String publicKeyBase64, String expectedTenantId)` decodes an X.509 Ed25519 public key. `validate(String token)` splits `payload.signature`, verifies the Ed25519 signature, parses the JSON payload, enforces `tenantId == expectedTenantId`, and returns `LicenseInfo`. Throws `SecurityException` on signature mismatch / `IllegalArgumentException` on parse failure / expired payload.
|
||||||
|
- `LicenseStateMachine` — pure classifier. `classify(LicenseInfo, String invalidReason)` returns `INVALID` if a reason is set, `ABSENT` if no license, `ACTIVE` if `now <= expiresAt`, `GRACE` if expired but within grace window, `EXPIRED` otherwise.
|
||||||
|
- `LicenseState` — enum: `ABSENT, ACTIVE, GRACE, EXPIRED, INVALID`.
|
||||||
|
|
||||||
|
Runtime state holder in server-core (package `io.cameleer.server.core.license`):
|
||||||
|
- `LicenseGate` — runtime state holder (thread-safe via `AtomicReference<Snapshot>`). `getCurrent()` returns the current `LicenseInfo` (null when ABSENT/INVALID); `getState()` delegates to `LicenseStateMachine.classify(...)`; `getEffectiveLimits()` returns license-overrides UNION defaults in `ACTIVE`/`GRACE`, defaults-only otherwise. `getInvalidReason()`, `load(LicenseInfo)`, `markInvalid(String reason)`, `clear()` are the mutators. `getLimit(key, defaultValue)` shorthand swallows unknown-key errors.
|
||||||
|
|
||||||
## search/ — Execution search and stats
|
## search/ — Execution search and stats
|
||||||
|
|
||||||
- `SearchService` — search, count, stats, statsForApp, statsForRoute, timeseries, timeseriesForApp, timeseriesForRoute, timeseriesGroupedByApp, timeseriesGroupedByRoute, slaCompliance, slaCountsByApp, slaCountsByRoute, topErrors, activeErrorTypes, punchcard, distinctAttributeKeys. `statsForRoute`/`timeseriesForRoute` take `(routeId, applicationId)` — app filter is applied to `stats_1m_route`.
|
- `SearchService` — search, count, stats, statsForApp, statsForRoute, timeseries, timeseriesForApp, timeseriesForRoute, timeseriesGroupedByApp, timeseriesGroupedByRoute, slaCompliance, slaCountsByApp, slaCountsByRoute, topErrors, activeErrorTypes, punchcard, distinctAttributeKeys. `statsForRoute`/`timeseriesForRoute` take `(routeId, applicationId)` — app filter is applied to `stats_1m_route`.
|
||||||
- `SearchRequest` / `SearchResult` — search DTOs
|
- `SearchRequest` / `SearchResult` — search DTOs. `SearchRequest.attributeFilters: List<AttributeFilter>` carries structured facet filters for execution attributes — key-only (exists), exact (key=value), or wildcard (`*` in value). The 21-arg legacy ctor is preserved for call-site churn; the compact ctor normalises null → `List.of()`.
|
||||||
|
- `AttributeFilter(key, value)` — record with key regex `^[a-zA-Z0-9._-]+$` (inlined into SQL, same constraint as alerting), `value == null` means key-exists, `value` containing `*` becomes a SQL LIKE pattern via `toLikePattern()`.
|
||||||
- `ExecutionStats`, `ExecutionSummary` — stats aggregation records
|
- `ExecutionStats`, `ExecutionSummary` — stats aggregation records
|
||||||
- `StatsTimeseries`, `TopError` — timeseries and error DTOs
|
- `StatsTimeseries`, `TopError` — timeseries and error DTOs
|
||||||
- `LogSearchRequest` / `LogSearchResponse` — log search DTOs
|
- `LogSearchRequest` / `LogSearchResponse` — log search DTOs. `LogSearchRequest.sources` / `levels` are `List<String>` (null-normalized, multi-value OR); `cursor` + `limit` + `sort` drive keyset pagination. Response carries `nextCursor` + `hasMore` + per-level `levelCounts`.
|
||||||
|
|
||||||
## storage/ — Storage abstractions
|
## storage/ — Storage abstractions
|
||||||
|
|
||||||
- `ExecutionStore`, `MetricsStore`, `MetricsQueryStore`, `StatsStore`, `DiagramStore`, `RouteCatalogStore`, `SearchIndex`, `LogIndex` — interfaces
|
- `ExecutionStore`, `MetricsStore`, `MetricsQueryStore`, `StatsStore`, `DiagramStore`, `RouteCatalogStore`, `SearchIndex`, `LogIndex` — interfaces. `DiagramStore.findLatestContentHashForAppRoute(appId, routeId, env)` resolves the latest diagram by (app, env, route) without consulting the agent registry, so routes whose publishing agents were removed between app versions still resolve. `findContentHashForRoute(route, instance)` is retained for the ingestion path that stamps a per-execution `diagramContentHash` at ingest time (point-in-time link from `ExecutionDetail`/`ExecutionSummary`).
|
||||||
- `RouteCatalogEntry` — record: applicationId, routeId, environment, firstSeen, lastSeen
|
- `RouteCatalogEntry` — record: applicationId, routeId, environment, firstSeen, lastSeen
|
||||||
- `LogEntryResult` — log query result record
|
- `LogEntryResult` — log query result record
|
||||||
- `model/` — `ExecutionDocument`, `MetricTimeSeries`, `MetricsSnapshot`
|
- `model/` — `ExecutionDocument`, `MetricTimeSeries`, `MetricsSnapshot`
|
||||||
@@ -77,7 +97,22 @@ paths:
|
|||||||
- `AppSettings`, `AppSettingsRepository` — per-app-per-env settings config and persistence. Record carries `(applicationId, environment, …)`; repository methods are `findByApplicationAndEnvironment`, `findByEnvironment`, `save`, `delete(appId, env)`. `AppSettings.defaults(appId, env)` produces a default instance scoped to an environment.
|
- `AppSettings`, `AppSettingsRepository` — per-app-per-env settings config and persistence. Record carries `(applicationId, environment, …)`; repository methods are `findByApplicationAndEnvironment`, `findByEnvironment`, `save`, `delete(appId, env)`. `AppSettings.defaults(appId, env)` produces a default instance scoped to an environment.
|
||||||
- `ThresholdConfig`, `ThresholdRepository` — alerting threshold config and persistence
|
- `ThresholdConfig`, `ThresholdRepository` — alerting threshold config and persistence
|
||||||
- `AuditService` — audit logging facade
|
- `AuditService` — audit logging facade
|
||||||
- `AuditRecord`, `AuditResult`, `AuditCategory`, `AuditRepository` — audit trail records and persistence
|
- `AuditRecord`, `AuditResult`, `AuditCategory` (enum: `INFRA, AUTH, USER_MGMT, CONFIG, RBAC, AGENT, OUTBOUND_CONNECTION_CHANGE, OUTBOUND_HTTP_TRUST_CHANGE, ALERT_RULE_CHANGE, ALERT_SILENCE_CHANGE, DEPLOYMENT, LICENSE`), `AuditRepository` — audit trail records and persistence
|
||||||
|
|
||||||
|
## http/ — Outbound HTTP primitives (cross-cutting)
|
||||||
|
|
||||||
|
- `OutboundHttpClientFactory` — interface: `clientFor(context)` returns memoized `CloseableHttpClient`
|
||||||
|
- `OutboundHttpProperties` — record: `trustAll, trustedCaPemPaths, defaultConnectTimeout, defaultReadTimeout, proxyUrl, proxyUsername, proxyPassword`
|
||||||
|
- `OutboundHttpRequestContext` — record of per-call TLS/timeout overrides; `systemDefault()` static factory
|
||||||
|
- `TrustMode` — enum: `SYSTEM_DEFAULT | TRUST_ALL | TRUST_PATHS`
|
||||||
|
|
||||||
|
## outbound/ — Admin-managed outbound connections
|
||||||
|
|
||||||
|
- `OutboundConnection` — record: id, tenantId, name, description, url, method, defaultHeaders, defaultBodyTmpl, tlsTrustMode, tlsCaPemPaths, hmacSecretCiphertext, auth, allowedEnvironmentIds, createdAt, createdBy (String user_id), updatedAt, updatedBy (String user_id). `isAllowedInEnvironment(envId)` returns true when allowed-envs list is empty OR contains the env.
|
||||||
|
- `OutboundAuth` — sealed interface + records: `None | Bearer(tokenCiphertext) | Basic(username, passwordCiphertext)`. Jackson `@JsonTypeInfo(use = DEDUCTION)` — wire shape has no discriminator, subtype inferred from fields.
|
||||||
|
- `OutboundAuthKind`, `OutboundMethod` — enums
|
||||||
|
- `OutboundConnectionRepository` — CRUD by (tenantId, id): save/findById/findByName/listByTenant/delete
|
||||||
|
- `OutboundConnectionService` — create/update/delete/get/list with uniqueness + narrow-envs + delete-if-referenced guards. `rulesReferencing(id)` stubbed in Plan 01 (returns `[]`); populated in Plan 02 against `AlertRuleRepository`.
|
||||||
|
|
||||||
## security/ — Auth
|
## security/ — Auth
|
||||||
|
|
||||||
@@ -91,8 +126,8 @@ paths:
|
|||||||
|
|
||||||
## ingestion/ — Buffered data pipeline
|
## ingestion/ — Buffered data pipeline
|
||||||
|
|
||||||
- `IngestionService` — ingestExecution, ingestMetric, ingestLog, ingestDiagram
|
- `IngestionService` — diagram + metrics facade (`ingestDiagram`, `acceptMetrics`, `getMetricsBuffer`). Execution ingestion went through here via the legacy `RouteExecution` shape until `ChunkAccumulator` took over writes from the chunked pipeline — the `ingestExecution` path plus its `ExecutionStore.upsert` / `upsertProcessors` dependencies were removed.
|
||||||
- `ChunkAccumulator` — batches data for efficient flush
|
- `ChunkAccumulator` — batches data for efficient flush; owns the execution write path (chunks → buffers → flush scheduler → `ClickHouseExecutionStore.insertExecutionBatch`).
|
||||||
- `WriteBuffer` — bounded ring buffer for async flush
|
- `WriteBuffer` — bounded ring buffer for async flush
|
||||||
- `BufferedLogEntry` — log entry wrapper with metadata
|
- `BufferedLogEntry` — log entry wrapper with metadata
|
||||||
- `MergedExecution`, `TaggedExecution`, `TaggedDiagram` — tagged ingestion records. `TaggedDiagram` carries `(instanceId, applicationId, environment, graph)` — env is resolved from the agent registry in the controller and stamped on the ClickHouse `route_diagrams` row.
|
- `MergedExecution`, `TaggedDiagram` — tagged ingestion records. `TaggedDiagram` carries `(instanceId, applicationId, environment, graph)` — env is resolved from the agent registry in the controller and stamped on the ClickHouse `route_diagrams` row.
|
||||||
|
|||||||
@@ -13,19 +13,63 @@ paths:
|
|||||||
When deployed via the cameleer-saas platform, this server orchestrates customer app containers using Docker. Key components:
|
When deployed via the cameleer-saas platform, this server orchestrates customer app containers using Docker. Key components:
|
||||||
|
|
||||||
- **ConfigMerger** (`core/runtime/ConfigMerger.java`) — pure function: resolve(globalDefaults, envConfig, appConfig) -> ResolvedContainerConfig. Three-layer merge: global (application.yml) -> environment (defaultContainerConfig JSONB) -> app (containerConfig JSONB). Includes `runtimeType` (default `"auto"`) and `customArgs` (default `""`).
|
- **ConfigMerger** (`core/runtime/ConfigMerger.java`) — pure function: resolve(globalDefaults, envConfig, appConfig) -> ResolvedContainerConfig. Three-layer merge: global (application.yml) -> environment (defaultContainerConfig JSONB) -> app (containerConfig JSONB). Includes `runtimeType` (default `"auto"`) and `customArgs` (default `""`).
|
||||||
- **TraefikLabelBuilder** (`app/runtime/TraefikLabelBuilder.java`) — generates Traefik Docker labels for path-based (`/{envSlug}/{appSlug}/`) or subdomain-based (`{appSlug}-{envSlug}.{domain}`) routing. Supports strip-prefix and SSL offloading toggles. Also sets per-replica identity labels: `cameleer.replica` (index) and `cameleer.instance-id` (`{envSlug}-{appSlug}-{replicaIndex}`). Internal processing uses labels (not container name parsing) for extensibility.
|
- **TraefikLabelBuilder** (`app/runtime/TraefikLabelBuilder.java`) — generates Traefik Docker labels for path-based (`/{envSlug}/{appSlug}/`) or subdomain-based (`{appSlug}-{envSlug}.{domain}`) routing. Supports strip-prefix and SSL offloading toggles. Per-replica identity labels: `cameleer.replica` (index), `cameleer.generation` (8-char deployment UUID prefix — pin Prometheus/Grafana deploy boundaries with this), `cameleer.instance-id` (`{envSlug}-{appSlug}-{replicaIndex}-{generation}`). Traefik router/service keys deliberately omit the generation so load balancing spans old + new replicas during a blue/green overlap. When `ResolvedContainerConfig.externalRouting()` is `false` (UI: Resources → External Routing, default `true`), the builder emits ONLY the identity labels (`managed-by`, `cameleer.*`) and skips every `traefik.*` label — the container stays on `cameleer-traefik` and the per-env network (so sibling containers can still reach it via Docker DNS) but is invisible to Traefik. The `tls.certresolver` label is emitted only when `CAMELEER_SERVER_RUNTIME_CERTRESOLVER` is set to a non-blank resolver name (matching a resolver configured in the Traefik static config). When unset (dev installs backed by a static TLS store) only `tls=true` is emitted and Traefik serves the default cert from the TLS store.
|
||||||
- **PrometheusLabelBuilder** (`app/runtime/PrometheusLabelBuilder.java`) — generates Prometheus `docker_sd_configs` labels per resolved runtime type: Spring Boot `/actuator/prometheus:8081`, Quarkus/native `/q/metrics:9000`, plain Java `/metrics:9464`. Labels merged into container metadata alongside Traefik labels at deploy time.
|
- **PrometheusLabelBuilder** (`app/runtime/PrometheusLabelBuilder.java`) — generates Prometheus `docker_sd_configs` labels per resolved runtime type: Spring Boot `/actuator/prometheus:8081`, Quarkus/native `/q/metrics:9000`, plain Java `/metrics:9464`. Labels merged into container metadata alongside Traefik labels at deploy time.
|
||||||
- **DockerNetworkManager** (`app/runtime/DockerNetworkManager.java`) — manages two Docker network tiers:
|
- **DockerNetworkManager** (`app/runtime/DockerNetworkManager.java`) — manages two Docker network tiers:
|
||||||
- `cameleer-traefik` — shared network; Traefik, server, and all app containers attach here. Server joined via docker-compose with `cameleer-server` DNS alias.
|
- `cameleer-traefik` — shared network; Traefik, server, and all app containers attach here. Server joined via docker-compose with `cameleer-server` DNS alias.
|
||||||
- `cameleer-env-{slug}` — per-environment isolated network; containers in the same environment discover each other via Docker DNS. In SaaS mode, env networks are tenant-scoped: `cameleer-env-{tenantId}-{envSlug}` (overloaded `envNetworkName(tenantId, envSlug)` method) to prevent cross-tenant collisions when multiple tenants have identically-named environments.
|
- `cameleer-env-{slug}` — per-environment isolated network; containers in the same environment discover each other via Docker DNS. In SaaS mode, env networks are tenant-scoped: `cameleer-env-{tenantId}-{envSlug}` (overloaded `envNetworkName(tenantId, envSlug)` method) to prevent cross-tenant collisions when multiple tenants have identically-named environments.
|
||||||
- **DockerEventMonitor** (`app/runtime/DockerEventMonitor.java`) — persistent Docker event stream listener for containers with `managed-by=cameleer-server` label. Detects die/oom/start/stop events and updates deployment replica states. Periodic reconciliation (@Scheduled every 30s) inspects actual container state and corrects deployment status mismatches (fixes stale DEGRADED with all replicas healthy).
|
- **DockerEventMonitor** (`app/runtime/DockerEventMonitor.java`) — persistent Docker event stream listener for containers with `managed-by=cameleer-server` label. Detects die/oom/start/stop events and updates deployment replica states. Periodic reconciliation (@Scheduled every 30s) inspects actual container state and corrects deployment status mismatches (fixes stale DEGRADED with all replicas healthy).
|
||||||
- **DeploymentProgress** (`ui/src/components/DeploymentProgress.tsx`) — UI step indicator showing 7 deploy stages with amber active/green completed styling.
|
- **DeploymentProgress** (`ui/src/components/DeploymentProgress.tsx`) — UI step indicator showing 7 deploy stages with amber active/green completed styling.
|
||||||
- **ContainerLogForwarder** (`app/runtime/ContainerLogForwarder.java`) — streams Docker container stdout/stderr to ClickHouse `logs` table with `source='container'`. Uses `docker logs --follow` per container, batches lines every 2s or 50 lines. Parses Docker timestamp prefix, infers log level via regex. `DeploymentExecutor` starts capture after each replica launches with the replica's `instanceId` (`{envSlug}-{appSlug}-{replicaIndex}`); `DockerEventMonitor` stops capture on die/oom. 60-second max capture timeout with 30s cleanup scheduler. Thread pool of 10 daemon threads. Container logs use the same `instanceId` as the agent (set via `CAMELEER_AGENT_INSTANCEID` env var) for unified log correlation at the instance level.
|
- **ContainerLogForwarder** (`app/runtime/ContainerLogForwarder.java`) — streams Docker container stdout/stderr to ClickHouse `logs` table with `source='container'`. Uses `docker logs --follow` per container, batches lines every 2s or 50 lines. Parses Docker timestamp prefix, infers log level via regex. `DeploymentExecutor` starts capture after each replica launches with the replica's `instanceId` (`{envSlug}-{appSlug}-{replicaIndex}-{generation}`); `DockerEventMonitor` stops capture on die/oom. 60-second max capture timeout with 30s cleanup scheduler. Thread pool of 10 daemon threads. Container logs use the same `instanceId` as the agent (set via `CAMELEER_AGENT_INSTANCEID` env var) for unified log correlation at the instance level. Instance-id changes per deployment — cross-deploy queries aggregate on `application + environment` (and optionally `replica_index`).
|
||||||
- **StartupLogPanel** (`ui/src/components/StartupLogPanel.tsx`) — collapsible log panel rendered below `DeploymentProgress`. Queries `/api/v1/logs?source=container&application={appSlug}&environment={envSlug}`. Auto-polls every 3s while deployment is STARTING; shows green "live" badge during polling, red "stopped" badge on FAILED. Uses `useStartupLogs` hook and `LogViewer` (design system).
|
- **StartupLogPanel** (`ui/src/components/StartupLogPanel.tsx`) — collapsible log panel rendered below `DeploymentProgress`. Queries `/api/v1/logs?source=container&application={appSlug}&environment={envSlug}`. Auto-polls every 3s while deployment is STARTING; shows green "live" badge during polling, red "stopped" badge on FAILED. Uses `useStartupLogs` hook and `LogViewer` (design system).
|
||||||
|
|
||||||
|
## Container Hardening (issue #152)
|
||||||
|
|
||||||
|
`DockerRuntimeOrchestrator.startContainer` applies an unconditional hardening contract to BOTH the loader init-container AND the main tenant container (`baseHardenedHostConfig()` is the shared helper). Java 17 has no SecurityManager so the JVM is not a security boundary, and isolation must live below it. Defaults are fail-closed and have no opt-out:
|
||||||
|
|
||||||
|
- `cap_drop` = every `Capability.values()` (effectively ALL — docker-java's enum has no `ALL` constant). Outbound TCP still works (no caps needed); raw sockets, ptrace, mounts, and bind <1024 are denied.
|
||||||
|
- `security_opt`: `no-new-privileges:true`, `apparmor=docker-default`. Default seccomp profile is applied implicitly when `seccomp=` is absent.
|
||||||
|
- `read_only` rootfs = true.
|
||||||
|
- `pids_limit` = 512 (`PIDS_LIMIT` constant).
|
||||||
|
- `tmpfs` mount: `/tmp` with `rw,nosuid,size=256m`. **No `noexec`** — Netty/tcnative, Snappy, LZ4, Zstd dlopen native libs from `/tmp` via `mmap(PROT_EXEC)` which `noexec` blocks. Issue #153 will add per-app `writeableVolumes` for stateful tenants (Kafka Streams etc.).
|
||||||
|
- `userns_mode` = `host:1000:65536` on both loader and main. Container root is never UID 0 on the host — closes the last open hardening item from issue #152.
|
||||||
|
|
||||||
|
**Sandboxed runtime auto-detect**: at construction the orchestrator calls `dockerClient.infoCmd().exec().getRuntimes()` and uses `runsc` (gVisor) when present. Override with `cameleer.server.runtime.dockerruntime` (e.g. `kata` to force Kata Containers, or any other registered runtime). Empty/blank = auto. The override always wins over auto-detect. The `DockerRuntimeOrchestrator(DockerClient, String)` constructor is the canonical entry point; the single-arg constructor exists only as a convenience for tests that don't need an override.
|
||||||
|
|
||||||
|
## Init-Container Loader Pattern (JAR fetch)
|
||||||
|
|
||||||
|
`startContainer` is now a two-phase op per replica:
|
||||||
|
|
||||||
|
1. **Volume create** — `cameleer-jars-{containerName}` named volume (per-replica, deterministic so cleanup in `removeContainer` can derive it).
|
||||||
|
2. **Loader container** — `loaderImage` (default `registry.cameleer.io/cameleer/cameleer-runtime-loader:latest`, **built and published by the cameleer-saas repo** at `docker/runtime-loader/`; CI pushes to `gitea.siegeln.net` under the same path — both names resolve to the same registry), name `{containerName}-loader`, mount the volume **RW at `/app/jars`**, env vars `ARTIFACT_URL` + `ARTIFACT_EXPECTED_SIZE`. Loader downloads the JAR from the signed URL into the volume and exits 0. Orchestrator blocks on `waitContainerCmd().exec(WaitContainerResultCallback).awaitStatusCode(120, SECONDS)`. Loader container is removed in a `finally` block; on non-zero exit the volume is also removed and `RuntimeException` propagates so `DeploymentExecutor` marks the deployment FAILED. **Loader logs are captured before removal** (`captureLoaderLogs` — `logContainerCmd` with `withTail(50)`, capped at 4096 chars, 5s timeout) and appended to the thrown `RuntimeException` message as `". loader output: <text>"`. Best-effort: log-capture failures are swallowed and don't mask the original exit. The loader image's Dockerfile pre-creates `/app/jars` owned by `loader:loader` (UID 1000) so the orchestrator's fresh named volume initialises with that ownership — without it the empty volume comes up as `root:root 0755` and wget exits 1 with "Permission denied". `LoaderHardeningIT` is the cross-repo contract test (pulls the published `:latest` and asserts exit 0 under the orchestrator's hardening shape).
|
||||||
|
3. **Main container** — same hardening contract, mount the same volume **RO at `/app/jars`**, entrypoint reads `/app/jars/app.jar` (Spring Boot/Quarkus: `-jar /app/jars/app.jar`; plain Java: `-cp /app/jars/app.jar <MainClass>`; native: `exec /app/jars/app.jar`).
|
||||||
|
|
||||||
|
`removeContainer(id)` derives the volume name from the inspected container name (Docker prefixes it with `/`) and removes the volume after the container removes — blue/green doesn't leak volumes.
|
||||||
|
|
||||||
|
`DeploymentExecutor` generates the signed URL via `ArtifactDownloadTokenSigner.sign(appVersion.id(), Duration.ofSeconds(artifactTokenTtlSeconds))` and passes `appVersion.id()`, the URL, `appVersion.jarSizeBytes()`, and the loader image into `ContainerRequest`. The host filesystem is no longer involved at deploy time.
|
||||||
|
|
||||||
|
**Loader → server reachability**: the loader hits the Cameleer server from its **primary** Docker
|
||||||
|
network only (`request.network()`, set from `CAMELEER_SERVER_RUNTIME_DOCKERNETWORK`). Additional networks
|
||||||
|
(`cameleer-traefik`, per-env) are attached by `DockerNetworkManager.connectContainer` AFTER `startContainer`
|
||||||
|
returns — by which time the loader has already exited. The loader cannot use them. The signed URL is built
|
||||||
|
from `cameleer.server.runtime.artifactbaseurl` (preferred), falling back to `cameleer.server.runtime.serverurl`,
|
||||||
|
falling back to `http://cameleer-server:8081`. The default works in SaaS mode because the tenant's primary
|
||||||
|
network (`cameleer-tenant-{slug}`) hosts the tenant's own server — same `CAMELEER_SERVER_RUNTIME_DOCKERNETWORK`
|
||||||
|
on both. For non-SaaS topologies, set `CAMELEER_SERVER_RUNTIME_ARTIFACTBASEURL` to a URL the loader can reach
|
||||||
|
on its primary network.
|
||||||
|
|
||||||
## DeploymentExecutor Details
|
## DeploymentExecutor Details
|
||||||
|
|
||||||
Primary network for app containers is set via `CAMELEER_SERVER_RUNTIME_DOCKERNETWORK` env var (in SaaS mode: `cameleer-tenant-{slug}`); apps also connect to `cameleer-traefik` (routing) and `cameleer-env-{tenantId}-{envSlug}` (per-environment discovery) as additional networks. Resolves `runtimeType: auto` to concrete type from `AppVersion.detectedRuntimeType` at PRE_FLIGHT (fails deployment if unresolvable). Builds Docker entrypoint per runtime type (all JVM types use `-javaagent:/app/agent.jar -jar`, plain Java uses `-cp` with main class, native runs binary directly). Sets per-replica `CAMELEER_AGENT_INSTANCEID` env var to `{envSlug}-{appSlug}-{replicaIndex}` so container logs and agent logs share the same instance identity. Sets `CAMELEER_AGENT_*` env vars from `ResolvedContainerConfig` (routeControlEnabled, replayEnabled, health port). These are startup-only agent properties — changing them requires redeployment.
|
Primary network for app containers is set via `CAMELEER_SERVER_RUNTIME_DOCKERNETWORK` env var (in SaaS mode: `cameleer-tenant-{slug}`); apps also connect to `cameleer-traefik` (routing) and `cameleer-env-{tenantId}-{envSlug}` (per-environment discovery) as additional networks. Resolves `runtimeType: auto` to concrete type from `AppVersion.detectedRuntimeType` at PRE_FLIGHT (fails deployment if unresolvable). Builds Docker entrypoint per runtime type (all JVM types use `-javaagent:/app/agent.jar -jar`, plain Java uses `-cp` with main class, native runs binary directly). Sets per-replica `CAMELEER_AGENT_INSTANCEID` env var to `{envSlug}-{appSlug}-{replicaIndex}-{generation}` so container logs and agent logs share the same instance identity. Sets `CAMELEER_AGENT_*` env vars from `ResolvedContainerConfig` (routeControlEnabled, replayEnabled, health port). These are startup-only agent properties — changing them requires redeployment.
|
||||||
|
|
||||||
|
**Container naming** — `{tenantId}-{envSlug}-{appSlug}-{replicaIndex}-{generation}`, where `generation` is the first 8 characters of the deployment UUID. The generation suffix lets old + new replicas coexist during a blue/green swap (deterministic names without a generation used to 409). All lookups across the executor, `DockerEventMonitor`, and `ContainerLogForwarder` key on container **id**, not name — the name is operator-visibility only.
|
||||||
|
|
||||||
|
**Strategy dispatch** — `DeploymentStrategy.fromWire(config.deploymentStrategy())` branches the executor. Unknown values fall back to BLUE_GREEN so misconfiguration never throws at runtime.
|
||||||
|
|
||||||
|
- **Blue/green** (default): start all N new replicas → wait for ALL healthy → stop the previous deployment. Resource peak ≈ 2× replicas for the health-check window. Partial health aborts with status FAILED; the previous deployment is preserved untouched (user's safety net).
|
||||||
|
- **Rolling**: replace replicas one at a time — start new[i] → wait healthy → stop old[i] → next. Resource peak = replicas + 1. Mid-rollout health failure stops in-flight new containers and aborts; already-replaced old replicas are NOT restored (not reversible) but un-replaced old[i+1..N] keep serving traffic. User redeploys to recover.
|
||||||
|
|
||||||
|
Traffic routing is implicit: Traefik labels (`cameleer.app`, `cameleer.environment`) are generation-agnostic, so new replicas attract load balancing as soon as they come up healthy — no explicit swap step.
|
||||||
|
|
||||||
## Deployment Status Model
|
## Deployment Status Model
|
||||||
|
|
||||||
@@ -34,23 +78,20 @@ Primary network for app containers is set via `CAMELEER_SERVER_RUNTIME_DOCKERNET
|
|||||||
| `STOPPED` | Intentionally stopped or initial state |
|
| `STOPPED` | Intentionally stopped or initial state |
|
||||||
| `STARTING` | Deploy in progress |
|
| `STARTING` | Deploy in progress |
|
||||||
| `RUNNING` | All replicas healthy and serving |
|
| `RUNNING` | All replicas healthy and serving |
|
||||||
| `DEGRADED` | Some replicas healthy, some dead |
|
| `DEGRADED` | Post-deploy: a replica died after the deploy was marked RUNNING. Set by `DockerEventMonitor` reconciliation, never by `DeploymentExecutor` directly. |
|
||||||
| `STOPPING` | Graceful shutdown in progress |
|
| `STOPPING` | Graceful shutdown in progress |
|
||||||
| `FAILED` | Terminal failure (pre-flight, health check, or crash) |
|
| `FAILED` | Terminal failure (pre-flight, health check, or crash). Partial-healthy deploys now mark FAILED — DEGRADED is reserved for post-deploy drift. |
|
||||||
|
|
||||||
**Replica support**: deployments can specify a replica count. `DEGRADED` is used when at least one but not all replicas are healthy.
|
**Deploy stages** (`DeployStage`): PRE_FLIGHT -> PULL_IMAGE -> CREATE_NETWORK -> START_REPLICAS -> HEALTH_CHECK -> SWAP_TRAFFIC -> COMPLETE (or FAILED at any stage). Rolling reuses the same stage labels inside the per-replica loop; the UI progress bar shows the most recent stage.
|
||||||
|
|
||||||
**Deploy stages** (`DeployStage`): PRE_FLIGHT -> PULL_IMAGE -> CREATE_NETWORK -> START_REPLICAS -> HEALTH_CHECK -> SWAP_TRAFFIC -> COMPLETE (or FAILED at any stage).
|
**Deployment retention**: `DeploymentService.createDeployment()` deletes FAILED deployments for the same app+environment before creating a new one, preventing failed-attempt buildup. STOPPED deployments are preserved as restorable checkpoints — the UI Checkpoints disclosure lists every deployment with a non-null `deployed_config_snapshot` (RUNNING, DEGRADED, STOPPED) minus the current one.
|
||||||
|
|
||||||
**Blue/green strategy**: when re-deploying, new replicas are started and health-checked before old ones are stopped, minimising downtime.
|
|
||||||
|
|
||||||
**Deployment uniqueness**: `DeploymentService.createDeployment()` deletes any STOPPED/FAILED deployments for the same app+environment before creating a new one, preventing duplicate rows.
|
|
||||||
|
|
||||||
## JAR Management
|
## JAR Management
|
||||||
|
|
||||||
- **Retention policy** per environment: configurable maximum number of JAR versions to keep. Older JARs are deleted automatically.
|
- **Retention policy** per environment: configurable maximum number of JAR versions to keep. Older JARs are deleted automatically.
|
||||||
- **Nightly cleanup job** (`JarRetentionJob`, Spring `@Scheduled` 03:00): purges JARs exceeding the retention limit and removes orphaned files not referenced by any app version. Skips versions currently deployed.
|
- **Nightly cleanup job** (`JarRetentionJob`, Spring `@Scheduled` 03:00): purges JARs exceeding the retention limit and removes orphaned files not referenced by any app version. Skips versions currently deployed.
|
||||||
- **Volume-based JAR mounting** for Docker-in-Docker setups: set `CAMELEER_SERVER_RUNTIME_JARDOCKERVOLUME` to the Docker volume name that contains the JAR storage directory. When set, the orchestrator mounts this volume into the container instead of bind-mounting the host path (required when the SaaS container itself runs inside Docker and the host path is not accessible from sibling containers).
|
- **Storage abstraction**: `ArtifactStore` (in `cameleer-server-core/storage`) is the only path that touches JAR bytes. `FilesystemArtifactStore` writes under `cameleer.server.runtime.jarstoragepath` (default `/data/jars`); the orchestrator never reads the host filesystem at deploy time.
|
||||||
|
- **Loader-fetch at deploy time**: tenant containers no longer bind-mount JARs from the host. The loader init-container streams the JAR via a signed URL (HMAC-SHA256, TTL `cameleer.server.runtime.artifacttokenttlseconds`, default 600s) into a per-replica named volume; main mounts that volume RO. This works without host-path access and is the single path supported in Docker-in-Docker SaaS deployments.
|
||||||
|
|
||||||
## Runtime Type Detection
|
## Runtime Type Detection
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,9 @@ paths:
|
|||||||
|
|
||||||
# Prometheus Metrics
|
# Prometheus Metrics
|
||||||
|
|
||||||
Server exposes `/api/v1/prometheus` (unauthenticated, Prometheus text format). Spring Boot Actuator provides JVM, GC, thread pool, and `http.server.requests` metrics automatically. Business metrics via `ServerMetrics` component:
|
Server exposes `/api/v1/prometheus` (unauthenticated, Prometheus text format). Spring Boot Actuator provides JVM, GC, thread pool, and `http.server.requests` metrics automatically. Business metrics via `ServerMetrics` component.
|
||||||
|
|
||||||
|
The same `MeterRegistry` is also snapshotted to ClickHouse every 60 s by `ServerMetricsSnapshotScheduler` (see "Server self-metrics persistence" at the bottom of this file) — so historical server-health data survives restarts without an external Prometheus.
|
||||||
|
|
||||||
## Gauges (auto-polled)
|
## Gauges (auto-polled)
|
||||||
|
|
||||||
@@ -83,3 +85,23 @@ Mean processing time = `camel.route.policy.total_time / camel.route.policy.count
|
|||||||
| `cameleer.sse.reconnects.count` | counter | `instanceId` |
|
| `cameleer.sse.reconnects.count` | counter | `instanceId` |
|
||||||
| `cameleer.taps.evaluated.count` | counter | `instanceId` |
|
| `cameleer.taps.evaluated.count` | counter | `instanceId` |
|
||||||
| `cameleer.metrics.exported.count` | counter | `instanceId` |
|
| `cameleer.metrics.exported.count` | counter | `instanceId` |
|
||||||
|
|
||||||
|
## Server self-metrics persistence
|
||||||
|
|
||||||
|
`ServerMetricsSnapshotScheduler` walks `MeterRegistry.getMeters()` every 60 s (configurable via `cameleer.server.self-metrics.interval-ms`) and writes one row per Micrometer `Measurement` to the ClickHouse `server_metrics` table. Full registry is captured — Spring Boot Actuator series (`jvm.*`, `process.*`, `http.server.requests`, `hikaricp.*`, `jdbc.*`, `tomcat.*`, `logback.events`, `system.*`) plus `cameleer.*` and `alerting_*`.
|
||||||
|
|
||||||
|
**Table** (`cameleer-server-app/src/main/resources/clickhouse/init.sql`):
|
||||||
|
|
||||||
|
```
|
||||||
|
server_metrics(tenant_id, collected_at, server_instance_id,
|
||||||
|
metric_name, metric_type, statistic, metric_value,
|
||||||
|
tags Map(String,String), server_received_at)
|
||||||
|
```
|
||||||
|
|
||||||
|
- `metric_type` — lowercase Micrometer `Meter.Type` (counter, gauge, timer, distribution_summary, long_task_timer, other)
|
||||||
|
- `statistic` — Micrometer `Statistic.getTagValueRepresentation()` (value, count, total, total_time, max, mean, active_tasks, duration). Timers emit 3 rows per tick (count + total_time + max); gauges/counters emit 1 (`statistic='value'` or `'count'`).
|
||||||
|
- No `environment` column — the server is env-agnostic.
|
||||||
|
- `tenant_id` threaded from `cameleer.server.tenant.id` (single-tenant per server).
|
||||||
|
- `server_instance_id` resolved once at boot by `ServerInstanceIdConfig` (property → HOSTNAME → localhost → UUID fallback). Rotates across restarts so counter resets are unambiguous.
|
||||||
|
- TTL: 90 days (vs 365 for `agent_metrics`). Write-only in v1 — no query endpoint or UI page. Inspect via ClickHouse admin: `/api/v1/admin/clickhouse/query` or direct SQL.
|
||||||
|
- Toggle off entirely with `cameleer.server.self-metrics.enabled=false` (uses `@ConditionalOnProperty`).
|
||||||
|
|||||||
@@ -10,13 +10,18 @@ The UI has 4 main tabs: **Exchanges**, **Dashboard**, **Runtime**, **Deployments
|
|||||||
- **Exchanges** — route execution search and detail (`ui/src/pages/Exchanges/`)
|
- **Exchanges** — route execution search and detail (`ui/src/pages/Exchanges/`)
|
||||||
- **Dashboard** — metrics and stats with L1/L2/L3 drill-down (`ui/src/pages/DashboardTab/`)
|
- **Dashboard** — metrics and stats with L1/L2/L3 drill-down (`ui/src/pages/DashboardTab/`)
|
||||||
- **Runtime** — live agent status, logs, commands (`ui/src/pages/RuntimeTab/`). AgentHealth supports compact view (dense health-tinted cards) and expanded view (full GroupCard+DataTable per app). View mode persisted to localStorage.
|
- **Runtime** — live agent status, logs, commands (`ui/src/pages/RuntimeTab/`). AgentHealth supports compact view (dense health-tinted cards) and expanded view (full GroupCard+DataTable per app). View mode persisted to localStorage.
|
||||||
- **Deployments** — app management, JAR upload, deployment lifecycle (`ui/src/pages/AppsTab/`)
|
- **Deployments** — unified app deployment page (`ui/src/pages/AppsTab/`)
|
||||||
- Config sub-tabs: **Monitoring | Resources | Variables | Traces & Taps | Route Recording**
|
- Routes: `/apps` (list, `AppListView` in `AppsTab.tsx`), `/apps/new` + `/apps/:slug` (both render `AppDeploymentPage`).
|
||||||
- Create app: full page at `/apps/new` (not a modal)
|
- Identity & Artifact section always visible; name editable pre-first-deploy, read-only after. JAR picker client-stages; new JAR + any form edits flip the primary button from `Save` to `Redeploy`. Environment fixed to the currently-selected env (no selector).
|
||||||
- Deployment progress: `ui/src/components/DeploymentProgress.tsx` (7-stage step indicator)
|
- Config sub-tabs: **Monitoring | Resources | Variables | Sensitive Keys | Deployment | ● Traces & Taps | ● Route Recording**. The four staged tabs feed dirty detection; the `●` live tabs apply in real-time (amber LiveBanner + default `?apply=live` on their writes) and never mark dirty.
|
||||||
|
- Primary action state machine: `Save` → `Uploading… N%` (during JAR upload; button shows percent with a tinted progress-fill overlay) → `Redeploy` → `Deploying…` during active deploy. Upload progress sourced from `useUploadJar` (XHR `upload.onprogress` → page-level `uploadPct` state). The button is disabled during `uploading` and `deploying`.
|
||||||
|
- Checkpoints render as a collapsible `CheckpointsTable` (default **collapsed**) **inside the Identity & Artifact `configGrid`** as an in-grid row (`Checkpoints | ▸ Expand (N)` / `▾ Collapse (N)`). `CheckpointsTable` returns a React.Fragment of grid-ready children so the label + trigger align with the other identity rows; when opened, a third grid child spans both columns via `grid-column: 1 / -1` so the 7-column table gets full width. Wired through `IdentitySection.checkpointsSlot` — `CheckpointDetailDrawer` stays in `IdentitySection.children` because it portals. Columns: Version · JAR (filename) · Deployed by · Deployed (relative `timeAgo` + user-locale sub-line via `new Date(iso).toLocaleString()`) · Strategy · Outcome · ›. Row click opens the drawer. Drawer tabs are ordered **Config | Logs** with `Config` as the default. Config panel has Snapshot / Diff vs current view modes. Replica filter in the Logs panel uses DS `Select`. Restore lives in the drawer footer (forces review). Visible row cap = `Environment.jarRetentionCount` (default 10 if 0/null); older rows accessible via "Show older (N)" expander. Currently-running deployment is excluded — represented separately by `StatusCard`. The empty-checkpoints case returns `null` (no row). The legacy `Checkpoints.tsx` row-list component is gone.
|
||||||
|
- Deployment tab: `StatusCard` + `DeploymentProgress` (during STARTING / FAILED) + flex-grow `StartupLogPanel` (no fixed maxHeight). Auto-activates when a deploy starts. The former `HistoryDisclosure` is retired — per-deployment config and logs live in the Checkpoints drawer. `StartupLogPanel` header mirrors the Runtime Application Log pattern: title + live/stopped badge + `N entries` + sort toggle (↑/↓, default **desc**) + refresh icon (`RefreshCw`). Sort drives the backend fetch via `useStartupLogs(…, sort)` so the 500-line limit returns the window closest to the user's interest; display order matches fetch order. Refresh scrolls to the latest edge (top for desc, bottom for asc). Sort + refresh buttons disable while a refetch is in flight. 3s polling while STARTING is unchanged.
|
||||||
|
- Unsaved-change router blocker uses DS `AlertDialog` (not `window.beforeunload`). Env switch intentionally discards edits without warning.
|
||||||
|
|
||||||
**Admin pages** (ADMIN-only, under `/admin/`):
|
**Admin pages** (ADMIN-only, under `/admin/`):
|
||||||
- **Sensitive Keys** (`ui/src/pages/Admin/SensitiveKeysPage.tsx`) — global sensitive key masking config. Shows agent built-in defaults as outlined Badge reference, editable Tag pills for custom keys, amber-highlighted push-to-agents toggle. Keys add to (not replace) agent defaults. Per-app sensitive key additions managed via `ApplicationConfigController` API. Note: `AppConfigDetailPage.tsx` exists but is not routed in `router.tsx`.
|
- **Sensitive Keys** (`ui/src/pages/Admin/SensitiveKeysPage.tsx`) — global sensitive key masking config. Shows agent built-in defaults as outlined Badge reference, editable Tag pills for custom keys, amber-highlighted push-to-agents toggle. Keys add to (not replace) agent defaults. Per-app sensitive key additions managed via `ApplicationConfigController` API. Note: `AppConfigDetailPage.tsx` exists but is not routed in `router.tsx`.
|
||||||
|
- **Server Metrics** (`ui/src/pages/Admin/ServerMetricsAdminPage.tsx`) — dashboard over the `server_metrics` ClickHouse table. Visibility matches Database/ClickHouse pages: gated on `capabilities.infrastructureEndpoints` in `buildAdminTreeNodes`; backend is `@ConditionalOnProperty(infrastructureendpoints) + @PreAuthorize('hasRole(ADMIN)')`. Uses the generic `/api/v1/admin/server-metrics/{catalog,instances,query}` API via `ui/src/api/queries/admin/serverMetrics.ts` hooks (`useServerMetricsCatalog`, `useServerMetricsInstances`, `useServerMetricsSeries`), all three of which take a `ServerMetricsRange = { from: Date; to: Date }`. Time range is driven by the global TopBar picker via `useGlobalFilters()` — no page-local selector; bucket size auto-scales through `stepSecondsFor(windowSeconds)` (10 s up to 1 h buckets). Toolbar is just server-instance badges. Sections: Server health (agents/ingestion/auth), JVM (memory/CPU/GC/threads), HTTP & DB pools, Alerting (conditional on catalog), Deployments (conditional on catalog). Each panel is a `ThemedChart` with `Line`/`Area` children from the design system; multi-series responses are flattened into overlap rows by bucket timestamp. Alerting and Deployments rows are hidden when their metrics aren't in the catalog (zero-deploy / alerting-disabled installs).
|
||||||
|
|
||||||
## Key UI Files
|
## Key UI Files
|
||||||
|
|
||||||
@@ -25,11 +30,38 @@ The UI has 4 main tabs: **Exchanges**, **Dashboard**, **Runtime**, **Deployments
|
|||||||
- `ui/src/auth/auth-store.ts` — Zustand: accessToken, user, roles, login/logout
|
- `ui/src/auth/auth-store.ts` — Zustand: accessToken, user, roles, login/logout
|
||||||
- `ui/src/api/environment-store.ts` — Zustand: selected environment (localStorage)
|
- `ui/src/api/environment-store.ts` — Zustand: selected environment (localStorage)
|
||||||
- `ui/src/components/ContentTabs.tsx` — main tab switcher
|
- `ui/src/components/ContentTabs.tsx` — main tab switcher
|
||||||
|
- `ui/src/components/EnvironmentSwitcherButton.tsx` + `EnvironmentSwitcherModal.tsx` — explicit env picker (button in TopBar; DS `Modal`-based list). Replaces the retired `EnvironmentSelector` (All-Envs dropdown). When `envRecords.length > 0` and the stored `selectedEnv` no longer matches any env, `LayoutShell` opens the modal in `forced` mode (non-dismissible). Switcher pulls env records from `useEnvironments()` (admin endpoint; readable by VIEWER+).
|
||||||
|
- `ui/src/components/env-colors.ts` + `ui/src/styles/env-colors.css` — 8-swatch preset palette for the per-environment color indicator. Tokens `--env-color-slate/red/amber/green/teal/blue/purple/pink` are defined for both light and dark themes. `envColorVar(name)` falls back to `slate` for unknown values. `LayoutShell` renders a 3px fixed top bar in the current env's color (z-index 900, below DS modals).
|
||||||
- `ui/src/components/ExecutionDiagram/` — interactive trace view (canvas)
|
- `ui/src/components/ExecutionDiagram/` — interactive trace view (canvas)
|
||||||
- `ui/src/components/ProcessDiagram/` — ELK-rendered route diagram
|
- `ui/src/components/ProcessDiagram/` — ELK-rendered route diagram
|
||||||
- `ui/src/hooks/useScope.ts` — TabKey type, scope inference
|
- `ui/src/hooks/useScope.ts` — TabKey type, scope inference
|
||||||
- `ui/src/components/StartupLogPanel.tsx` — deployment startup log viewer (container logs from ClickHouse, polls 3s while STARTING)
|
- `ui/src/components/StartupLogPanel.tsx` — deployment startup log viewer (container logs from ClickHouse, polls 3s while STARTING)
|
||||||
- `ui/src/api/queries/logs.ts` — `useStartupLogs` hook for container startup log polling, `useLogs`/`useApplicationLogs` for general log search
|
- `ui/src/api/queries/logs.ts` — `useStartupLogs` hook for container startup log polling, `useLogs`/`useApplicationLogs` for bounded log search (single page), `useInfiniteApplicationLogs` for streaming log views (cursor-paginated, server-side source/level filters)
|
||||||
|
- `ui/src/api/queries/agents.ts` — `useAgents` for agent list, `useInfiniteAgentEvents` for cursor-paginated timeline stream
|
||||||
|
- `ui/src/hooks/useInfiniteStream.ts` — tanstack `useInfiniteQuery` wrapper with top-gated auto-refetch, flattened `items[]`, and `refresh()` invalidator
|
||||||
|
- `ui/src/components/InfiniteScrollArea.tsx` — scrollable container with IntersectionObserver top/bottom sentinels. Streaming log/event views use this + `useInfiniteStream`. Bounded views (LogTab, StartupLogPanel) keep `useLogs`/`useStartupLogs`
|
||||||
|
- `ui/src/components/SideDrawer.tsx` — project-local right-slide drawer (DS has Modal but no Drawer). Portal-rendered, ESC + transparent-backdrop click closes, sticky header/footer, sizes md/lg/xl. Currently consumed only by `CheckpointDetailDrawer` — promote to `@cameleer/design-system` once a second consumer appears.
|
||||||
|
|
||||||
|
## Alerts
|
||||||
|
|
||||||
|
- **Sidebar section** (`buildAlertsTreeNodes` in `ui/src/components/sidebar-utils.ts`) — Inbox, Rules, Silences.
|
||||||
|
- **Routes** in `ui/src/router.tsx`: `/alerts` (redirect to inbox), `/alerts/inbox`, `/alerts/rules`, `/alerts/rules/new`, `/alerts/rules/:id`, `/alerts/silences`. No redirects for the retired `/alerts/all` and `/alerts/history` — stale URLs 404 per the clean-break policy.
|
||||||
|
- **Pages** under `ui/src/pages/Alerts/`:
|
||||||
|
- `InboxPage.tsx` — single filterable inbox. Filters: severity (multi), state (PENDING/FIRING/RESOLVED, default FIRING), Hide acked toggle (default on), Hide read toggle (default on). Row actions: Acknowledge, Mark read, Silence rule… (duration quick menu), Delete (OPERATOR+, soft-delete with undo toast wired to `useRestoreAlert`). Bulk toolbar (selection-driven): Acknowledge N · Mark N read · Silence rules · Delete N (ConfirmDialog; OPERATOR+).
|
||||||
|
- `SilenceRuleMenu.tsx` — DS `Dropdown`-based duration picker (1h / 8h / 24h / Custom…). Used by the row-level and bulk silence actions. "Custom…" navigates to `/alerts/silences?ruleId=<id>`.
|
||||||
|
- `RulesListPage.tsx` — CRUD + enable/disable toggle + env-promotion dropdown (pure UI prefill, no new endpoint).
|
||||||
|
- `RuleEditor/RuleEditorWizard.tsx` — 5-step wizard (Scope / Condition / Trigger / Notify / Review). `form-state.ts` is the single source of truth (`initialForm` / `toRequest` / `validateStep`). Seven condition-form subcomponents under `RuleEditor/condition-forms/` — including `AgentLifecycleForm.tsx` (multi-select event-type chips for the six-entry `AgentLifecycleEventType` allowlist + lookback-window input).
|
||||||
|
- `SilencesPage.tsx` — matcher-based create + end-early. Reads `?ruleId=` search param to prefill the Rule ID field (driven by InboxPage's "Silence rule… → Custom…" flow).
|
||||||
|
- `AlertRow.tsx` shared list row; `alerts-page.module.css` shared styling.
|
||||||
|
- **Components**:
|
||||||
|
- `NotificationBell.tsx` — polls `/alerts/unread-count` every 30 s (paused when tab hidden via TanStack Query `refetchIntervalInBackground: false`).
|
||||||
|
- `AlertStateChip.tsx`, `SeverityBadge.tsx` — shared state/severity indicators.
|
||||||
|
- `MustacheEditor/` — CodeMirror 6 editor with variable autocomplete + inline linter. Shared between rule title/message, webhook body/header overrides, and (future) Admin Outbound Connection editor (reduced-context mode for URL).
|
||||||
|
- `MustacheEditor/alert-variables.ts` — variable registry aligned with `NotificationContextBuilder.java`. Add new leaves here whenever the backend context grows.
|
||||||
|
- **API queries** under `ui/src/api/queries/`: `alerts.ts`, `alertRules.ts`, `alertSilences.ts`, `alertNotifications.ts`, `alertMeta.ts`. All env-scoped via `useSelectedEnv` from `alertMeta`.
|
||||||
|
- **CMD-K**: `buildAlertSearchData` in `LayoutShell.tsx` registers `alert` and `alertRule` result categories. Badges convey severity + state. Palette navigates directly to the deep-link path — no sidebar-reveal state for alerts.
|
||||||
|
- **Sidebar accordion**: entering `/alerts/*` collapses Applications + Admin + Starred (mirrors Admin accordion).
|
||||||
|
- **Top-nav**: `<NotificationBell />` is the first child of `<TopBar>`, sitting alongside `SearchTrigger` + status `ButtonGroup` + `TimeRangeDropdown` + `AutoRefreshToggle`.
|
||||||
|
|
||||||
## UI Styling
|
## UI Styling
|
||||||
|
|
||||||
|
|||||||
@@ -5,8 +5,20 @@ on:
|
|||||||
branches: [main, 'feature/**', 'fix/**', 'feat/**']
|
branches: [main, 'feature/**', 'fix/**', 'feat/**']
|
||||||
tags-ignore:
|
tags-ignore:
|
||||||
- 'v*'
|
- 'v*'
|
||||||
|
paths-ignore:
|
||||||
|
- '.planning/**'
|
||||||
|
- 'docs/**'
|
||||||
|
- '**/*.md'
|
||||||
|
- '.claude/**'
|
||||||
|
- 'AGENTS.md'
|
||||||
|
- 'CLAUDE.md'
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [main]
|
branches: [main]
|
||||||
|
paths-ignore:
|
||||||
|
- '.planning/**'
|
||||||
|
- 'docs/**'
|
||||||
|
- '**/*.md'
|
||||||
|
- '.claude/**'
|
||||||
delete:
|
delete:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
@@ -45,11 +57,25 @@ jobs:
|
|||||||
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
|
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
|
||||||
restore-keys: ${{ runner.os }}-maven-
|
restore-keys: ${{ runner.os }}-maven-
|
||||||
|
|
||||||
|
- name: Cache npm registry
|
||||||
|
uses: actions/cache@v4
|
||||||
|
with:
|
||||||
|
path: ~/.npm
|
||||||
|
key: ${{ runner.os }}-npm-${{ hashFiles('ui/package-lock.json') }}
|
||||||
|
restore-keys: ${{ runner.os }}-npm-
|
||||||
|
|
||||||
|
- name: Cache Vite build artifacts
|
||||||
|
uses: actions/cache@v4
|
||||||
|
with:
|
||||||
|
path: ui/node_modules/.vite
|
||||||
|
key: ${{ runner.os }}-vite-${{ hashFiles('ui/package-lock.json', 'ui/vite.config.ts') }}
|
||||||
|
restore-keys: ${{ runner.os }}-vite-
|
||||||
|
|
||||||
- name: Build UI
|
- name: Build UI
|
||||||
working-directory: ui
|
working-directory: ui
|
||||||
run: |
|
run: |
|
||||||
echo '//gitea.siegeln.net/api/packages/cameleer/npm/:_authToken=${REGISTRY_TOKEN}' >> .npmrc
|
echo '//gitea.siegeln.net/api/packages/cameleer/npm/:_authToken=${REGISTRY_TOKEN}' >> .npmrc
|
||||||
npm ci
|
npm ci --prefer-offline --no-audit --fund=false
|
||||||
npm run build
|
npm run build
|
||||||
env:
|
env:
|
||||||
REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
|
REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
|
||||||
@@ -58,6 +84,12 @@ jobs:
|
|||||||
- name: Build and Test
|
- name: Build and Test
|
||||||
run: mvn clean verify -DskipITs -U --batch-mode
|
run: mvn clean verify -DskipITs -U --batch-mode
|
||||||
|
|
||||||
|
- name: Deploy minter to Maven registry
|
||||||
|
if: github.event_name == 'push'
|
||||||
|
run: mvn deploy -DskipTests -DskipITs --batch-mode -pl .,cameleer-license-api,cameleer-server-core,cameleer-license-minter
|
||||||
|
env:
|
||||||
|
REGISTRY_TOKEN: ${{ secrets.REGISTRY_TOKEN }}
|
||||||
|
|
||||||
docker:
|
docker:
|
||||||
needs: build
|
needs: build
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|||||||
120
.planning/it-triage-report.md
Normal file
120
.planning/it-triage-report.md
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
# IT Triage Report — 2026-04-21
|
||||||
|
|
||||||
|
Branch: `main`, starting HEAD `90460705` (chore: refresh GitNexus index stats).
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
- **Starting state**: 65 IT failures (46 F + 19 E) out of 555 tests on a clean build. Side-note: `target/classes` incremental-build staleness from the `90083f88` V1..V18 → V1 schema collapse makes the number look worse (every context load dies on `Flyway V2__claim_mapping.sql failed`). A fresh `mvn clean verify` gives the real 65.
|
||||||
|
- **Final state**: **12 failures across 3 test classes** (`AgentSseControllerIT`, `SseSigningIT`, `ClickHouseStatsStoreIT`). **53 failures closed across 14 test classes.**
|
||||||
|
- **11 commits landed on local `main`** (not pushed).
|
||||||
|
- No new env vars, endpoints, tables, or columns added. `V1__init.sql` untouched. No tests rewritten to pass-by-weakening — every assertion change is accompanied by a comment explaining the contract it now captures.
|
||||||
|
|
||||||
|
## Commits (in order)
|
||||||
|
|
||||||
|
| SHA | Test classes | What changed |
|
||||||
|
|---|---|---|
|
||||||
|
| `7436a37b` | AgentRegistrationControllerIT | environmentId, flat→env URL, heartbeat auto-heal, absolute sseEndpoint |
|
||||||
|
| `97a6b2e0` | AgentCommandControllerIT | environmentId, CommandGroupResponse new shape (200 w/ aggregate replies) |
|
||||||
|
| `e955302f` | BootstrapTokenIT / JwtRefreshIT / RegistrationSecurityIT / SseSigningIT / AgentSseControllerIT | environmentId in register bodies; AGENT-role smoke target; drop flaky iat-coupled assertion |
|
||||||
|
| `10e2b699` | SecurityFilterIT | env-scoped agent list URL |
|
||||||
|
| `9bda4d8f` | FlywayMigrationIT, ConfigEnvIsolationIT | decouple from shared Testcontainers Postgres state |
|
||||||
|
| `36571013` | (docs) | first version of this report |
|
||||||
|
| `dfacedb0` | DetailControllerIT | **Cluster B template**: ExecutionChunk envelope + REST-driven lookup |
|
||||||
|
| `87bada1f` | ExecutionControllerIT, MetricsControllerIT | Chunk payloads + REST flush-visibility probes |
|
||||||
|
| `a6e7458a` | DiagramControllerIT, DiagramRenderControllerIT | Env-scoped render + execution-detail-derived content hash for flat SVG path |
|
||||||
|
| `56844799` | SearchControllerIT | 10 seed payloads → ExecutionChunk; fix AGENT→VIEWER token on search GET |
|
||||||
|
| `d5adaaab` | DiagramLinkingIT, IngestionSchemaIT | REST for diagramContentHash + processor-tree/snapshot assertions |
|
||||||
|
| `8283d531` | ClickHouseChunkPipelineIT, ClickHouseExecutionReadIT | Replace removed `/clickhouse/V2_.sql` with consolidated init.sql; correct `iteration` vs `loopIndex` on seq-based tree path |
|
||||||
|
| `95f90f43` | ForwardCompatIT, ProtocolVersionIT, BackpressureIT | Chunk payload; fix wrong property-key prefix in BackpressureIT (+ MetricsFlushScheduler's separate `ingestion.flush-interval-ms` key) |
|
||||||
|
| `b55221e9` | SensitiveKeysAdminControllerIT | assert pushResult shape, not exact 0 (shared registry across ITs) |
|
||||||
|
|
||||||
|
## The single biggest insight
|
||||||
|
|
||||||
|
**`ExecutionController` (legacy PG path) is dead code.** It's `@ConditionalOnMissingBean(ChunkAccumulator.class)` and `ChunkAccumulator` is registered **unconditionally** in `StorageBeanConfig.java:92`, so `ExecutionController` never binds. Even if it did, `IngestionService.upsert` → `ClickHouseExecutionStore.upsert` throws `UnsupportedOperationException("ClickHouse writes use the chunked pipeline")` — the only `ExecutionStore` impl in `src/main/java` is ClickHouse, the Postgres variant lives in a planning doc only.
|
||||||
|
|
||||||
|
Practical consequences for every IT that was exercising `/api/v1/data/executions`:
|
||||||
|
1. `ChunkIngestionController` owns the URL and expects an `ExecutionChunk` envelope (`exchangeId`, `applicationId`, `instanceId`, `routeId`, `status`, `startTime`, `endTime`, `durationMs`, `chunkSeq`, `final`, `processors: FlatProcessorRecord[]`) — the legacy `RouteExecution` shape was being silently degraded to an empty/degenerate chunk.
|
||||||
|
2. The test payload changes are accompanied by assertion changes that now go through REST endpoints instead of raw SQL against the (ClickHouse-resident) `executions` / `processor_executions` / `route_diagrams` / `agent_metrics` tables.
|
||||||
|
3. **Recommendation for cleanup**: remove `ExecutionController` + the `upsert` path in `IngestionService` + the stubbed `ClickHouseExecutionStore.upsert` throwers. Separate PR. Happy to file.
|
||||||
|
|
||||||
|
## Cluster breakdown
|
||||||
|
|
||||||
|
**Cluster A — missing `environmentId` in register bodies (DONE)**
|
||||||
|
Root cause: `POST /api/v1/agents/register` now 400s without `environmentId`. Test payloads minted before this requirement. Fixed across all agent-registering ITs plus side-cleanups (flaky iat-coupled assertion in JwtRefreshIT, wrong RBAC target in can-access tests, absolute vs relative sseEndpoint).
|
||||||
|
|
||||||
|
**Cluster B — ingestion payload drift (DONE per user direction)**
|
||||||
|
All controller + storage ITs that posted `RouteExecution` JSON now post `ExecutionChunk` envelopes. All CH-side assertions now go through REST endpoints (`/api/v1/environments/{env}/executions` search + `/api/v1/executions/{id}` detail + `/agents/{id}/metrics` + `/apps/{app}/routes/{route}/diagram`). DiagramRenderControllerIT's SVG tests still need a content hash → reads it off the execution-detail REST response rather than querying `route_diagrams`.
|
||||||
|
|
||||||
|
**Cluster C — flat URL drift (DONE)**
|
||||||
|
`/api/v1/agents` → `/api/v1/environments/{envSlug}/agents`. Two test classes touched.
|
||||||
|
|
||||||
|
**Cluster D — heartbeat auto-heal contract (DONE)**
|
||||||
|
`heartbeatUnknownAgent_returns404` renamed and asserts the 200 auto-heal path that `fb54f9cb` made the contract.
|
||||||
|
|
||||||
|
**Cluster E — individual drifts (DONE except three parked)**
|
||||||
|
|
||||||
|
| Test class | Status |
|
||||||
|
|---|---|
|
||||||
|
| FlywayMigrationIT | DONE (decouple from shared PG state) |
|
||||||
|
| ConfigEnvIsolationIT.findByEnvironment_excludesOtherEnvs | DONE (unique slug prefix) |
|
||||||
|
| ForwardCompatIT | DONE (chunk payload) |
|
||||||
|
| ProtocolVersionIT | DONE (chunk payload) |
|
||||||
|
| BackpressureIT | DONE (property-key prefix fix — see note below) |
|
||||||
|
| SensitiveKeysAdminControllerIT | DONE (assert shape not count) |
|
||||||
|
| ClickHouseChunkPipelineIT | DONE (consolidated init.sql) |
|
||||||
|
| ClickHouseExecutionReadIT | DONE (iteration vs loopIndex mapping) |
|
||||||
|
|
||||||
|
## PARKED — what you'll want to look at next
|
||||||
|
|
||||||
|
### 1. ClickHouseStatsStoreIT (8 failures) — timezone bug in production code
|
||||||
|
|
||||||
|
`ClickHouseStatsStore.buildStatsSql` uses `lit(Instant)` which formats as `'yyyy-MM-dd HH:mm:ss'` in UTC but with no timezone marker. ClickHouse parses that literal in the session timezone when comparing against the `DateTime`-typed `bucket` column in `stats_1m_*`. On a non-UTC CH host (e.g. CEST docker on a CEST laptop), the filter endpoint is off by the tz offset in hours and misses every row the MV bucketed.
|
||||||
|
|
||||||
|
I confirmed this by instrumenting the test: `toDateTime(bucket)` returned `12:00:00` for a row inserted with `start_time=10:00:00Z` (i.e. the stored UTC Unix timestamp but displayed in CEST), and the filter literal `'2026-03-31 10:05:00'` was being parsed as CEST → UTC 08:05 → excluded all rows.
|
||||||
|
|
||||||
|
**I didn't fix this** because the repair is in `src/main/java`, not the test. Two reasonable options:
|
||||||
|
- **Test-side**: pin the container TZ via `.withEnv("TZ", "UTC")` + include `use_time_zone=UTC` in the JDBC URL. I tried both; neither was sufficient on their own — the CH server reads `timezone` from its own config, not `$TZ`. Getting all three layers (container env, CH server config, JDBC driver) aligned needs dedicated effort.
|
||||||
|
- **Production-side (preferred)**: change `lit(Instant)` to `toDateTime('...', 'UTC')` or use the 3-arg `DateTime(3, 'UTC')` column type for `bucket`. That's a store change; would be caught by a matching unit test.
|
||||||
|
|
||||||
|
I did add the explicit `'default'` env to the seed `INSERT`s per your directive, but reverted it locally because the timezone bug swallowed the fix. The raw unchanged test is what's committed.
|
||||||
|
|
||||||
|
### 2. AgentSseControllerIT (3 failures) & SseSigningIT (1 failure) — SSE connection timing
|
||||||
|
|
||||||
|
All failing assertions are `awaitConnection(5000)` timeouts or `ConditionTimeoutException` on SSE stream observation. Not related to any spec drift I could identify — the SSE server is up (other tests in the same classes connect fine), and auth/JWT is accepted. Looks like a real race on either the SseConnectionManager registration or on the HTTP client's first-read flush. Needs a dedicated debug session with a minimal reproducer; not something I wanted to hack around with sleeps.
|
||||||
|
|
||||||
|
Specific tests:
|
||||||
|
- `AgentSseControllerIT.sseConnect_unknownAgent_returns404` — 5s `CompletableFuture.get` timeout on an HTTP GET that should return 404 synchronously. Suggests the client is waiting on body data that never arrives (SSE stream opens even on 404?).
|
||||||
|
- `AgentSseControllerIT.lastEventIdHeader_connectionSucceeds` — `stream.awaitConnection(5000)` false.
|
||||||
|
- `AgentSseControllerIT.pingKeepalive_receivedViaSseStream` — waits for an event line in the stream snapshot, never sees it.
|
||||||
|
- `SseSigningIT.deepTraceEvent_containsValidSignature` — same pattern.
|
||||||
|
|
||||||
|
The sibling tests (`SseSigningIT.configUpdateEvent_containsValidEd25519Signature`) pass in isolation, which strongly suggests order-dependent flakiness rather than a protocol break.
|
||||||
|
|
||||||
|
## Final verify command
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mvn -pl cameleer-server-app -am -Dit.test='!SchemaBootstrapIT' -Dtest='!*' -DfailIfNoTests=false -Dsurefire.failIfNoSpecifiedTests=false verify
|
||||||
|
```
|
||||||
|
|
||||||
|
Reports land in `cameleer-server-app/target/failsafe-reports/`. Expect **12 failures** in the three classes above. Everything else is green.
|
||||||
|
|
||||||
|
## Side notes worth flagging
|
||||||
|
|
||||||
|
- **Property-key inconsistency in the main code** — surfaced via BackpressureIT. `IngestionConfig` is bound under `cameleer.server.ingestion.*`, but `MetricsFlushScheduler.@Scheduled` reads `ingestion.flush-interval-ms` (no prefix, hyphenated). In production this means the flush-interval in `application.yml` isn't actually being honoured by the metrics flush — it stays at the 1s fallback. Separate cleanup.
|
||||||
|
- **Shared Testcontainers PG across IT classes** — several of the "cross-test state" fixes (FlywayMigrationIT, ConfigEnvIsolationIT, SensitiveKeysAdminControllerIT) are symptoms of one underlying issue: `AbstractPostgresIT` uses a singleton PG container, and nothing cleans between test classes. Could do with a global `@Sql("/test-reset.sql")` on `@BeforeAll`, but out of scope here.
|
||||||
|
- **Agent registry shared across ITs** — same class of issue. Doesn't bite until a test explicitly inspects registry membership (SensitiveKeys `pushResult.total`).
|
||||||
|
|
||||||
|
## Follow-up (2026-04-22) — 12 parked failures closed
|
||||||
|
|
||||||
|
All three parked clusters now green. 560/560 tests passing.
|
||||||
|
|
||||||
|
- **ClickHouseStatsStoreIT (8 failures)** — fixed in `a9a6b465`. Two-layer TZ fix: JVM default TZ pinned to UTC in `CameleerServerApplication.main()` (the ClickHouse JDBC 0.9.7 driver formats `java.sql.Timestamp` via `Timestamp.toString()`, which uses JVM default TZ — a CEST JVM shipping to a UTC CH server stored off-by-offset Unix timestamps), plus column-level `bucket DateTime('UTC')` on all `stats_1m_*` tables with explicit `toDateTime(..., 'UTC')` casts in MV projections and `ClickHouseStatsStore.lit(Instant)` as defence in depth.
|
||||||
|
- **MetricsFlushScheduler property-key drift** — fixed in `a6944911`. Scheduler now reads `${cameleer.server.ingestion.flush-interval-ms:1000}` (the SpEL-via-`@ingestionConfig` approach doesn't work because `@EnableConfigurationProperties` uses a compound bean name). BackpressureIT workaround property removed.
|
||||||
|
- **SSE flakiness (4 failures, `AgentSseControllerIT` + `SseSigningIT`)** — fixed in `41df042e`. Triage's "order-dependent flakiness" theory was wrong — all four reproduced in isolation. Three root causes: (a) `AgentSseController.events` auto-heal was over-permissive (spoofing vector), fixed with JWT-subject-equals-path-id check; (b) `SseConnectionManager.pingAll` read an unprefixed property key (`agent-registry.ping-interval-ms`), same family of bug as (a6944911); (c) SSE response headers didn't flush until the first `emitter.send()`, so `awaitConnection(5s)` assertions timed out under the 15s ping cadence — fixed by sending an initial `: connected` comment on `connect()`. Full diagnosis in `.planning/sse-flakiness-diagnosis.md`.
|
||||||
|
|
||||||
|
Plus the two prod-code cleanups from the ExecutionController-removal follow-ons:
|
||||||
|
|
||||||
|
- **Dead `SearchIndexer` subsystem** — removed in `98cbf8f3`. `ExecutionUpdatedEvent` had no publisher after `0f635576`, so the whole indexer + stats + `/admin/clickhouse/pipeline` endpoint + UI pipeline card carried zero signal.
|
||||||
|
- **Unused `TaggedExecution` record** — removed in `06c6f53b`.
|
||||||
|
|
||||||
|
Final verify: `mvn -pl cameleer-server-app -am -Dit.test='!SchemaBootstrapIT' ... verify` → **Tests run: 560, Failures: 0, Errors: 0, Skipped: 0**.
|
||||||
81
.planning/sse-flakiness-diagnosis.md
Normal file
81
.planning/sse-flakiness-diagnosis.md
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
# SSE Flakiness — Root-Cause Analysis
|
||||||
|
|
||||||
|
**Date:** 2026-04-21
|
||||||
|
**Tests:** `AgentSseControllerIT.sseConnect_unknownAgent_returns404`, `.lastEventIdHeader_connectionSucceeds`, `.pingKeepalive_receivedViaSseStream`, `SseSigningIT.deepTraceEvent_containsValidSignature`
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Not order-dependent flakiness (triage report was wrong). Three distinct root causes, one production bug and one test-infrastructure issue, all reproducible when running the classes in isolation.
|
||||||
|
|
||||||
|
## Reproduction
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mvn -pl cameleer-server-app -am -Dit.test='AgentSseControllerIT' -Dtest='!*' \
|
||||||
|
-DfailIfNoTests=false -Dsurefire.failIfNoSpecifiedTests=false verify
|
||||||
|
```
|
||||||
|
|
||||||
|
Result: 3 failures out of 7 tests with a cold CH container. Not order-dependent.
|
||||||
|
|
||||||
|
## Root causes
|
||||||
|
|
||||||
|
### 1. `AgentSseController.events` auto-heal is over-permissive (security bug)
|
||||||
|
|
||||||
|
**File:** `cameleer-server-app/src/main/java/com/cameleer/server/app/controller/AgentSseController.java:63-76`
|
||||||
|
|
||||||
|
```java
|
||||||
|
AgentInfo agent = registryService.findById(id);
|
||||||
|
if (agent == null) {
|
||||||
|
var jwtResult = ...;
|
||||||
|
if (jwtResult != null) { // ← only checks JWT presence
|
||||||
|
registryService.register(id, id, application, env, ...);
|
||||||
|
} else {
|
||||||
|
throw 404;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Bug:** auto-heal registers *any* path id when any valid JWT is present, regardless of whether the JWT subject matches the path id. A holder of agent X's JWT can open SSE for any path-id Y, silently spoofing Y.
|
||||||
|
|
||||||
|
**Surface symptom:** `sseConnect_unknownAgent_returns404` sends a JWT for `test-agent-sse-it` and requests SSE for `unknown-sse-agent`. Auto-heal kicks in, returns 200 with an infinite empty stream. Test's `statusFuture.get(5s)` — which uses `BodyHandlers.ofString()` and waits for the full body — times out instead of getting a synchronous 404.
|
||||||
|
|
||||||
|
**Fix:** only auto-heal when `jwtResult.subject().equals(id)`.
|
||||||
|
|
||||||
|
### 2. `SseConnectionManager.pingAll` reads an unprefixed property key (production bug)
|
||||||
|
|
||||||
|
**File:** `cameleer-server-app/src/main/java/com/cameleer/server/app/agent/SseConnectionManager.java:172`
|
||||||
|
|
||||||
|
```java
|
||||||
|
@Scheduled(fixedDelayString = "${agent-registry.ping-interval-ms:15000}")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Bug:** `AgentRegistryConfig` is `@ConfigurationProperties(prefix = "cameleer.server.agentregistry")`. The scheduler reads an unprefixed `agent-registry.*` key that the YAML never defines — so the default 15s always applies, regardless of config. Same family of bug as the `MetricsFlushScheduler` fix in commit `a6944911`.
|
||||||
|
|
||||||
|
**Fix:** `${cameleer.server.agentregistry.ping-interval-ms:15000}`.
|
||||||
|
|
||||||
|
### 3. SSE response body doesn't flush until first event (test timing dependency)
|
||||||
|
|
||||||
|
**File:** `cameleer-server-app/src/main/java/com/cameleer/server/app/agent/SseConnectionManager.java:connect()`
|
||||||
|
|
||||||
|
Spring's `SseEmitter` holds the response open but doesn't flush headers to the client until the first `emitter.send()`. Until then, clients using `HttpResponse.BodyHandlers.ofInputStream()` block on the first byte.
|
||||||
|
|
||||||
|
**Surface symptom:**
|
||||||
|
- `lastEventIdHeader_connectionSucceeds` — asserts `awaitConnection(5000)` is `true`. The latch counts down in `.thenAccept(response -> ...)`, which in practice only fires once body bytes start flowing (JDK 21 behaviour with SSE streams). Default ping cadence is 15s → 5s assertion times out.
|
||||||
|
- `pingKeepalive_receivedViaSseStream` — waits 5s for a `:ping` line. The scheduler runs every 15s (both by default, and because of bug #2, unconditionally).
|
||||||
|
- `SseSigningIT.deepTraceEvent_containsValidSignature` — same family: `awaitConnection(5000).isTrue()`.
|
||||||
|
|
||||||
|
**Fix:** send an initial `: connected` comment as part of `connect()`. Spring flushes on the first `.send()`, so an immediate comment forces the response headers + first byte to hit the wire, which triggers the client's `thenAccept` callback. Also solves the ping-test: the initial comment is observed as a keepalive line within the test's polling window.
|
||||||
|
|
||||||
|
## Hypothesis ladder (ruled out)
|
||||||
|
|
||||||
|
- **Order-dependent singleton leak** — ruled out: every failure reproduces when the class is run solo.
|
||||||
|
- **Tomcat async thread pool exhaustion** — ruled out: `SseEmitter(Long.MAX_VALUE)` does hold threads, but the 7-test class doesn't reach Tomcat's defaults.
|
||||||
|
- **SseConnectionManager emitter-map contamination** — ruled out: each test uses a unique agent id (UUID-suffixed), and the `@Component` is the same instance across tests but the emitter map is keyed by agent id, no collisions.
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
```
|
||||||
|
mvn -pl cameleer-server-app -am -Dit.test='AgentSseControllerIT,SseSigningIT' ... verify
|
||||||
|
# Tests run: 9, Failures: 0, Errors: 0, Skipped: 0
|
||||||
|
```
|
||||||
|
|
||||||
|
All 9 tests green with the three fixes applied.
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
<!-- gitnexus:start -->
|
<!-- gitnexus:start -->
|
||||||
# GitNexus — Code Intelligence
|
# GitNexus — Code Intelligence
|
||||||
|
|
||||||
This project is indexed by GitNexus as **cameleer-server** (6306 symbols, 15892 relationships, 300 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
|
This project is indexed by GitNexus as **cameleer-server** (9731 symbols, 24987 relationships, 300 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
|
||||||
|
|
||||||
> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
|
> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
|
||||||
|
|
||||||
|
|||||||
43
CLAUDE.md
43
CLAUDE.md
@@ -10,20 +10,42 @@ Cameleer Server — observability server that receives, stores, and serves Camel
|
|||||||
|
|
||||||
- **cameleer** (`https://gitea.siegeln.net/cameleer/cameleer`) — the Java agent that instruments Camel applications
|
- **cameleer** (`https://gitea.siegeln.net/cameleer/cameleer`) — the Java agent that instruments Camel applications
|
||||||
- Protocol defined in `cameleer-common/PROTOCOL.md` in the agent repo
|
- Protocol defined in `cameleer-common/PROTOCOL.md` in the agent repo
|
||||||
- This server depends on `com.cameleer:cameleer-common` (shared models and graph API)
|
- This server depends on `io.cameleer:cameleer-common` (shared models and graph API).
|
||||||
|
|
||||||
|
## Registry naming (buildtime vs public)
|
||||||
|
|
||||||
|
The Gitea container / Maven / npm registry has **two DNS names that resolve to the same instance**:
|
||||||
|
|
||||||
|
- `gitea.siegeln.net` — internal hostname. CI pushes images here; `pom.xml`, `ui/.npmrc`, `ui/Dockerfile`, `ui/package-lock.json`, `deploy/`, and `.gitea/workflows/*.yml` all reference it. **Buildtime infrastructure.**
|
||||||
|
- `registry.cameleer.io` — public alias customers pull from. Compiled-in defaults (`application.yml`, `DeploymentExecutor.java` `@Value`), customer-facing docs (`HOWTO.md`, `ui/README.md`), and runtime image refs use this. **Customer-visible.**
|
||||||
|
|
||||||
|
The asymmetry is **intentional** during the institutionalization period — CI keeps publishing to the internal name while everything customer-shipped speaks the public name. Don't "fix" mismatches between, e.g., `pom.xml`'s registry URL and `application.yml`'s loader-image default; they speak to different audiences.
|
||||||
|
|
||||||
## Modules
|
## Modules
|
||||||
|
|
||||||
|
- `cameleer-license-api` — pure license contract types (`LicenseInfo`, `LicenseValidator`, `LicenseState`, `LicenseStateMachine`, `LicenseLimits`, `DefaultTierLimits`) under package `io.cameleer.license`. No Spring or server-runtime deps; consumed by `cameleer-server-core` (validation/runtime gate) and `cameleer-license-minter` (vendor signing) — and transitively by `cameleer-saas` via the minter — without inheriting server internals.
|
||||||
- `cameleer-server-core` — domain logic, storage interfaces, services (no Spring dependencies)
|
- `cameleer-server-core` — domain logic, storage interfaces, services (no Spring dependencies)
|
||||||
- `cameleer-server-app` — Spring Boot web app, REST controllers, SSE, persistence, Docker orchestration
|
- `cameleer-server-app` — Spring Boot web app, REST controllers, SSE, persistence, Docker orchestration
|
||||||
|
- `cameleer-license-minter` — vendor-only Ed25519 license signing library + CLI. Depends only on `cameleer-license-api` so consumers don't pull in `cameleer-server-core`.
|
||||||
|
|
||||||
## Build Commands
|
## Build Commands
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
mvn clean compile # Compile all modules
|
mvn clean compile # Compile all modules
|
||||||
mvn clean verify # Full build with tests
|
mvn clean verify # Full build with tests
|
||||||
|
mvn clean verify -DskipITs # Fast: unit tests only (no Testcontainers)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Faster local builds
|
||||||
|
|
||||||
|
- **Surefire reuses forks** (`cameleer-server-app/pom.xml`): unit tests run with `forkCount=1C` + `reuseForks=true` — one JVM per CPU core, reused across classes. Test classes that mutate static state must clean up after themselves.
|
||||||
|
- **Testcontainers reuse** — opt-in per developer. Add to `~/.testcontainers.properties`:
|
||||||
|
```
|
||||||
|
testcontainers.reuse.enable=true
|
||||||
|
```
|
||||||
|
Then `AbstractPostgresIT` containers persist across `mvn verify` runs (saves ~20s per run). Stop them manually when you need a clean DB: `docker rm -f $(docker ps -aq --filter label=org.testcontainers.reuse=true)`.
|
||||||
|
- **UI build** dropped redundant `tsc --noEmit` from `npm run build` (Vite/esbuild type-checks during bundling). Run `npm run typecheck` explicitly when you want a full type-check pass.
|
||||||
|
|
||||||
## Run
|
## Run
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -34,7 +56,7 @@ java -jar cameleer-server-app/target/cameleer-server-app-1.0-SNAPSHOT.jar
|
|||||||
|
|
||||||
- Java 17+ required
|
- Java 17+ required
|
||||||
- Spring Boot 3.4.3 parent POM
|
- Spring Boot 3.4.3 parent POM
|
||||||
- Depends on `com.cameleer:cameleer-common` from Gitea Maven registry
|
- Depends on `io.cameleer:cameleer-common` from Gitea Maven registry
|
||||||
- Jackson `JavaTimeModule` for `Instant` deserialization
|
- Jackson `JavaTimeModule` for `Instant` deserialization
|
||||||
- Communication: receives HTTP POST data from agents (executions, diagrams, metrics, logs), serves SSE event streams for config push/commands (config-update, deep-trace, replay, route-control)
|
- Communication: receives HTTP POST data from agents (executions, diagrams, metrics, logs), serves SSE event streams for config push/commands (config-update, deep-trace, replay, route-control)
|
||||||
- URL taxonomy: user-facing data, config, and query endpoints live under `/api/v1/environments/{envSlug}/...`. Env is a path segment, resolved via the `@EnvPath` argument resolver (404 on unknown slug). Flat endpoints are only for: agent self-service (JWT-authoritative), cross-env admin (RBAC, OIDC, audit, license, thresholds, env CRUD), cross-env discovery (`/catalog`), content-addressed lookups (`/diagrams/{contentHash}/render`, `/executions/{id}`), and auth. See `.claude/rules/app-classes.md` for the full allow-list.
|
- URL taxonomy: user-facing data, config, and query endpoints live under `/api/v1/environments/{envSlug}/...`. Env is a path segment, resolved via the `@EnvPath` argument resolver (404 on unknown slug). Flat endpoints are only for: agent self-service (JWT-authoritative), cross-env admin (RBAC, OIDC, audit, license, thresholds, env CRUD), cross-env discovery (`/catalog`), content-addressed lookups (`/diagrams/{contentHash}/render`, `/executions/{id}`), and auth. See `.claude/rules/app-classes.md` for the full allow-list.
|
||||||
@@ -48,25 +70,18 @@ java -jar cameleer-server-app/target/cameleer-server-app-1.0-SNAPSHOT.jar
|
|||||||
- Log processor correlation: The agent sets `cameleer.processorId` in MDC, identifying which processor node emitted a log line.
|
- Log processor correlation: The agent sets `cameleer.processorId` in MDC, identifying which processor node emitted a log line.
|
||||||
- Logging: ClickHouse JDBC set to INFO (`com.clickhouse`), HTTP client to WARN (`org.apache.hc.client5`) in application.yml
|
- Logging: ClickHouse JDBC set to INFO (`com.clickhouse`), HTTP client to WARN (`org.apache.hc.client5`) in application.yml
|
||||||
- Security: JWT auth with RBAC (AGENT/VIEWER/OPERATOR/ADMIN roles), Ed25519 config signing (key derived deterministically from JWT secret via HMAC-SHA256), bootstrap token for registration. CORS: `CAMELEER_SERVER_SECURITY_CORSALLOWEDORIGINS` (comma-separated) overrides `CAMELEER_SERVER_SECURITY_UIORIGIN` for multi-origin setups. Infrastructure access: `CAMELEER_SERVER_SECURITY_INFRASTRUCTUREENDPOINTS=false` disables Database and ClickHouse admin endpoints. Last-ADMIN guard: system prevents removal of the last ADMIN role (409 Conflict). Password policy: min 12 chars, 3-of-4 character classes, no username match. Brute-force protection: 5 failed attempts -> 15 min lockout. Token revocation: `token_revoked_before` column on users, checked in `JwtAuthenticationFilter`, set on password change.
|
- Security: JWT auth with RBAC (AGENT/VIEWER/OPERATOR/ADMIN roles), Ed25519 config signing (key derived deterministically from JWT secret via HMAC-SHA256), bootstrap token for registration. CORS: `CAMELEER_SERVER_SECURITY_CORSALLOWEDORIGINS` (comma-separated) overrides `CAMELEER_SERVER_SECURITY_UIORIGIN` for multi-origin setups. Infrastructure access: `CAMELEER_SERVER_SECURITY_INFRASTRUCTUREENDPOINTS=false` disables Database and ClickHouse admin endpoints. Last-ADMIN guard: system prevents removal of the last ADMIN role (409 Conflict). Password policy: min 12 chars, 3-of-4 character classes, no username match. Brute-force protection: 5 failed attempts -> 15 min lockout. Token revocation: `token_revoked_before` column on users, checked in `JwtAuthenticationFilter`, set on password change.
|
||||||
|
- Login routing: `GET /api/v1/auth/capabilities` (unauthenticated) tells the SPA whether OIDC is the primary entry point. When OIDC is configured, the SSO button is the primary CTA and the local form is hidden behind `?local` (admin-recovery escape hatch). Per RFC 9700 §4.4 we do **not** use `prompt=none` for primary login — that returns `login_required` for first-time users and traps them on a local form.
|
||||||
- OIDC: Optional external identity provider support (token exchange pattern). Configured via admin API/UI, stored in database (`server_config` table). Resource server mode: accepts external access tokens (Logto M2M) via JWKS validation when `CAMELEER_SERVER_SECURITY_OIDCISSUERURI` is set. Scope-based role mapping via `SystemRole.normalizeScope()`. System roles synced on every OIDC login via `applyClaimMappings()` in `OidcAuthController` (calls `clearManagedAssignments` + `assignManagedRole` on `RbacService`) — always overwrites managed role assignments; uses managed assignment origin to avoid touching group-inherited or directly-assigned roles. Supports ES384, ES256, RS256.
|
- OIDC: Optional external identity provider support (token exchange pattern). Configured via admin API/UI, stored in database (`server_config` table). Resource server mode: accepts external access tokens (Logto M2M) via JWKS validation when `CAMELEER_SERVER_SECURITY_OIDCISSUERURI` is set. Scope-based role mapping via `SystemRole.normalizeScope()`. System roles synced on every OIDC login via `applyClaimMappings()` in `OidcAuthController` (calls `clearManagedAssignments` + `assignManagedRole` on `RbacService`) — always overwrites managed role assignments; uses managed assignment origin to avoid touching group-inherited or directly-assigned roles. Supports ES384, ES256, RS256.
|
||||||
- OIDC role extraction: `OidcTokenExchanger` reads roles from the **access_token** first (JWT with `at+jwt` type), then falls back to id_token. `OidcConfig` includes `audience` (RFC 8707 resource indicator) and `additionalScopes`. All provider-specific configuration is external — no provider-specific code in the server.
|
- OIDC role extraction: `OidcTokenExchanger` reads roles from the **access_token** first (JWT with `at+jwt` type), then falls back to id_token. `OidcConfig` includes `audience` (RFC 8707 resource indicator) and `additionalScopes`. All provider-specific configuration is external — no provider-specific code in the server.
|
||||||
|
- Container orchestration: tenant containers no longer bind-mount JARs from the host. `DockerRuntimeOrchestrator.startContainer` runs a 2-phase op per replica — a `cameleer-runtime-loader` init container fetches the JAR from a signed URL into a per-replica named volume, then the main container mounts that volume RO at `/app/jars`. The loader image (default `registry.cameleer.io/cameleer/cameleer-runtime-loader:latest`) is built and published by **cameleer-saas** at `docker/runtime-loader/` — this repo only consumes it. (Same image, internal push target is `gitea.siegeln.net/cameleer/cameleer-runtime-loader:latest`; see "Registry naming" above.) Env vars: `CAMELEER_SERVER_RUNTIME_LOADERIMAGE` (override loader image); `CAMELEER_SERVER_RUNTIME_ARTIFACTTOKENTTLSECONDS` (signed-URL TTL, default `600`); `CAMELEER_SERVER_RUNTIME_ARTIFACTBASEURL` (base URL the loader uses to reach the server; defaults to `cameleer.server.runtime.serverurl`, then `http://cameleer-server:8081`). See `.claude/rules/docker-orchestration.md` for the full loader pattern; `LoaderHardeningIT` is the cross-repo contract test.
|
||||||
- Sensitive keys: Global enforced baseline for masking sensitive data in agent payloads. Merge rule: `final = global UNION per-app` (case-insensitive dedup, per-app can only add, never remove global keys).
|
- Sensitive keys: Global enforced baseline for masking sensitive data in agent payloads. Merge rule: `final = global UNION per-app` (case-insensitive dedup, per-app can only add, never remove global keys).
|
||||||
- User persistence: PostgreSQL `users` table, admin CRUD at `/api/v1/admin/users`
|
- User persistence: PostgreSQL `users` table, admin CRUD at `/api/v1/admin/users`. `users.user_id` is the **bare** identifier — local users as `<username>`, OIDC users as `oidc:<sub>`. JWT `sub` carries the `user:` namespace prefix so `JwtAuthenticationFilter` can tell user tokens from agent tokens; write paths (`UiAuthController`, `OidcAuthController`, `UserAdminController`) all upsert unprefixed, and env-scoped read-path controllers strip the `user:` prefix before using the value as an FK to `users.user_id` / `user_roles.user_id`. Alerting / outbound FKs (`alert_rules.created_by`, `outbound_connections.created_by`, …) therefore all reference the bare form.
|
||||||
- Usage analytics: ClickHouse `usage_events` table tracks authenticated UI requests, flushed every 5s
|
- Usage analytics: ClickHouse `usage_events` table tracks authenticated UI requests, flushed every 5s
|
||||||
|
|
||||||
## Database Migrations
|
## Database Migrations
|
||||||
|
|
||||||
PostgreSQL (Flyway): `cameleer-server-app/src/main/resources/db/migration/`
|
PostgreSQL (Flyway): `cameleer-server-app/src/main/resources/db/migration/`
|
||||||
- V1 — RBAC (users, roles, groups, audit_log). `application_config` PK is `(application, environment)`; `app_settings` PK is `(application_id, environment)` — both tables are env-scoped.
|
- V1 — Consolidated baseline schema. All prior V1–V18 evolution was collapsed before first prod deploy. Contains: RBAC (users, roles, groups, user_roles, user_groups, group_roles, claim_mapping_rules), runtime management (environments, apps, app_versions, deployments), env-scoped application config (application_config PK `(application, environment)`, app_settings PK `(application_id, environment)`), audit_log, outbound_connections, server_config, and the full alerting subsystem (alert_rules, alert_rule_targets, alert_instances, alert_silences, alert_notifications). Seeds the 4 system roles (AGENT/VIEWER/OPERATOR/ADMIN), the `Admins` group with ADMIN role, and a default environment. Invariants covered by `SchemaBootstrapIT`.
|
||||||
- V2 — Claim mappings (OIDC)
|
|
||||||
- V3 — Runtime management (apps, environments, deployments, app_versions)
|
|
||||||
- V4 — Environment config (default_container_config JSONB)
|
|
||||||
- V5 — App container config (container_config JSONB on apps)
|
|
||||||
- V6 — JAR retention policy (jar_retention_count on environments)
|
|
||||||
- V7 — Deployment orchestration (target_state, deployment_strategy, replica_states JSONB, deploy_stage)
|
|
||||||
- V8 — Deployment active config (resolved_config JSONB on deployments)
|
|
||||||
- V9 — Password hardening (failed_login_attempts, locked_until, token_revoked_before on users)
|
|
||||||
- V10 — Runtime type detection (detected_runtime_type, detected_main_class on app_versions)
|
|
||||||
|
|
||||||
ClickHouse: `cameleer-server-app/src/main/resources/clickhouse/init.sql` (run idempotently on startup)
|
ClickHouse: `cameleer-server-app/src/main/resources/clickhouse/init.sql` (run idempotently on startup)
|
||||||
|
|
||||||
@@ -94,7 +109,7 @@ When adding, removing, or renaming classes, controllers, endpoints, UI component
|
|||||||
<!-- gitnexus:start -->
|
<!-- gitnexus:start -->
|
||||||
# GitNexus — Code Intelligence
|
# GitNexus — Code Intelligence
|
||||||
|
|
||||||
This project is indexed by GitNexus as **cameleer-server** (6436 symbols, 16257 relationships, 300 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
|
This project is indexed by GitNexus as **cameleer-server** (10697 symbols, 27649 relationships, 300 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
|
||||||
|
|
||||||
> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
|
> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
|
||||||
|
|
||||||
|
|||||||
12
Dockerfile
12
Dockerfile
@@ -1,10 +1,14 @@
|
|||||||
FROM --platform=$BUILDPLATFORM maven:3.9-eclipse-temurin-17 AS build
|
FROM --platform=$BUILDPLATFORM maven:3.9-eclipse-temurin-17 AS build
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
# Configure Gitea Maven Registry for cameleer-common dependency
|
# Optional auth for Gitea Maven Registry. The `cameleer/cameleer-common` package
|
||||||
ARG REGISTRY_TOKEN
|
# is published publicly, so empty token → anonymous pull (no settings.xml).
|
||||||
RUN mkdir -p ~/.m2 && \
|
# Private packages require a non-empty token.
|
||||||
echo '<settings><servers><server><id>gitea</id><username>cameleer</username><password>'${REGISTRY_TOKEN}'</password></server></servers></settings>' > ~/.m2/settings.xml
|
ARG REGISTRY_TOKEN=""
|
||||||
|
RUN if [ -n "$REGISTRY_TOKEN" ]; then \
|
||||||
|
mkdir -p ~/.m2 && \
|
||||||
|
printf '<settings><servers><server><id>gitea</id><username>cameleer</username><password>%s</password></server></servers></settings>\n' "$REGISTRY_TOKEN" > ~/.m2/settings.xml; \
|
||||||
|
fi
|
||||||
|
|
||||||
COPY pom.xml .
|
COPY pom.xml .
|
||||||
COPY cameleer-server-core/pom.xml cameleer-server-core/
|
COPY cameleer-server-core/pom.xml cameleer-server-core/
|
||||||
|
|||||||
111
HOWTO.md
111
HOWTO.md
@@ -19,38 +19,99 @@ mvn clean compile # compile only
|
|||||||
mvn clean verify # compile + run all tests (needs Docker for integration tests)
|
mvn clean verify # compile + run all tests (needs Docker for integration tests)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Infrastructure Setup
|
## Start a brand-new local environment (Docker)
|
||||||
|
|
||||||
Start PostgreSQL:
|
The repo ships a `docker-compose.yml` with the full stack: PostgreSQL, ClickHouse, the Spring Boot server, and the nginx-served SPA. All dev defaults are baked into the compose file — no `.env` file or extra config needed for a first run.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# 1. Clean slate (safe if this is already a first run — noop when no volumes exist)
|
||||||
|
docker compose down -v
|
||||||
|
|
||||||
|
# 2. Build + start everything. First run rebuilds both images (~2–4 min).
|
||||||
|
docker compose up -d --build
|
||||||
|
|
||||||
|
# 3. Watch the server come up (health check goes green in ~60–90s after Flyway + ClickHouse init)
|
||||||
|
docker compose logs -f cameleer-server
|
||||||
|
# ready when you see "Started CameleerServerApplication in ...".
|
||||||
|
# Ctrl+C when ready — containers keep running.
|
||||||
|
|
||||||
|
# 4. Smoke test
|
||||||
|
curl -s http://localhost:8081/api/v1/health # → {"status":"UP"}
|
||||||
|
```
|
||||||
|
|
||||||
|
Open the UI at **http://localhost:8080** (nginx) and log in with **admin / admin**.
|
||||||
|
|
||||||
|
| Service | Host port | URL / notes |
|
||||||
|
|------------|-----------|-------------|
|
||||||
|
| Web UI (nginx) | 8080 | http://localhost:8080 — proxies `/api` to the server |
|
||||||
|
| Server API | 8081 | http://localhost:8081/api/v1/health, http://localhost:8081/api/v1/swagger-ui.html |
|
||||||
|
| PostgreSQL | 5432 | user `cameleer`, password `cameleer_dev`, db `cameleer` |
|
||||||
|
| ClickHouse | 8123 (HTTP), 9000 (native) | user `default`, no password, db `cameleer` |
|
||||||
|
|
||||||
|
**Dev credentials baked into compose (do not use in production):**
|
||||||
|
|
||||||
|
| Purpose | Value |
|
||||||
|
|---|---|
|
||||||
|
| UI login | `admin` / `admin` |
|
||||||
|
| Bootstrap token (agent registration) | `dev-bootstrap-token-for-local-agent-registration` |
|
||||||
|
| JWT secret | `dev-jwt-secret-32-bytes-min-0123456789abcdef0123456789abcdef` |
|
||||||
|
| `CAMELEER_SERVER_RUNTIME_ENABLED` | `false` (Docker-in-Docker app orchestration off for the local stack) |
|
||||||
|
|
||||||
|
Override any of these by editing `docker-compose.yml` or passing `-e KEY=value` to `docker compose run`.
|
||||||
|
|
||||||
|
### Common lifecycle commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Stop everything but keep volumes (quick restart later)
|
||||||
|
docker compose stop
|
||||||
|
|
||||||
|
# Start again after a stop
|
||||||
|
docker compose start
|
||||||
|
|
||||||
|
# Apply changes to the server code / UI — rebuild just what changed
|
||||||
|
docker compose up -d --build cameleer-server
|
||||||
|
docker compose up -d --build cameleer-ui
|
||||||
|
|
||||||
|
# Wipe the environment completely (drops PG + ClickHouse volumes — all data gone)
|
||||||
|
docker compose down -v
|
||||||
|
|
||||||
|
# Fresh Flyway run by dropping just the PG volume (keeps ClickHouse data)
|
||||||
|
docker compose down
|
||||||
|
docker volume rm cameleer-server_cameleer-pgdata
|
||||||
docker compose up -d
|
docker compose up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
This starts PostgreSQL 16. The database schema is applied automatically via Flyway migrations on server startup. ClickHouse tables are created by the schema initializer on startup.
|
### Infra-only mode (backend via `mvn` / UI via Vite)
|
||||||
|
|
||||||
| Service | Port | Purpose |
|
If you want to iterate on backend/UI code without rebuilding the server image on every change, start just the databases and run the server + UI locally:
|
||||||
|------------|------|----------------------|
|
|
||||||
| PostgreSQL | 5432 | JDBC (Spring JDBC) |
|
|
||||||
|
|
||||||
PostgreSQL credentials: `cameleer` / `cameleer_dev`, database `cameleer`.
|
|
||||||
|
|
||||||
## Run the Server
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# 1. Only infra containers
|
||||||
|
docker compose up -d cameleer-postgres cameleer-clickhouse
|
||||||
|
|
||||||
|
# 2. Build and run the server jar against those containers
|
||||||
mvn clean package -DskipTests
|
mvn clean package -DskipTests
|
||||||
SPRING_DATASOURCE_URL=jdbc:postgresql://localhost:5432/cameleer \
|
SPRING_DATASOURCE_URL="jdbc:postgresql://localhost:5432/cameleer?currentSchema=tenant_default&ApplicationName=tenant_default" \
|
||||||
SPRING_DATASOURCE_USERNAME=cameleer \
|
SPRING_DATASOURCE_USERNAME=cameleer \
|
||||||
SPRING_DATASOURCE_PASSWORD=cameleer_dev \
|
SPRING_DATASOURCE_PASSWORD=cameleer_dev \
|
||||||
CAMELEER_SERVER_SECURITY_BOOTSTRAPTOKEN=my-secret-token \
|
SPRING_FLYWAY_USER=cameleer \
|
||||||
|
SPRING_FLYWAY_PASSWORD=cameleer_dev \
|
||||||
|
CAMELEER_SERVER_CLICKHOUSE_URL="jdbc:clickhouse://localhost:8123/cameleer" \
|
||||||
|
CAMELEER_SERVER_CLICKHOUSE_USERNAME=default \
|
||||||
|
CAMELEER_SERVER_CLICKHOUSE_PASSWORD= \
|
||||||
|
CAMELEER_SERVER_SECURITY_BOOTSTRAPTOKEN=dev-bootstrap-token-for-local-agent-registration \
|
||||||
|
CAMELEER_SERVER_SECURITY_JWTSECRET=dev-jwt-secret-32-bytes-min-0123456789abcdef0123456789abcdef \
|
||||||
|
CAMELEER_SERVER_RUNTIME_ENABLED=false \
|
||||||
|
CAMELEER_SERVER_TENANT_ID=default \
|
||||||
java -jar cameleer-server-app/target/cameleer-server-app-1.0-SNAPSHOT.jar
|
java -jar cameleer-server-app/target/cameleer-server-app-1.0-SNAPSHOT.jar
|
||||||
|
|
||||||
|
# 3. In another terminal — Vite dev server on :5173 (proxies /api → :8081)
|
||||||
|
cd ui && npm install && npm run dev
|
||||||
```
|
```
|
||||||
|
|
||||||
> **Note:** The Docker image no longer includes default database credentials. When running via `docker run`, pass `-e SPRING_DATASOURCE_URL=...` etc. The docker-compose setup provides these automatically.
|
Database schema is applied automatically: PostgreSQL via Flyway migrations on server startup, ClickHouse tables via `ClickHouseSchemaInitializer`. No manual DDL needed.
|
||||||
|
|
||||||
The server starts on **port 8081**. The `CAMELEER_SERVER_SECURITY_BOOTSTRAPTOKEN` environment variable is **required** — the server fails fast on startup if it is not set.
|
`CAMELEER_SERVER_SECURITY_BOOTSTRAPTOKEN` is **required** for agent registration — the server fails fast on startup if it's not set. For token rotation without downtime, set `CAMELEER_SERVER_SECURITY_BOOTSTRAPTOKENPREVIOUS` to the old token while rolling out the new one — the server accepts both during the overlap window.
|
||||||
|
|
||||||
For token rotation without downtime, set `CAMELEER_SERVER_SECURITY_BOOTSTRAPTOKENPREVIOUS` to the old token while rolling out the new one. The server accepts both during the overlap window.
|
|
||||||
|
|
||||||
## API Endpoints
|
## API Endpoints
|
||||||
|
|
||||||
@@ -177,7 +238,7 @@ Logto is proxy-aware via `TRUST_PROXY_HEADER=1`. The `LOGTO_ENDPOINT` and `LOGTO
|
|||||||
- Note the **Client ID**
|
- Note the **Client ID**
|
||||||
3. **Create API Resource**: API Resources → Create
|
3. **Create API Resource**: API Resources → Create
|
||||||
- Name: `Cameleer Server API`
|
- Name: `Cameleer Server API`
|
||||||
- Indicator: your API URL (e.g., `https://cameleer.siegeln.net/api`)
|
- Indicator: your API URL (e.g., `https://cameleer.example.com/api`)
|
||||||
- Add permissions: `server:admin`, `server:operator`, `server:viewer`
|
- Add permissions: `server:admin`, `server:operator`, `server:viewer`
|
||||||
4. **Create M2M application** (for SaaS platform): Applications → Create → Machine-to-Machine
|
4. **Create M2M application** (for SaaS platform): Applications → Create → Machine-to-Machine
|
||||||
- Name: `Cameleer SaaS`
|
- Name: `Cameleer SaaS`
|
||||||
@@ -433,11 +494,15 @@ Key settings in `cameleer-server-app/src/main/resources/application.yml`. All cu
|
|||||||
| `cameleer.server.runtime.enabled` | `true` | `CAMELEER_SERVER_RUNTIME_ENABLED` | Enable Docker orchestration |
|
| `cameleer.server.runtime.enabled` | `true` | `CAMELEER_SERVER_RUNTIME_ENABLED` | Enable Docker orchestration |
|
||||||
| `cameleer.server.runtime.baseimage` | `cameleer-runtime-base:latest` | `CAMELEER_SERVER_RUNTIME_BASEIMAGE` | Base Docker image for app containers |
|
| `cameleer.server.runtime.baseimage` | `cameleer-runtime-base:latest` | `CAMELEER_SERVER_RUNTIME_BASEIMAGE` | Base Docker image for app containers |
|
||||||
| `cameleer.server.runtime.dockernetwork` | `cameleer` | `CAMELEER_SERVER_RUNTIME_DOCKERNETWORK` | Primary Docker network |
|
| `cameleer.server.runtime.dockernetwork` | `cameleer` | `CAMELEER_SERVER_RUNTIME_DOCKERNETWORK` | Primary Docker network |
|
||||||
| `cameleer.server.runtime.jarstoragepath` | `/data/jars` | `CAMELEER_SERVER_RUNTIME_JARSTORAGEPATH` | JAR file storage directory |
|
| `cameleer.server.runtime.dockerruntime` | *(empty = auto)* | `CAMELEER_SERVER_RUNTIME_DOCKERRUNTIME` | Container runtime override. Empty auto-detects gVisor (`runsc`) when registered with the daemon and falls back to the daemon default. Set to e.g. `kata` to force a specific runtime, or `runc` to force the default even if `runsc` is installed. |
|
||||||
| `cameleer.server.runtime.jardockervolume` | *(empty)* | `CAMELEER_SERVER_RUNTIME_JARDOCKERVOLUME` | Docker volume for JAR sharing |
|
| `cameleer.server.runtime.jarstoragepath` | `/data/jars` | `CAMELEER_SERVER_RUNTIME_JARSTORAGEPATH` | JAR file storage directory (used by `FilesystemArtifactStore`) |
|
||||||
|
| `cameleer.server.runtime.loaderimage` | `registry.cameleer.io/cameleer/cameleer-runtime-loader:latest` | `CAMELEER_SERVER_RUNTIME_LOADERIMAGE` | Init-container image that fetches the JAR via signed URL |
|
||||||
|
| `cameleer.server.runtime.artifacttokenttlseconds` | `600` | `CAMELEER_SERVER_RUNTIME_ARTIFACTTOKENTTLSECONDS` | TTL (seconds) for HMAC-signed artifact-download URLs |
|
||||||
|
| `cameleer.server.runtime.artifactbaseurl` | *(empty)* | `CAMELEER_SERVER_RUNTIME_ARTIFACTBASEURL` | Base URL the loader uses to reach the server. Blank falls back to `serverurl`, then `http://cameleer-server:8081`. Must be reachable from the loader container's primary Docker network. |
|
||||||
| `cameleer.server.runtime.routingmode` | `path` | `CAMELEER_SERVER_RUNTIME_ROUTINGMODE` | `path` or `subdomain` Traefik routing |
|
| `cameleer.server.runtime.routingmode` | `path` | `CAMELEER_SERVER_RUNTIME_ROUTINGMODE` | `path` or `subdomain` Traefik routing |
|
||||||
| `cameleer.server.runtime.routingdomain` | `localhost` | `CAMELEER_SERVER_RUNTIME_ROUTINGDOMAIN` | Domain for Traefik routing labels |
|
| `cameleer.server.runtime.routingdomain` | `localhost` | `CAMELEER_SERVER_RUNTIME_ROUTINGDOMAIN` | Domain for Traefik routing labels |
|
||||||
| `cameleer.server.runtime.serverurl` | *(empty)* | `CAMELEER_SERVER_RUNTIME_SERVERURL` | Server URL injected into app containers |
|
| `cameleer.server.runtime.serverurl` | *(empty)* | `CAMELEER_SERVER_RUNTIME_SERVERURL` | Server URL injected into app containers |
|
||||||
|
| `cameleer.server.runtime.certresolver` | *(empty)* | `CAMELEER_SERVER_RUNTIME_CERTRESOLVER` | Traefik TLS cert resolver name (e.g. `letsencrypt`). Blank = omit the `tls.certresolver` label and let Traefik serve the default TLS-store cert |
|
||||||
| `cameleer.server.runtime.agenthealthport` | `9464` | `CAMELEER_SERVER_RUNTIME_AGENTHEALTHPORT` | Agent health check port |
|
| `cameleer.server.runtime.agenthealthport` | `9464` | `CAMELEER_SERVER_RUNTIME_AGENTHEALTHPORT` | Agent health check port |
|
||||||
| `cameleer.server.runtime.healthchecktimeout` | `60` | `CAMELEER_SERVER_RUNTIME_HEALTHCHECKTIMEOUT` | Health check timeout (seconds) |
|
| `cameleer.server.runtime.healthchecktimeout` | `60` | `CAMELEER_SERVER_RUNTIME_HEALTHCHECKTIMEOUT` | Health check timeout (seconds) |
|
||||||
| `cameleer.server.runtime.container.memorylimit` | `512m` | `CAMELEER_SERVER_RUNTIME_CONTAINER_MEMORYLIMIT` | Default memory limit for app containers |
|
| `cameleer.server.runtime.container.memorylimit` | `512m` | `CAMELEER_SERVER_RUNTIME_CONTAINER_MEMORYLIMIT` | Default memory limit for app containers |
|
||||||
@@ -526,10 +591,10 @@ cameleer-demo namespace:
|
|||||||
|
|
||||||
| Service | URL |
|
| Service | URL |
|
||||||
|---------|-----|
|
|---------|-----|
|
||||||
| Web UI | `http://192.168.50.86:30090` |
|
| Web UI | `http://<your-cluster-host>:30090` |
|
||||||
| Server API | `http://192.168.50.86:30081/api/v1/health` |
|
| Server API | `http://<your-cluster-host>:30081/api/v1/health` |
|
||||||
| Swagger UI | `http://192.168.50.86:30081/api/v1/swagger-ui.html` |
|
| Swagger UI | `http://<your-cluster-host>:30081/api/v1/swagger-ui.html` |
|
||||||
| Deploy Demo | `http://192.168.50.86:30092` |
|
| Deploy Demo | `http://<your-cluster-host>:30092` |
|
||||||
| Logto API | `LOGTO_ENDPOINT` secret (NodePort 30951 direct, or behind reverse proxy) |
|
| Logto API | `LOGTO_ENDPOINT` secret (NodePort 30951 direct, or behind reverse proxy) |
|
||||||
| Logto Admin | `LOGTO_ADMIN_ENDPOINT` secret (NodePort 30952 direct, or behind reverse proxy) |
|
| Logto Admin | `LOGTO_ADMIN_ENDPOINT` secret (NodePort 30952 direct, or behind reverse proxy) |
|
||||||
|
|
||||||
|
|||||||
54
cameleer-license-api/pom.xml
Normal file
54
cameleer-license-api/pom.xml
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>io.cameleer</groupId>
|
||||||
|
<artifactId>cameleer-server-parent</artifactId>
|
||||||
|
<version>1.0-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>cameleer-license-api</artifactId>
|
||||||
|
<name>Cameleer License API</name>
|
||||||
|
<description>Pure license contract types — LicenseInfo, LicenseValidator, LicenseState, LicenseStateMachine, LicenseLimits, DefaultTierLimits. Shared by server-core (validation/runtime gate) and cameleer-license-minter (vendor-side signing). Has no Spring or server-runtime dependencies so consumers like cameleer-saas can depend on the minter without inheriting server internals.</description>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
|
<artifactId>jackson-databind</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.junit.jupiter</groupId>
|
||||||
|
<artifactId>junit-jupiter</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.assertj</groupId>
|
||||||
|
<artifactId>assertj-core</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.springframework.boot</groupId>
|
||||||
|
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<!-- Plain library JAR — no repackage. -->
|
||||||
|
<execution>
|
||||||
|
<id>repackage</id>
|
||||||
|
<phase>none</phase>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</project>
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
package io.cameleer.license;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public final class DefaultTierLimits {
|
||||||
|
|
||||||
|
public static final Map<String, Integer> DEFAULTS;
|
||||||
|
|
||||||
|
static {
|
||||||
|
Map<String, Integer> m = new LinkedHashMap<>();
|
||||||
|
m.put("max_environments", 1);
|
||||||
|
m.put("max_apps", 3);
|
||||||
|
m.put("max_agents", 5);
|
||||||
|
m.put("max_users", 3);
|
||||||
|
m.put("max_outbound_connections", 1);
|
||||||
|
m.put("max_alert_rules", 2);
|
||||||
|
m.put("max_total_cpu_millis", 2000);
|
||||||
|
m.put("max_total_memory_mb", 2048);
|
||||||
|
m.put("max_total_replicas", 5);
|
||||||
|
m.put("max_execution_retention_days", 1);
|
||||||
|
m.put("max_log_retention_days", 1);
|
||||||
|
m.put("max_metric_retention_days", 1);
|
||||||
|
m.put("max_jar_retention_count", 3);
|
||||||
|
DEFAULTS = Collections.unmodifiableMap(m);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DefaultTierLimits() {}
|
||||||
|
}
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
package io.cameleer.license;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
/** A parsed and signature-verified license. Construct via {@link LicenseValidator}. */
|
||||||
|
public record LicenseInfo(
|
||||||
|
UUID licenseId,
|
||||||
|
String tenantId,
|
||||||
|
String label,
|
||||||
|
Map<String, Integer> limits,
|
||||||
|
Instant issuedAt,
|
||||||
|
Instant expiresAt,
|
||||||
|
int gracePeriodDays
|
||||||
|
) {
|
||||||
|
public LicenseInfo {
|
||||||
|
Objects.requireNonNull(licenseId, "licenseId is required");
|
||||||
|
Objects.requireNonNull(tenantId, "tenantId is required");
|
||||||
|
Objects.requireNonNull(limits, "limits is required");
|
||||||
|
Objects.requireNonNull(issuedAt, "issuedAt is required");
|
||||||
|
Objects.requireNonNull(expiresAt, "expiresAt is required");
|
||||||
|
if (tenantId.isBlank()) {
|
||||||
|
throw new IllegalArgumentException("tenantId must not be blank");
|
||||||
|
}
|
||||||
|
if (gracePeriodDays < 0) {
|
||||||
|
throw new IllegalArgumentException("gracePeriodDays must be >= 0");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** True iff now > expiresAt + gracePeriodDays. */
|
||||||
|
public boolean isExpired() {
|
||||||
|
Instant deadline = expiresAt.plusSeconds((long) gracePeriodDays * 86400);
|
||||||
|
return Instant.now().isAfter(deadline);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** True iff now > expiresAt (regardless of grace). Used by the state machine to distinguish ACTIVE from GRACE. */
|
||||||
|
public boolean isAfterRawExpiry() {
|
||||||
|
return Instant.now().isAfter(expiresAt);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getLimit(String key, int defaultValue) {
|
||||||
|
return limits.getOrDefault(key, defaultValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
package io.cameleer.license;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
public record LicenseLimits(Map<String, Integer> values) {
|
||||||
|
|
||||||
|
public LicenseLimits {
|
||||||
|
Objects.requireNonNull(values, "values");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static LicenseLimits defaultsOnly() {
|
||||||
|
return new LicenseLimits(DefaultTierLimits.DEFAULTS);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static LicenseLimits mergeOverDefaults(Map<String, Integer> overrides) {
|
||||||
|
Map<String, Integer> merged = new LinkedHashMap<>(DefaultTierLimits.DEFAULTS);
|
||||||
|
if (overrides != null) merged.putAll(overrides);
|
||||||
|
return new LicenseLimits(Collections.unmodifiableMap(merged));
|
||||||
|
}
|
||||||
|
|
||||||
|
public int get(String key) {
|
||||||
|
Integer v = values.get(key);
|
||||||
|
if (v == null) {
|
||||||
|
throw new IllegalArgumentException("Unknown license limit key: " + key);
|
||||||
|
}
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isDefaultSourced(String key, LicenseInfo license) {
|
||||||
|
if (license == null) return true;
|
||||||
|
return !license.limits().containsKey(key);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
package io.cameleer.license;
|
||||||
|
|
||||||
|
public enum LicenseState {
|
||||||
|
ABSENT,
|
||||||
|
ACTIVE,
|
||||||
|
GRACE,
|
||||||
|
EXPIRED,
|
||||||
|
INVALID
|
||||||
|
}
|
||||||
@@ -0,0 +1,26 @@
|
|||||||
|
package io.cameleer.license;
|
||||||
|
|
||||||
|
public final class LicenseStateMachine {
|
||||||
|
|
||||||
|
private LicenseStateMachine() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param license parsed license, or null if no license is loaded
|
||||||
|
* @param invalidReason non-null if the last validation attempt failed
|
||||||
|
*/
|
||||||
|
public static LicenseState classify(LicenseInfo license, String invalidReason) {
|
||||||
|
if (invalidReason != null) {
|
||||||
|
return LicenseState.INVALID;
|
||||||
|
}
|
||||||
|
if (license == null) {
|
||||||
|
return LicenseState.ABSENT;
|
||||||
|
}
|
||||||
|
if (!license.isAfterRawExpiry()) {
|
||||||
|
return LicenseState.ACTIVE;
|
||||||
|
}
|
||||||
|
if (!license.isExpired()) {
|
||||||
|
return LicenseState.GRACE;
|
||||||
|
}
|
||||||
|
return LicenseState.EXPIRED;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,14 +1,20 @@
|
|||||||
package com.cameleer.server.core.license;
|
package io.cameleer.license;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.JsonNode;
|
import com.fasterxml.jackson.databind.JsonNode;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.security.*;
|
import java.security.KeyFactory;
|
||||||
|
import java.security.PublicKey;
|
||||||
|
import java.security.Signature;
|
||||||
import java.security.spec.X509EncodedKeySpec;
|
import java.security.spec.X509EncodedKeySpec;
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.util.*;
|
import java.util.Base64;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
public class LicenseValidator {
|
public class LicenseValidator {
|
||||||
|
|
||||||
@@ -16,8 +22,13 @@ public class LicenseValidator {
|
|||||||
private static final ObjectMapper objectMapper = new ObjectMapper();
|
private static final ObjectMapper objectMapper = new ObjectMapper();
|
||||||
|
|
||||||
private final PublicKey publicKey;
|
private final PublicKey publicKey;
|
||||||
|
private final String expectedTenantId;
|
||||||
|
|
||||||
public LicenseValidator(String publicKeyBase64) {
|
public LicenseValidator(String publicKeyBase64, String expectedTenantId) {
|
||||||
|
Objects.requireNonNull(expectedTenantId, "expectedTenantId is required");
|
||||||
|
if (expectedTenantId.isBlank()) {
|
||||||
|
throw new IllegalArgumentException("expectedTenantId must not be blank");
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
byte[] keyBytes = Base64.getDecoder().decode(publicKeyBase64);
|
byte[] keyBytes = Base64.getDecoder().decode(publicKeyBase64);
|
||||||
KeyFactory kf = KeyFactory.getInstance("Ed25519");
|
KeyFactory kf = KeyFactory.getInstance("Ed25519");
|
||||||
@@ -25,6 +36,7 @@ public class LicenseValidator {
|
|||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new IllegalStateException("Failed to load license public key", e);
|
throw new IllegalStateException("Failed to load license public key", e);
|
||||||
}
|
}
|
||||||
|
this.expectedTenantId = expectedTenantId;
|
||||||
}
|
}
|
||||||
|
|
||||||
public LicenseInfo validate(String token) {
|
public LicenseInfo validate(String token) {
|
||||||
@@ -36,7 +48,6 @@ public class LicenseValidator {
|
|||||||
byte[] payloadBytes = Base64.getDecoder().decode(parts[0]);
|
byte[] payloadBytes = Base64.getDecoder().decode(parts[0]);
|
||||||
byte[] signatureBytes = Base64.getDecoder().decode(parts[1]);
|
byte[] signatureBytes = Base64.getDecoder().decode(parts[1]);
|
||||||
|
|
||||||
// Verify signature
|
|
||||||
try {
|
try {
|
||||||
Signature verifier = Signature.getInstance("Ed25519");
|
Signature verifier = Signature.getInstance("Ed25519");
|
||||||
verifier.initVerify(publicKey);
|
verifier.initVerify(publicKey);
|
||||||
@@ -50,23 +61,25 @@ public class LicenseValidator {
|
|||||||
throw new SecurityException("License signature verification failed", e);
|
throw new SecurityException("License signature verification failed", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse payload
|
|
||||||
try {
|
try {
|
||||||
JsonNode root = objectMapper.readTree(payloadBytes);
|
JsonNode root = objectMapper.readTree(payloadBytes);
|
||||||
|
|
||||||
String tier = root.get("tier").asText();
|
String licenseIdStr = textOrThrow(root, "licenseId");
|
||||||
|
UUID licenseId;
|
||||||
Set<Feature> features = new HashSet<>();
|
|
||||||
if (root.has("features")) {
|
|
||||||
for (JsonNode f : root.get("features")) {
|
|
||||||
try {
|
try {
|
||||||
features.add(Feature.valueOf(f.asText()));
|
licenseId = UUID.fromString(licenseIdStr);
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
log.warn("Unknown feature in license: {}", f.asText());
|
throw new IllegalArgumentException("licenseId is not a valid UUID: " + licenseIdStr);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String tenantId = textOrThrow(root, "tenantId");
|
||||||
|
if (!tenantId.equals(expectedTenantId)) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"License tenantId '" + tenantId + "' does not match server tenant '" + expectedTenantId + "'");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String label = root.has("label") ? root.get("label").asText() : null;
|
||||||
|
|
||||||
Map<String, Integer> limits = new HashMap<>();
|
Map<String, Integer> limits = new HashMap<>();
|
||||||
if (root.has("limits")) {
|
if (root.has("limits")) {
|
||||||
root.get("limits").fields().forEachRemaining(entry ->
|
root.get("limits").fields().forEachRemaining(entry ->
|
||||||
@@ -74,12 +87,17 @@ public class LicenseValidator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Instant issuedAt = root.has("iat") ? Instant.ofEpochSecond(root.get("iat").asLong()) : Instant.now();
|
Instant issuedAt = root.has("iat") ? Instant.ofEpochSecond(root.get("iat").asLong()) : Instant.now();
|
||||||
Instant expiresAt = root.has("exp") ? Instant.ofEpochSecond(root.get("exp").asLong()) : null;
|
if (!root.has("exp")) {
|
||||||
|
throw new IllegalArgumentException("exp is required");
|
||||||
|
}
|
||||||
|
Instant expiresAt = Instant.ofEpochSecond(root.get("exp").asLong());
|
||||||
|
int gracePeriodDays = root.has("gracePeriodDays") ? root.get("gracePeriodDays").asInt() : 0;
|
||||||
|
|
||||||
LicenseInfo info = new LicenseInfo(tier, features, limits, issuedAt, expiresAt);
|
LicenseInfo info = new LicenseInfo(licenseId, tenantId, label, limits, issuedAt, expiresAt, gracePeriodDays);
|
||||||
|
|
||||||
if (info.isExpired()) {
|
if (info.isExpired()) {
|
||||||
throw new IllegalArgumentException("License expired at " + expiresAt);
|
throw new IllegalArgumentException("License expired at " + expiresAt
|
||||||
|
+ " (grace period " + gracePeriodDays + " days)");
|
||||||
}
|
}
|
||||||
|
|
||||||
return info;
|
return info;
|
||||||
@@ -89,4 +107,11 @@ public class LicenseValidator {
|
|||||||
throw new IllegalArgumentException("Failed to parse license payload", e);
|
throw new IllegalArgumentException("Failed to parse license payload", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static String textOrThrow(JsonNode root, String field) {
|
||||||
|
if (!root.has(field) || root.get(field).asText().isBlank()) {
|
||||||
|
throw new IllegalArgumentException(field + " is required");
|
||||||
|
}
|
||||||
|
return root.get(field).asText();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
package io.cameleer.license;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
class DefaultTierLimitsTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void allDocumentedKeysHaveDefaults() {
|
||||||
|
for (String key : new String[]{
|
||||||
|
"max_environments", "max_apps", "max_agents", "max_users",
|
||||||
|
"max_outbound_connections", "max_alert_rules",
|
||||||
|
"max_total_cpu_millis", "max_total_memory_mb", "max_total_replicas",
|
||||||
|
"max_execution_retention_days", "max_log_retention_days",
|
||||||
|
"max_metric_retention_days", "max_jar_retention_count"
|
||||||
|
}) {
|
||||||
|
assertThat(DefaultTierLimits.DEFAULTS).containsKey(key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void specificValues() {
|
||||||
|
assertThat(DefaultTierLimits.DEFAULTS.get("max_environments")).isEqualTo(1);
|
||||||
|
assertThat(DefaultTierLimits.DEFAULTS.get("max_apps")).isEqualTo(3);
|
||||||
|
assertThat(DefaultTierLimits.DEFAULTS.get("max_agents")).isEqualTo(5);
|
||||||
|
assertThat(DefaultTierLimits.DEFAULTS.get("max_total_cpu_millis")).isEqualTo(2000);
|
||||||
|
assertThat(DefaultTierLimits.DEFAULTS.get("max_log_retention_days")).isEqualTo(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
package io.cameleer.license;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||||
|
|
||||||
|
class LicenseInfoTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void requiresLicenseId() {
|
||||||
|
assertThatThrownBy(() -> new LicenseInfo(
|
||||||
|
null, "acme", "label",
|
||||||
|
Map.of(), Instant.now(), Instant.now().plusSeconds(60), 0))
|
||||||
|
.isInstanceOf(NullPointerException.class)
|
||||||
|
.hasMessageContaining("licenseId");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void requiresTenantId() {
|
||||||
|
assertThatThrownBy(() -> new LicenseInfo(
|
||||||
|
UUID.randomUUID(), null, "label",
|
||||||
|
Map.of(), Instant.now(), Instant.now().plusSeconds(60), 0))
|
||||||
|
.isInstanceOf(NullPointerException.class)
|
||||||
|
.hasMessageContaining("tenantId");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void emptyTenantIdRejected() {
|
||||||
|
assertThatThrownBy(() -> new LicenseInfo(
|
||||||
|
UUID.randomUUID(), " ", "label",
|
||||||
|
Map.of(), Instant.now(), Instant.now().plusSeconds(60), 0))
|
||||||
|
.isInstanceOf(IllegalArgumentException.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getLimit_returnsDefaultWhenMissing() {
|
||||||
|
LicenseInfo info = new LicenseInfo(
|
||||||
|
UUID.randomUUID(), "acme", null,
|
||||||
|
Map.of("max_apps", 5), Instant.now(),
|
||||||
|
Instant.now().plusSeconds(60), 0);
|
||||||
|
assertThat(info.getLimit("max_apps", 99)).isEqualTo(5);
|
||||||
|
assertThat(info.getLimit("max_users", 99)).isEqualTo(99);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void isExpired_honoursGracePeriod() {
|
||||||
|
Instant pastByTen = Instant.now().minusSeconds(10 * 86400);
|
||||||
|
LicenseInfo withinGrace = new LicenseInfo(
|
||||||
|
UUID.randomUUID(), "acme", null, Map.of(),
|
||||||
|
Instant.now().minusSeconds(40 * 86400),
|
||||||
|
pastByTen, 30);
|
||||||
|
assertThat(withinGrace.isExpired()).isFalse(); // 10 days into a 30-day grace
|
||||||
|
LicenseInfo pastGrace = new LicenseInfo(
|
||||||
|
UUID.randomUUID(), "acme", null, Map.of(),
|
||||||
|
Instant.now().minusSeconds(40 * 86400),
|
||||||
|
pastByTen, 5);
|
||||||
|
assertThat(pastGrace.isExpired()).isTrue(); // 10 days is past the 5-day grace
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
package io.cameleer.license;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
class LicenseStateMachineTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void noLicense_isAbsent() {
|
||||||
|
assertThat(LicenseStateMachine.classify(null, null)).isEqualTo(LicenseState.ABSENT);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void invalidReason_isInvalid() {
|
||||||
|
assertThat(LicenseStateMachine.classify(null, "signature failed")).isEqualTo(LicenseState.INVALID);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void activeBeforeExp() {
|
||||||
|
LicenseInfo info = info(Instant.now().plusSeconds(86400), 0);
|
||||||
|
assertThat(LicenseStateMachine.classify(info, null)).isEqualTo(LicenseState.ACTIVE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void graceWithinGracePeriod() {
|
||||||
|
LicenseInfo info = info(Instant.now().minusSeconds(86400), 7);
|
||||||
|
assertThat(LicenseStateMachine.classify(info, null)).isEqualTo(LicenseState.GRACE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void expiredAfterGrace() {
|
||||||
|
LicenseInfo info = info(Instant.now().minusSeconds(8L * 86400), 7);
|
||||||
|
assertThat(LicenseStateMachine.classify(info, null)).isEqualTo(LicenseState.EXPIRED);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void expiredImmediatelyWithZeroGrace() {
|
||||||
|
LicenseInfo info = info(Instant.now().minusSeconds(60), 0);
|
||||||
|
assertThat(LicenseStateMachine.classify(info, null)).isEqualTo(LicenseState.EXPIRED);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void invalidWinsOverPresentLicense() {
|
||||||
|
LicenseInfo info = info(Instant.now().plusSeconds(86400), 0);
|
||||||
|
assertThat(LicenseStateMachine.classify(info, "tenant mismatch")).isEqualTo(LicenseState.INVALID);
|
||||||
|
}
|
||||||
|
|
||||||
|
private LicenseInfo info(Instant exp, int graceDays) {
|
||||||
|
return new LicenseInfo(UUID.randomUUID(), "acme", null, Map.of(),
|
||||||
|
Instant.now().minusSeconds(3600), exp, graceDays);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,141 @@
|
|||||||
|
package io.cameleer.license;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.security.KeyPair;
|
||||||
|
import java.security.KeyPairGenerator;
|
||||||
|
import java.security.PrivateKey;
|
||||||
|
import java.security.Signature;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.time.temporal.ChronoUnit;
|
||||||
|
import java.util.Base64;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||||
|
|
||||||
|
class LicenseValidatorTest {
|
||||||
|
|
||||||
|
private KeyPair generateKeyPair() throws Exception {
|
||||||
|
KeyPairGenerator kpg = KeyPairGenerator.getInstance("Ed25519");
|
||||||
|
return kpg.generateKeyPair();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String sign(PrivateKey key, String payload) throws Exception {
|
||||||
|
Signature signer = Signature.getInstance("Ed25519");
|
||||||
|
signer.initSign(key);
|
||||||
|
signer.update(payload.getBytes());
|
||||||
|
return Base64.getEncoder().encodeToString(signer.sign());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void validate_validLicense_returnsLicenseInfo() throws Exception {
|
||||||
|
KeyPair kp = generateKeyPair();
|
||||||
|
String publicKeyBase64 = Base64.getEncoder().encodeToString(kp.getPublic().getEncoded());
|
||||||
|
LicenseValidator validator = new LicenseValidator(publicKeyBase64, "acme");
|
||||||
|
|
||||||
|
Instant expires = Instant.now().plus(365, ChronoUnit.DAYS);
|
||||||
|
String payload = """
|
||||||
|
{"licenseId":"%s","tenantId":"acme","label":"HIGH","tier":"HIGH","limits":{"max_agents":50,"retention_days":90},"iat":%d,"exp":%d,"gracePeriodDays":7}
|
||||||
|
""".formatted(UUID.randomUUID(), Instant.now().getEpochSecond(), expires.getEpochSecond()).trim();
|
||||||
|
String signature = sign(kp.getPrivate(), payload);
|
||||||
|
String token = Base64.getEncoder().encodeToString(payload.getBytes()) + "." + signature;
|
||||||
|
|
||||||
|
LicenseInfo info = validator.validate(token);
|
||||||
|
|
||||||
|
assertThat(info.label()).isEqualTo("HIGH");
|
||||||
|
assertThat(info.getLimit("max_agents", 0)).isEqualTo(50);
|
||||||
|
assertThat(info.isExpired()).isFalse();
|
||||||
|
assertThat(info.tenantId()).isEqualTo("acme");
|
||||||
|
assertThat(info.gracePeriodDays()).isEqualTo(7);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void validate_expiredLicense_throwsException() throws Exception {
|
||||||
|
KeyPair kp = generateKeyPair();
|
||||||
|
String publicKeyBase64 = Base64.getEncoder().encodeToString(kp.getPublic().getEncoded());
|
||||||
|
LicenseValidator validator = new LicenseValidator(publicKeyBase64, "acme");
|
||||||
|
|
||||||
|
Instant past = Instant.now().minus(1, ChronoUnit.DAYS);
|
||||||
|
String payload = """
|
||||||
|
{"licenseId":"%s","tenantId":"acme","tier":"LOW","limits":{},"iat":%d,"exp":%d}
|
||||||
|
""".formatted(UUID.randomUUID(), past.minus(30, ChronoUnit.DAYS).getEpochSecond(), past.getEpochSecond()).trim();
|
||||||
|
String signature = sign(kp.getPrivate(), payload);
|
||||||
|
String token = Base64.getEncoder().encodeToString(payload.getBytes()) + "." + signature;
|
||||||
|
|
||||||
|
assertThatThrownBy(() -> validator.validate(token))
|
||||||
|
.isInstanceOf(IllegalArgumentException.class)
|
||||||
|
.hasMessageContaining("expired");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void validate_tamperedPayload_throwsException() throws Exception {
|
||||||
|
KeyPair kp = generateKeyPair();
|
||||||
|
String publicKeyBase64 = Base64.getEncoder().encodeToString(kp.getPublic().getEncoded());
|
||||||
|
LicenseValidator validator = new LicenseValidator(publicKeyBase64, "acme");
|
||||||
|
|
||||||
|
String payload = """
|
||||||
|
{"licenseId":"%s","tenantId":"acme","tier":"LOW","limits":{},"iat":0,"exp":9999999999}
|
||||||
|
""".formatted(UUID.randomUUID()).trim();
|
||||||
|
String signature = sign(kp.getPrivate(), payload);
|
||||||
|
|
||||||
|
// Tamper with payload
|
||||||
|
String tampered = payload.replace("LOW", "BUSINESS");
|
||||||
|
String token = Base64.getEncoder().encodeToString(tampered.getBytes()) + "." + signature;
|
||||||
|
|
||||||
|
assertThatThrownBy(() -> validator.validate(token))
|
||||||
|
.isInstanceOf(SecurityException.class)
|
||||||
|
.hasMessageContaining("signature");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void validate_missingTenantId_throws() throws Exception {
|
||||||
|
KeyPair kp = generateKeyPair();
|
||||||
|
String publicKeyBase64 = Base64.getEncoder().encodeToString(kp.getPublic().getEncoded());
|
||||||
|
LicenseValidator validator = new LicenseValidator(publicKeyBase64, "acme");
|
||||||
|
|
||||||
|
Instant exp = Instant.now().plus(30, ChronoUnit.DAYS);
|
||||||
|
String payload = """
|
||||||
|
{"licenseId":"%s","tier":"X","limits":{},"iat":%d,"exp":%d}
|
||||||
|
""".formatted(UUID.randomUUID(), Instant.now().getEpochSecond(), exp.getEpochSecond()).trim();
|
||||||
|
String token = Base64.getEncoder().encodeToString(payload.getBytes()) + "." + sign(kp.getPrivate(), payload);
|
||||||
|
|
||||||
|
assertThatThrownBy(() -> validator.validate(token))
|
||||||
|
.isInstanceOf(IllegalArgumentException.class)
|
||||||
|
.hasMessageContaining("tenantId");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void validate_tenantIdMismatch_throws() throws Exception {
|
||||||
|
KeyPair kp = generateKeyPair();
|
||||||
|
String publicKeyBase64 = Base64.getEncoder().encodeToString(kp.getPublic().getEncoded());
|
||||||
|
LicenseValidator validator = new LicenseValidator(publicKeyBase64, "beta");
|
||||||
|
|
||||||
|
Instant exp = Instant.now().plus(30, ChronoUnit.DAYS);
|
||||||
|
String payload = """
|
||||||
|
{"licenseId":"%s","tenantId":"acme","tier":"X","limits":{},"iat":%d,"exp":%d}
|
||||||
|
""".formatted(UUID.randomUUID(), Instant.now().getEpochSecond(), exp.getEpochSecond()).trim();
|
||||||
|
String token = Base64.getEncoder().encodeToString(payload.getBytes()) + "." + sign(kp.getPrivate(), payload);
|
||||||
|
|
||||||
|
assertThatThrownBy(() -> validator.validate(token))
|
||||||
|
.isInstanceOf(IllegalArgumentException.class)
|
||||||
|
.hasMessageContaining("tenantId");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void validate_missingLicenseId_throws() throws Exception {
|
||||||
|
KeyPair kp = generateKeyPair();
|
||||||
|
String publicKeyBase64 = Base64.getEncoder().encodeToString(kp.getPublic().getEncoded());
|
||||||
|
LicenseValidator validator = new LicenseValidator(publicKeyBase64, "acme");
|
||||||
|
|
||||||
|
Instant exp = Instant.now().plus(30, ChronoUnit.DAYS);
|
||||||
|
String payload = """
|
||||||
|
{"tenantId":"acme","tier":"X","limits":{},"iat":%d,"exp":%d}
|
||||||
|
""".formatted(Instant.now().getEpochSecond(), exp.getEpochSecond()).trim();
|
||||||
|
String token = Base64.getEncoder().encodeToString(payload.getBytes()) + "." + sign(kp.getPrivate(), payload);
|
||||||
|
|
||||||
|
assertThatThrownBy(() -> validator.validate(token))
|
||||||
|
.isInstanceOf(IllegalArgumentException.class)
|
||||||
|
.hasMessageContaining("licenseId");
|
||||||
|
}
|
||||||
|
}
|
||||||
287
cameleer-license-minter/README.md
Normal file
287
cameleer-license-minter/README.md
Normal file
@@ -0,0 +1,287 @@
|
|||||||
|
# cameleer-license-minter
|
||||||
|
|
||||||
|
Standalone vendor-side tool for producing signed Ed25519 license tokens consumed by `cameleer-server`. The minter is intentionally **not** a runtime or compile-scope dependency of the server — the server only ships with the matching public key and validates tokens via `LicenseValidator`. The private signing key never leaves the vendor's environment.
|
||||||
|
|
||||||
|
- Module GAV: `io.cameleer:cameleer-license-minter:1.0-SNAPSHOT`
|
||||||
|
- Maven coordinates of the runtime server (does **not** transitively pull this module): `io.cameleer:cameleer-server-app:1.0-SNAPSHOT`
|
||||||
|
- Build artifacts (after `mvn -pl cameleer-license-minter package`):
|
||||||
|
- `target/cameleer-license-minter-1.0-SNAPSHOT.jar` — plain library JAR (consumable as a Maven `test` dependency or via the `LicenseMinter` API in custom tooling)
|
||||||
|
- `target/cameleer-license-minter-1.0-SNAPSHOT-cli.jar` — fat CLI JAR with main class `io.cameleer.license.minter.cli.LicenseMinterCli`
|
||||||
|
|
||||||
|
## Table of contents
|
||||||
|
|
||||||
|
## Audience
|
||||||
|
|
||||||
|
## Build
|
||||||
|
|
||||||
|
## Public Java API
|
||||||
|
|
||||||
|
## CLI usage
|
||||||
|
|
||||||
|
## Token format
|
||||||
|
|
||||||
|
## LicenseInfo schema
|
||||||
|
|
||||||
|
## Limits dictionary
|
||||||
|
|
||||||
|
## Generating an Ed25519 key pair
|
||||||
|
|
||||||
|
## Worked example
|
||||||
|
|
||||||
|
## Security guidance
|
||||||
|
|
||||||
|
## Compatibility / runtime separation
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Audience
|
||||||
|
|
||||||
|
Vendors / SaaS operators issuing licenses to customers who run `cameleer-server`. End-customer operators looking for *how to install* a token should read `docs/license-enforcement.md` instead.
|
||||||
|
|
||||||
|
## Build
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# From the repo root
|
||||||
|
mvn -pl cameleer-license-minter package
|
||||||
|
```
|
||||||
|
|
||||||
|
Two JARs land in `cameleer-license-minter/target/`:
|
||||||
|
|
||||||
|
| Artifact | Purpose |
|
||||||
|
|---|---|
|
||||||
|
| `cameleer-license-minter-1.0-SNAPSHOT.jar` | Plain library (the `repackage` execution for the main artifact is disabled; see `pom.xml:50-54`). Use this when embedding the minter inside your own tooling or a unit test that needs a fresh signed token. |
|
||||||
|
| `cameleer-license-minter-1.0-SNAPSHOT-cli.jar` | Fat CLI JAR. Repackaged by Spring Boot's `spring-boot-maven-plugin` with classifier `cli`; main class is `io.cameleer.license.minter.cli.LicenseMinterCli`. |
|
||||||
|
|
||||||
|
## Public Java API
|
||||||
|
|
||||||
|
`io.cameleer.license.minter.LicenseMinter` is the only entry point for the library. It is a final, stateless utility class:
|
||||||
|
|
||||||
|
```java
|
||||||
|
import io.cameleer.license.minter.LicenseMinter;
|
||||||
|
import io.cameleer.license.LicenseInfo;
|
||||||
|
|
||||||
|
LicenseInfo info = new LicenseInfo(
|
||||||
|
java.util.UUID.randomUUID(),
|
||||||
|
"acme-prod", // tenantId — must match server's CAMELEER_SERVER_TENANT_ID
|
||||||
|
"Acme Production (Tier B)", // human label, optional
|
||||||
|
java.util.Map.of(
|
||||||
|
"max_environments", 3,
|
||||||
|
"max_apps", 25,
|
||||||
|
"max_agents", 50,
|
||||||
|
"max_users", 20,
|
||||||
|
"max_total_replicas", 30
|
||||||
|
),
|
||||||
|
java.time.Instant.now(), // issuedAt
|
||||||
|
java.time.Instant.parse("2027-01-01T00:00:00Z"), // expiresAt
|
||||||
|
7 // gracePeriodDays
|
||||||
|
);
|
||||||
|
|
||||||
|
String token = LicenseMinter.mint(info, ed25519PrivateKey);
|
||||||
|
```
|
||||||
|
|
||||||
|
Source: `cameleer-license-minter/src/main/java/com/cameleer/license/minter/LicenseMinter.java:20`.
|
||||||
|
|
||||||
|
The method is thread-safe; the underlying Jackson `ObjectMapper` is configured once with `ORDER_MAP_ENTRIES_BY_KEYS` so canonical-JSON serialization is deterministic across runs and process boundaries.
|
||||||
|
|
||||||
|
`LicenseMinter.mint` will throw `IllegalStateException` if the JCE provider rejects the private key or the payload cannot be serialized.
|
||||||
|
|
||||||
|
## CLI usage
|
||||||
|
|
||||||
|
The CLI entry point is `io.cameleer.license.minter.cli.LicenseMinterCli`. Run it from the fat JAR produced by the build:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
java -jar cameleer-license-minter/target/cameleer-license-minter-1.0-SNAPSHOT-cli.jar \
|
||||||
|
--private-key=/secure/keys/cameleer-license-priv.pem \
|
||||||
|
--tenant=acme-prod \
|
||||||
|
--label="Acme Production (Tier B)" \
|
||||||
|
--expires=2027-01-01 \
|
||||||
|
--grace-days=7 \
|
||||||
|
--max-environments=3 \
|
||||||
|
--max-apps=25 \
|
||||||
|
--max-agents=50 \
|
||||||
|
--max-users=20 \
|
||||||
|
--max-total-replicas=30 \
|
||||||
|
--output=/secure/out/acme-prod.lic \
|
||||||
|
--public-key=/secure/keys/cameleer-license-pub.b64 \
|
||||||
|
--verify
|
||||||
|
```
|
||||||
|
|
||||||
|
### Flag reference
|
||||||
|
|
||||||
|
Source of truth: `cameleer-license-minter/src/main/java/com/cameleer/license/minter/cli/LicenseMinterCli.java:26`.
|
||||||
|
|
||||||
|
| Flag | Required | Meaning |
|
||||||
|
|---|---|---|
|
||||||
|
| `--private-key=<path>` | yes | Path to a PKCS#8-encoded Ed25519 private key. Both PEM (`-----BEGIN PRIVATE KEY-----`) and raw base64 are accepted (`LicenseMinterCli.readEd25519PrivateKey`). |
|
||||||
|
| `--tenant=<tenantId>` | yes | The exact `tenantId` the server will compare against `CAMELEER_SERVER_TENANT_ID`. Mismatch causes the validator to throw at install / revalidation. |
|
||||||
|
| `--expires=<YYYY-MM-DD>` | yes | Expiration date interpreted as midnight UTC. The validator considers tokens expired once `now > exp + gracePeriodDays`. |
|
||||||
|
| `--label=<text>` | no | Human-readable label, surfaced via `GET /api/v1/admin/license` and `/api/v1/admin/license/usage`. |
|
||||||
|
| `--grace-days=<int>` | no | Number of days the license stays usable after `--expires`. Defaults to `0`. |
|
||||||
|
| `--max-<limitkey>=<int>` | no, repeatable | Each `--max-foo-bar` flag becomes the limit key `max_foo_bar`. See the limits dictionary below. Unknown keys are accepted by the minter (the server side ignores keys it does not understand and falls through to defaults). |
|
||||||
|
| `--output=<path>` | no | Write the token to a file. When omitted, the token is printed to stdout. On `--verify` failure the file is deleted. |
|
||||||
|
| `--public-key=<path>` | no, required for `--verify` | Path to the matching base64 X.509 SPKI public key file (one line, no PEM markers). |
|
||||||
|
| `--verify` | no | After minting, parse + signature-check the token using `--public-key` and `--tenant`. Exits non-zero if verification fails. |
|
||||||
|
|
||||||
|
Exit codes: `0` on success, `1` on minting / IO failure, `2` on argument validation failure, `3` on `--verify` failure.
|
||||||
|
|
||||||
|
## Token format
|
||||||
|
|
||||||
|
A token is the concatenation of two **standard** base64 segments joined by a literal `.`:
|
||||||
|
|
||||||
|
```
|
||||||
|
base64(canonicalJson) + "." + base64(ed25519Signature)
|
||||||
|
```
|
||||||
|
|
||||||
|
- The canonical JSON payload is produced by `LicenseMinter.canonicalPayload(...)` with keys sorted lexicographically and `limits` rendered as a sorted object. This makes the byte sequence deterministic given a fixed `LicenseInfo`.
|
||||||
|
- The signature is computed with `Signature.getInstance("Ed25519")` over the canonical payload bytes (not over the base64-encoded form).
|
||||||
|
- Encoding is `Base64.getEncoder()` (RFC 4648 §4 — *not* base64url). The validator decodes with the matching `Base64.getDecoder()`.
|
||||||
|
|
||||||
|
`LicenseValidator.validate(...)` (`cameleer-license-api/src/main/java/com/cameleer/license/LicenseValidator.java:42`) splits on the first `.`, decodes both halves, verifies the signature, then deserializes the payload.
|
||||||
|
|
||||||
|
## LicenseInfo schema
|
||||||
|
|
||||||
|
Source: `cameleer-license-api/src/main/java/com/cameleer/license/LicenseInfo.java`. Field-by-field:
|
||||||
|
|
||||||
|
| Field | Type | Required | Semantics |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `licenseId` | `UUID` | yes | Stable identifier for this token. The server's audit trail records install/replace transitions by license id; renewals must use a fresh UUID so audit history is non-ambiguous. |
|
||||||
|
| `tenantId` | `String` | yes | Must equal the server's `CAMELEER_SERVER_TENANT_ID`. The validator throws `IllegalArgumentException` on mismatch. Blank values are rejected by the canonical record constructor. |
|
||||||
|
| `label` | `String` | no | Free-form human label. Surfaced on the admin/usage endpoints and the operator UI. Has no enforcement semantics. |
|
||||||
|
| `limits` | `Map<String,Integer>` | yes (may be empty) | License-specific overrides. Any key that appears here is unioned over `DefaultTierLimits.DEFAULTS` to form the effective caps in `ACTIVE` / `GRACE` states. Keys not present fall through to defaults. |
|
||||||
|
| `issuedAt` | `Instant` (epoch seconds in JSON `iat`) | yes | Stamped by the minter; not currently consulted by the validator beyond informational logging. |
|
||||||
|
| `expiresAt` | `Instant` (epoch seconds in JSON `exp`) | yes | The validator throws if `now > expiresAt + gracePeriodDays * 86400` at install or revalidation. |
|
||||||
|
| `gracePeriodDays` | `int` | yes (>= 0) | Window after `expiresAt` during which the gate transitions to `GRACE` (license still grants its caps) before flipping to `EXPIRED`. Negative values are rejected at construction. |
|
||||||
|
|
||||||
|
## Limits dictionary
|
||||||
|
|
||||||
|
Canonical key set: `cameleer-license-api/src/main/java/com/cameleer/license/DefaultTierLimits.java`. Any key not listed here is silently ignored by the server's `LicenseGate.getEffectiveLimits()`.
|
||||||
|
|
||||||
|
| CLI flag | Key | Default | What the server enforces |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `--max-environments` | `max_environments` | 1 | `EnvironmentService.create(...)` consults `LicenseEnforcer.assertWithinCap("max_environments", currentCount, 1)`. |
|
||||||
|
| `--max-apps` | `max_apps` | 3 | `AppService.createApp(...)` checks total app count across all envs. |
|
||||||
|
| `--max-agents` | `max_agents` | 5 | `AgentRegistryService.register(...)` checks live agent count. |
|
||||||
|
| `--max-users` | `max_users` | 3 | User creation paths (`UserAdminController`, `UiAuthController` self-signup, `OidcAuthController` first-login). |
|
||||||
|
| `--max-outbound-connections` | `max_outbound_connections` | 1 | `OutboundConnectionServiceImpl.create(...)`. |
|
||||||
|
| `--max-alert-rules` | `max_alert_rules` | 2 | `AlertRuleController.create(...)`. |
|
||||||
|
| `--max-total-cpu-millis` | `max_total_cpu_millis` | 2000 | `DeploymentExecutor` PRE_FLIGHT compute cap (sum of `replicas * cpuLimit` over non-stopped deployments). |
|
||||||
|
| `--max-total-memory-mb` | `max_total_memory_mb` | 2048 | `DeploymentExecutor` PRE_FLIGHT compute cap (sum of `replicas * memoryLimitMb`). |
|
||||||
|
| `--max-total-replicas` | `max_total_replicas` | 5 | `DeploymentExecutor` PRE_FLIGHT compute cap (sum of `replicas`). |
|
||||||
|
| `--max-execution-retention-days` | `max_execution_retention_days` | 1 | ClickHouse TTL cap for `executions`, `processor_executions`. Effective TTL = `min(cap, env.executionRetentionDays)`. |
|
||||||
|
| `--max-log-retention-days` | `max_log_retention_days` | 1 | ClickHouse TTL cap for `logs`. |
|
||||||
|
| `--max-metric-retention-days` | `max_metric_retention_days` | 1 | ClickHouse TTL cap for `agent_metrics`, `agent_events`. |
|
||||||
|
| `--max-jar-retention-count` | `max_jar_retention_count` | 3 | `EnvironmentAdminController` PUT `/{envSlug}/jar-retention` rejects requests above this cap. Also bounds the daily `JarRetentionJob`. |
|
||||||
|
|
||||||
|
## Generating an Ed25519 key pair
|
||||||
|
|
||||||
|
The minter and validator both rely on the JCE `Ed25519` algorithm shipped with JDK 17+. No external crypto library is needed.
|
||||||
|
|
||||||
|
```java
|
||||||
|
import java.security.KeyPair;
|
||||||
|
import java.security.KeyPairGenerator;
|
||||||
|
import java.util.Base64;
|
||||||
|
|
||||||
|
KeyPair kp = KeyPairGenerator.getInstance("Ed25519").generateKeyPair();
|
||||||
|
|
||||||
|
// 32-byte public key, X.509 SubjectPublicKeyInfo wrapped — this is what the server expects.
|
||||||
|
String publicKeyB64 = Base64.getEncoder().encodeToString(kp.getPublic().getEncoded());
|
||||||
|
|
||||||
|
// PKCS#8 private key — the CLI's --private-key reader accepts this either as raw base64
|
||||||
|
// or PEM-wrapped (`-----BEGIN PRIVATE KEY-----`).
|
||||||
|
String privateKeyB64 = Base64.getEncoder().encodeToString(kp.getPrivate().getEncoded());
|
||||||
|
```
|
||||||
|
|
||||||
|
A one-liner using the JDK's `keytool` is **not** sufficient — `keytool` cannot produce raw Ed25519 PKCS#8 in a directly-usable shape for our reader. Generating via the API above (or `openssl genpkey -algorithm ed25519`) is the supported path.
|
||||||
|
|
||||||
|
For OpenSSL:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
openssl genpkey -algorithm ed25519 -out cameleer-license-priv.pem
|
||||||
|
openssl pkey -in cameleer-license-priv.pem -pubout -outform DER \
|
||||||
|
| base64 -w0 > cameleer-license-pub.b64
|
||||||
|
```
|
||||||
|
|
||||||
|
The resulting `cameleer-license-pub.b64` is the value to put into `CAMELEER_SERVER_LICENSE_PUBLICKEY`.
|
||||||
|
|
||||||
|
## Worked example
|
||||||
|
|
||||||
|
End-to-end: generate a key pair, mint a license, install it on a running server, verify enforcement.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Vendor side — generate the keypair
|
||||||
|
openssl genpkey -algorithm ed25519 -out /secrets/cameleer-priv.pem
|
||||||
|
openssl pkey -in /secrets/cameleer-priv.pem -pubout -outform DER \
|
||||||
|
| base64 -w0 > /secrets/cameleer-pub.b64
|
||||||
|
|
||||||
|
# 2. Vendor side — distribute the public key (commit to deployment config / Vault / k8s Secret)
|
||||||
|
cat /secrets/cameleer-pub.b64
|
||||||
|
# MCowBQYDK2VwAyEAxxxxx...
|
||||||
|
|
||||||
|
# 3. Vendor side — mint a license for a customer tenant
|
||||||
|
mvn -pl cameleer-license-minter package -DskipTests
|
||||||
|
java -jar cameleer-license-minter/target/cameleer-license-minter-1.0-SNAPSHOT-cli.jar \
|
||||||
|
--private-key=/secrets/cameleer-priv.pem \
|
||||||
|
--public-key=/secrets/cameleer-pub.b64 \
|
||||||
|
--tenant=acme-prod \
|
||||||
|
--label="Acme Production" \
|
||||||
|
--expires=2027-01-01 \
|
||||||
|
--grace-days=14 \
|
||||||
|
--max-environments=3 \
|
||||||
|
--max-apps=25 \
|
||||||
|
--max-agents=50 \
|
||||||
|
--max-users=20 \
|
||||||
|
--max-total-replicas=30 \
|
||||||
|
--max-total-cpu-millis=15000 \
|
||||||
|
--max-total-memory-mb=16384 \
|
||||||
|
--max-execution-retention-days=30 \
|
||||||
|
--max-log-retention-days=14 \
|
||||||
|
--max-metric-retention-days=14 \
|
||||||
|
--max-jar-retention-count=10 \
|
||||||
|
--output=/tmp/acme.lic \
|
||||||
|
--verify
|
||||||
|
|
||||||
|
# 4. Customer side — server boots with public key + tenant id matching the mint
|
||||||
|
export CAMELEER_SERVER_TENANT_ID=acme-prod
|
||||||
|
export CAMELEER_SERVER_LICENSE_PUBLICKEY=$(cat /secrets/cameleer-pub.b64)
|
||||||
|
|
||||||
|
# 5. Customer side — install via the admin API after boot
|
||||||
|
curl -X POST https://server.example.com/api/v1/admin/license \
|
||||||
|
-H "Authorization: Bearer ${ADMIN_JWT}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d "{\"token\": \"$(cat /tmp/acme.lic)\"}"
|
||||||
|
|
||||||
|
# 6. Customer side — verify it was accepted
|
||||||
|
curl https://server.example.com/api/v1/admin/license \
|
||||||
|
-H "Authorization: Bearer ${ADMIN_JWT}"
|
||||||
|
# {"state":"ACTIVE","invalidReason":null,"envelope":{...},"lastValidatedAt":"..."}
|
||||||
|
|
||||||
|
curl https://server.example.com/api/v1/admin/license/usage \
|
||||||
|
-H "Authorization: Bearer ${ADMIN_JWT}"
|
||||||
|
# Shows current/cap/source per limit key
|
||||||
|
```
|
||||||
|
|
||||||
|
For boot-time installation (preferred for SaaS-managed deployments), set `CAMELEER_SERVER_LICENSE_TOKEN` instead of POSTing — see `docs/license-enforcement.md`.
|
||||||
|
|
||||||
|
## Security guidance
|
||||||
|
|
||||||
|
- **The Ed25519 private key is the trust root.** Anyone who holds it can mint licenses for any tenant. Treat it like a code-signing key.
|
||||||
|
- **Storage.** Production private keys belong in an HSM, KMS (e.g. AWS KMS / GCP KMS with non-exportable signing), or a sealed Vault transit backend. A sealed file on a laptop is acceptable for low-volume / pre-production minting only and should never be committed to git or shared via chat.
|
||||||
|
- **Rotation.** Rotation is destructive: every customer running with the *old* public key will reject all new tokens signed with the *new* private key. The pragmatic procedure is:
|
||||||
|
1. Generate the new keypair.
|
||||||
|
2. Distribute the new public key (`CAMELEER_SERVER_LICENSE_PUBLICKEY`) to every tenant's server config.
|
||||||
|
3. Once tenants confirm they are running with the new public key, re-mint and re-issue every active license under the new key.
|
||||||
|
4. Decommission the old private key.
|
||||||
|
Practical revocation flows through expiry — keep license terms short enough (12 months or less) that planned rotations stay aligned with renewal cadence.
|
||||||
|
- **Auditing.** The server records every install/replace/reject under `AuditCategory.LICENSE`. The minter itself does not write audit rows; if you need a vendor-side audit trail of mint operations, wrap `LicenseMinter.mint(...)` in your own ticketing pipeline.
|
||||||
|
- **Never commit private keys.** `.gitignore` does not block them by name — use a `secrets/` directory excluded by your repository's policy, or store them entirely outside the working tree.
|
||||||
|
|
||||||
|
## Compatibility / runtime separation
|
||||||
|
|
||||||
|
The minter is intentionally absent from `cameleer-server-app`'s production classpath. To verify after a build:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mvn -pl cameleer-server-app dependency:tree | grep license-minter
|
||||||
|
# expected: empty output (or, in development branches, a single line scoped 'test')
|
||||||
|
```
|
||||||
|
|
||||||
|
`cameleer-license-minter/pom.xml` depends on `cameleer-license-api` for the pure license contract types (`LicenseInfo`, `LicenseValidator`) used by mint + `--verify`. It deliberately does **not** depend on `cameleer-server-core`, so consumers of the minter (e.g. `cameleer-saas`) do not inherit server-runtime types onto their classpath. The server app intentionally does not depend on the minter — vendors mint outside the customer-deployed runtime, and a compromised customer cannot leverage server code to forge tokens.
|
||||||
69
cameleer-license-minter/pom.xml
Normal file
69
cameleer-license-minter/pom.xml
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>io.cameleer</groupId>
|
||||||
|
<artifactId>cameleer-server-parent</artifactId>
|
||||||
|
<version>1.0-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>cameleer-license-minter</artifactId>
|
||||||
|
<name>Cameleer License Minter</name>
|
||||||
|
<description>Vendor-only Ed25519 license signing library + CLI</description>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.cameleer</groupId>
|
||||||
|
<artifactId>cameleer-license-api</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
|
<artifactId>jackson-databind</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.junit.jupiter</groupId>
|
||||||
|
<artifactId>junit-jupiter</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.assertj</groupId>
|
||||||
|
<artifactId>assertj-core</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.springframework.boot</groupId>
|
||||||
|
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<!-- Disable the default repackage so the main artifact stays as a plain library
|
||||||
|
JAR consumable as a Maven test-scope dependency by cameleer-server-app. -->
|
||||||
|
<execution>
|
||||||
|
<id>repackage</id>
|
||||||
|
<phase>none</phase>
|
||||||
|
</execution>
|
||||||
|
<execution>
|
||||||
|
<id>repackage-cli</id>
|
||||||
|
<goals>
|
||||||
|
<goal>repackage</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<classifier>cli</classifier>
|
||||||
|
<mainClass>io.cameleer.license.minter.cli.LicenseMinterCli</mainClass>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
</project>
|
||||||
@@ -0,0 +1,52 @@
|
|||||||
|
package io.cameleer.license.minter;
|
||||||
|
|
||||||
|
import io.cameleer.license.LicenseInfo;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.fasterxml.jackson.databind.SerializationFeature;
|
||||||
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
|
|
||||||
|
import java.security.PrivateKey;
|
||||||
|
import java.security.Signature;
|
||||||
|
import java.util.Base64;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
|
||||||
|
public final class LicenseMinter {
|
||||||
|
|
||||||
|
private static final ObjectMapper MAPPER = new ObjectMapper()
|
||||||
|
.configure(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS, true);
|
||||||
|
|
||||||
|
private LicenseMinter() {}
|
||||||
|
|
||||||
|
public static String mint(LicenseInfo info, PrivateKey ed25519PrivateKey) {
|
||||||
|
byte[] payload = canonicalPayload(info);
|
||||||
|
try {
|
||||||
|
Signature signer = Signature.getInstance("Ed25519");
|
||||||
|
signer.initSign(ed25519PrivateKey);
|
||||||
|
signer.update(payload);
|
||||||
|
byte[] sig = signer.sign();
|
||||||
|
return Base64.getEncoder().encodeToString(payload) + "." + Base64.getEncoder().encodeToString(sig);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IllegalStateException("Failed to sign license", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static byte[] canonicalPayload(LicenseInfo info) {
|
||||||
|
ObjectNode root = MAPPER.createObjectNode();
|
||||||
|
root.put("exp", info.expiresAt().getEpochSecond());
|
||||||
|
root.put("gracePeriodDays", info.gracePeriodDays());
|
||||||
|
root.put("iat", info.issuedAt().getEpochSecond());
|
||||||
|
if (info.label() != null) {
|
||||||
|
root.put("label", info.label());
|
||||||
|
}
|
||||||
|
root.put("licenseId", info.licenseId().toString());
|
||||||
|
ObjectNode limits = MAPPER.createObjectNode();
|
||||||
|
new TreeMap<>(info.limits()).forEach(limits::put);
|
||||||
|
root.set("limits", limits);
|
||||||
|
root.put("tenantId", info.tenantId());
|
||||||
|
try {
|
||||||
|
return MAPPER.writeValueAsBytes(root);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IllegalStateException("Failed to serialize license payload", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,136 @@
|
|||||||
|
package io.cameleer.license.minter.cli;
|
||||||
|
|
||||||
|
import io.cameleer.license.minter.LicenseMinter;
|
||||||
|
import io.cameleer.license.LicenseInfo;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.security.KeyFactory;
|
||||||
|
import java.security.PrivateKey;
|
||||||
|
import java.security.spec.PKCS8EncodedKeySpec;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.time.LocalDate;
|
||||||
|
import java.time.ZoneOffset;
|
||||||
|
import java.util.Base64;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
public final class LicenseMinterCli {
|
||||||
|
|
||||||
|
private static final Set<String> KNOWN_FLAGS = Set.of(
|
||||||
|
"--private-key", "--public-key", "--tenant", "--label",
|
||||||
|
"--expires", "--grace-days", "--output", "--verify"
|
||||||
|
);
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
System.exit(run(args));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int run(String[] args) {
|
||||||
|
return run(args, System.out, System.err);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int run(String[] args, PrintStream out, PrintStream err) {
|
||||||
|
Map<String, String> flags = new LinkedHashMap<>();
|
||||||
|
Set<String> bool = new HashSet<>();
|
||||||
|
Map<String, Integer> limits = new TreeMap<>();
|
||||||
|
for (String arg : args) {
|
||||||
|
if (!arg.startsWith("--")) {
|
||||||
|
err.println("unexpected positional argument: " + arg);
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
int eq = arg.indexOf('=');
|
||||||
|
String key = eq < 0 ? arg : arg.substring(0, eq);
|
||||||
|
String value = eq < 0 ? null : arg.substring(eq + 1);
|
||||||
|
if (key.startsWith("--max-")) {
|
||||||
|
String limitKey = "max_" + key.substring("--max-".length()).replace('-', '_');
|
||||||
|
if (value == null) {
|
||||||
|
err.println("missing value for " + key);
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
limits.put(limitKey, Integer.parseInt(value));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!KNOWN_FLAGS.contains(key)) {
|
||||||
|
err.println("unknown flag: " + key);
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
if (value == null) {
|
||||||
|
bool.add(key);
|
||||||
|
} else {
|
||||||
|
flags.put(key, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
String privPath = flags.get("--private-key");
|
||||||
|
String tenant = flags.get("--tenant");
|
||||||
|
String expiresIso = flags.get("--expires");
|
||||||
|
if (privPath == null || tenant == null || expiresIso == null) {
|
||||||
|
err.println("required: --private-key --tenant --expires");
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
PrivateKey privateKey = readEd25519PrivateKey(Path.of(privPath));
|
||||||
|
int graceDays = Integer.parseInt(flags.getOrDefault("--grace-days", "0"));
|
||||||
|
Instant exp = LocalDate.parse(expiresIso).atStartOfDay(ZoneOffset.UTC).toInstant();
|
||||||
|
LicenseInfo info = new LicenseInfo(
|
||||||
|
UUID.randomUUID(),
|
||||||
|
tenant,
|
||||||
|
flags.get("--label"),
|
||||||
|
Collections.unmodifiableMap(limits),
|
||||||
|
Instant.now(),
|
||||||
|
exp,
|
||||||
|
graceDays
|
||||||
|
);
|
||||||
|
String token = LicenseMinter.mint(info, privateKey);
|
||||||
|
|
||||||
|
String outPath = flags.get("--output");
|
||||||
|
if (outPath != null) {
|
||||||
|
Files.writeString(Path.of(outPath), token);
|
||||||
|
out.println("wrote " + outPath);
|
||||||
|
} else {
|
||||||
|
out.println(token);
|
||||||
|
}
|
||||||
|
if (bool.contains("--verify")) {
|
||||||
|
String pubPath = flags.get("--public-key");
|
||||||
|
if (pubPath == null) {
|
||||||
|
err.println("--verify requires --public-key");
|
||||||
|
if (outPath != null) Files.deleteIfExists(Path.of(outPath));
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
String pubB64 = Files.readString(Path.of(pubPath)).trim();
|
||||||
|
new io.cameleer.license.LicenseValidator(pubB64, tenant).validate(token);
|
||||||
|
out.println("verified ok");
|
||||||
|
} catch (Exception ve) {
|
||||||
|
err.println("VERIFY FAILED: " + ve.getMessage());
|
||||||
|
if (outPath != null) Files.deleteIfExists(Path.of(outPath));
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
} catch (Exception e) {
|
||||||
|
err.println("ERROR: " + e.getMessage());
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static PrivateKey readEd25519PrivateKey(Path path) throws Exception {
|
||||||
|
String s = Files.readString(path).trim();
|
||||||
|
if (s.startsWith("-----BEGIN")) {
|
||||||
|
s = s.replaceAll("-----BEGIN [A-Z ]+-----", "")
|
||||||
|
.replaceAll("-----END [A-Z ]+-----", "")
|
||||||
|
.replaceAll("\\s", "");
|
||||||
|
}
|
||||||
|
byte[] der = Base64.getDecoder().decode(s);
|
||||||
|
return KeyFactory.getInstance("Ed25519")
|
||||||
|
.generatePrivate(new PKCS8EncodedKeySpec(der));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,53 @@
|
|||||||
|
package io.cameleer.license.minter;
|
||||||
|
|
||||||
|
import io.cameleer.license.LicenseInfo;
|
||||||
|
import io.cameleer.license.LicenseValidator;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.security.KeyPair;
|
||||||
|
import java.security.KeyPairGenerator;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.Base64;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
class LicenseMinterTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void roundTrip_validatorAcceptsMintedToken() throws Exception {
|
||||||
|
KeyPair kp = KeyPairGenerator.getInstance("Ed25519").generateKeyPair();
|
||||||
|
String publicB64 = Base64.getEncoder().encodeToString(kp.getPublic().getEncoded());
|
||||||
|
|
||||||
|
LicenseInfo info = new LicenseInfo(
|
||||||
|
UUID.randomUUID(), "acme", "ACME prod",
|
||||||
|
Map.of("max_apps", 50, "max_agents", 100),
|
||||||
|
Instant.now(), Instant.now().plusSeconds(86400), 7);
|
||||||
|
|
||||||
|
String token = LicenseMinter.mint(info, kp.getPrivate());
|
||||||
|
|
||||||
|
LicenseInfo parsed = new LicenseValidator(publicB64, "acme").validate(token);
|
||||||
|
assertThat(parsed.licenseId()).isEqualTo(info.licenseId());
|
||||||
|
assertThat(parsed.tenantId()).isEqualTo("acme");
|
||||||
|
assertThat(parsed.limits().get("max_apps")).isEqualTo(50);
|
||||||
|
assertThat(parsed.gracePeriodDays()).isEqualTo(7);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void canonicalJson_isStableAcrossRuns() throws Exception {
|
||||||
|
KeyPair kp = KeyPairGenerator.getInstance("Ed25519").generateKeyPair();
|
||||||
|
UUID id = UUID.randomUUID();
|
||||||
|
Instant now = Instant.parse("2026-04-25T10:00:00Z");
|
||||||
|
Instant exp = Instant.parse("2027-04-25T10:00:00Z");
|
||||||
|
LinkedHashMap<String, Integer> limits = new LinkedHashMap<>();
|
||||||
|
limits.put("max_apps", 5);
|
||||||
|
limits.put("max_agents", 10);
|
||||||
|
LicenseInfo info = new LicenseInfo(id, "acme", "label", limits, now, exp, 0);
|
||||||
|
|
||||||
|
String t1 = LicenseMinter.mint(info, kp.getPrivate());
|
||||||
|
String t2 = LicenseMinter.mint(info, kp.getPrivate());
|
||||||
|
assertThat(t1).isEqualTo(t2);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,112 @@
|
|||||||
|
package io.cameleer.license.minter.cli;
|
||||||
|
|
||||||
|
import io.cameleer.license.LicenseValidator;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.io.TempDir;
|
||||||
|
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.security.KeyPair;
|
||||||
|
import java.security.KeyPairGenerator;
|
||||||
|
import java.util.Base64;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
class LicenseMinterCliTest {
|
||||||
|
|
||||||
|
@TempDir Path tmp;
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void mints_validToken_validatorAccepts() throws Exception {
|
||||||
|
KeyPair kp = KeyPairGenerator.getInstance("Ed25519").generateKeyPair();
|
||||||
|
Path priv = tmp.resolve("priv.b64");
|
||||||
|
Path pub = tmp.resolve("pub.b64");
|
||||||
|
Files.writeString(priv, Base64.getEncoder().encodeToString(kp.getPrivate().getEncoded()));
|
||||||
|
Files.writeString(pub, Base64.getEncoder().encodeToString(kp.getPublic().getEncoded()));
|
||||||
|
Path out = tmp.resolve("license.tok");
|
||||||
|
|
||||||
|
int code = LicenseMinterCli.run(new String[]{
|
||||||
|
"--private-key=" + priv,
|
||||||
|
"--tenant=acme",
|
||||||
|
"--label=ACME",
|
||||||
|
"--expires=2099-12-31",
|
||||||
|
"--grace-days=30",
|
||||||
|
"--max-apps=50",
|
||||||
|
"--output=" + out
|
||||||
|
});
|
||||||
|
|
||||||
|
assertThat(code).isEqualTo(0);
|
||||||
|
String token = Files.readString(out).trim();
|
||||||
|
var info = new LicenseValidator(Files.readString(pub).trim(), "acme").validate(token);
|
||||||
|
assertThat(info.tenantId()).isEqualTo("acme");
|
||||||
|
assertThat(info.limits().get("max_apps")).isEqualTo(50);
|
||||||
|
assertThat(info.gracePeriodDays()).isEqualTo(30);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void unknownFlag_failsFast() {
|
||||||
|
int code = LicenseMinterCli.run(new String[]{"--frobnicate=yes"});
|
||||||
|
assertThat(code).isNotZero();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void verify_happyPath_succeeds() throws Exception {
|
||||||
|
KeyPair kp = KeyPairGenerator.getInstance("Ed25519").generateKeyPair();
|
||||||
|
Path priv = tmp.resolve("priv.b64");
|
||||||
|
Path pub = tmp.resolve("pub.b64");
|
||||||
|
Files.writeString(priv, Base64.getEncoder().encodeToString(kp.getPrivate().getEncoded()));
|
||||||
|
Files.writeString(pub, Base64.getEncoder().encodeToString(kp.getPublic().getEncoded()));
|
||||||
|
Path out = tmp.resolve("license.tok");
|
||||||
|
|
||||||
|
int code = LicenseMinterCli.run(new String[]{
|
||||||
|
"--private-key=" + priv,
|
||||||
|
"--public-key=" + pub,
|
||||||
|
"--tenant=acme",
|
||||||
|
"--expires=2099-12-31",
|
||||||
|
"--output=" + out,
|
||||||
|
"--verify"
|
||||||
|
});
|
||||||
|
|
||||||
|
assertThat(code).isEqualTo(0);
|
||||||
|
assertThat(out).exists();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void verify_wrongPublicKey_deletesOutputAndExitsNonZero() throws Exception {
|
||||||
|
KeyPair signing = KeyPairGenerator.getInstance("Ed25519").generateKeyPair();
|
||||||
|
KeyPair other = KeyPairGenerator.getInstance("Ed25519").generateKeyPair();
|
||||||
|
Path priv = tmp.resolve("priv.b64");
|
||||||
|
Path pub = tmp.resolve("pub.b64");
|
||||||
|
Files.writeString(priv, Base64.getEncoder().encodeToString(signing.getPrivate().getEncoded()));
|
||||||
|
Files.writeString(pub, Base64.getEncoder().encodeToString(other.getPublic().getEncoded()));
|
||||||
|
Path out = tmp.resolve("license.tok");
|
||||||
|
|
||||||
|
int code = LicenseMinterCli.run(new String[]{
|
||||||
|
"--private-key=" + priv,
|
||||||
|
"--public-key=" + pub,
|
||||||
|
"--tenant=acme",
|
||||||
|
"--expires=2099-12-31",
|
||||||
|
"--output=" + out,
|
||||||
|
"--verify"
|
||||||
|
});
|
||||||
|
|
||||||
|
assertThat(code).isNotZero();
|
||||||
|
assertThat(out).doesNotExist();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void verify_withoutPublicKey_fails() throws Exception {
|
||||||
|
KeyPair kp = KeyPairGenerator.getInstance("Ed25519").generateKeyPair();
|
||||||
|
Path priv = tmp.resolve("priv.b64");
|
||||||
|
Files.writeString(priv, Base64.getEncoder().encodeToString(kp.getPrivate().getEncoded()));
|
||||||
|
|
||||||
|
int code = LicenseMinterCli.run(new String[]{
|
||||||
|
"--private-key=" + priv,
|
||||||
|
"--tenant=acme",
|
||||||
|
"--expires=2099-12-31",
|
||||||
|
"--verify"
|
||||||
|
});
|
||||||
|
|
||||||
|
assertThat(code).isNotZero();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -5,7 +5,7 @@
|
|||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>com.cameleer</groupId>
|
<groupId>io.cameleer</groupId>
|
||||||
<artifactId>cameleer-server-parent</artifactId>
|
<artifactId>cameleer-server-parent</artifactId>
|
||||||
<version>1.0-SNAPSHOT</version>
|
<version>1.0-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
@@ -16,9 +16,15 @@
|
|||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.cameleer</groupId>
|
<groupId>io.cameleer</groupId>
|
||||||
<artifactId>cameleer-server-core</artifactId>
|
<artifactId>cameleer-server-core</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.cameleer</groupId>
|
||||||
|
<artifactId>cameleer-license-minter</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
<artifactId>spring-boot-starter-web</artifactId>
|
<artifactId>spring-boot-starter-web</artifactId>
|
||||||
@@ -82,6 +88,11 @@
|
|||||||
<artifactId>org.eclipse.xtext.xbase.lib</artifactId>
|
<artifactId>org.eclipse.xtext.xbase.lib</artifactId>
|
||||||
<version>2.37.0</version>
|
<version>2.37.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.samskivert</groupId>
|
||||||
|
<artifactId>jmustache</artifactId>
|
||||||
|
<version>1.16</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
<artifactId>spring-boot-starter-validation</artifactId>
|
<artifactId>spring-boot-starter-validation</artifactId>
|
||||||
@@ -144,6 +155,12 @@
|
|||||||
<artifactId>awaitility</artifactId>
|
<artifactId>awaitility</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.wiremock</groupId>
|
||||||
|
<artifactId>wiremock-standalone</artifactId>
|
||||||
|
<version>3.9.1</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
@@ -178,8 +195,8 @@
|
|||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-surefire-plugin</artifactId>
|
<artifactId>maven-surefire-plugin</artifactId>
|
||||||
<configuration>
|
<configuration>
|
||||||
<forkCount>1</forkCount>
|
<forkCount>1C</forkCount>
|
||||||
<reuseForks>false</reuseForks>
|
<reuseForks>true</reuseForks>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
|
|||||||
@@ -1,68 +0,0 @@
|
|||||||
package com.cameleer.server.app.config;
|
|
||||||
|
|
||||||
import com.cameleer.server.core.license.LicenseGate;
|
|
||||||
import com.cameleer.server.core.license.LicenseInfo;
|
|
||||||
import com.cameleer.server.core.license.LicenseValidator;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
|
||||||
import org.springframework.context.annotation.Bean;
|
|
||||||
import org.springframework.context.annotation.Configuration;
|
|
||||||
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
|
|
||||||
@Configuration
|
|
||||||
public class LicenseBeanConfig {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(LicenseBeanConfig.class);
|
|
||||||
|
|
||||||
@Value("${cameleer.server.license.token:}")
|
|
||||||
private String licenseToken;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.license.file:}")
|
|
||||||
private String licenseFile;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.license.publickey:}")
|
|
||||||
private String licensePublicKey;
|
|
||||||
|
|
||||||
@Bean
|
|
||||||
public LicenseGate licenseGate() {
|
|
||||||
LicenseGate gate = new LicenseGate();
|
|
||||||
|
|
||||||
String token = resolveLicenseToken();
|
|
||||||
if (token == null || token.isBlank()) {
|
|
||||||
log.info("No license configured — running in open mode (all features enabled)");
|
|
||||||
return gate;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (licensePublicKey == null || licensePublicKey.isBlank()) {
|
|
||||||
log.warn("License token provided but no public key configured (CAMELEER_SERVER_LICENSE_PUBLICKEY). Running in open mode.");
|
|
||||||
return gate;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
LicenseValidator validator = new LicenseValidator(licensePublicKey);
|
|
||||||
LicenseInfo info = validator.validate(token);
|
|
||||||
gate.load(info);
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error("Failed to validate license: {}. Running in open mode.", e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
return gate;
|
|
||||||
}
|
|
||||||
|
|
||||||
private String resolveLicenseToken() {
|
|
||||||
if (licenseToken != null && !licenseToken.isBlank()) {
|
|
||||||
return licenseToken;
|
|
||||||
}
|
|
||||||
if (licenseFile != null && !licenseFile.isBlank()) {
|
|
||||||
try {
|
|
||||||
return Files.readString(Path.of(licenseFile)).trim();
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.warn("Failed to read license file {}: {}", licenseFile, e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,87 +0,0 @@
|
|||||||
package com.cameleer.server.app.controller;
|
|
||||||
|
|
||||||
import com.cameleer.common.model.RouteExecution;
|
|
||||||
import com.cameleer.server.core.agent.AgentInfo;
|
|
||||||
import com.cameleer.server.core.agent.AgentRegistryService;
|
|
||||||
import com.cameleer.server.core.ingestion.ChunkAccumulator;
|
|
||||||
import com.cameleer.server.core.ingestion.IngestionService;
|
|
||||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
||||||
import com.fasterxml.jackson.core.type.TypeReference;
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
import io.swagger.v3.oas.annotations.Operation;
|
|
||||||
import io.swagger.v3.oas.annotations.responses.ApiResponse;
|
|
||||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
|
||||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
|
|
||||||
import org.springframework.http.ResponseEntity;
|
|
||||||
import org.springframework.security.core.Authentication;
|
|
||||||
import org.springframework.security.core.context.SecurityContextHolder;
|
|
||||||
import org.springframework.web.bind.annotation.PostMapping;
|
|
||||||
import org.springframework.web.bind.annotation.RequestBody;
|
|
||||||
import org.springframework.web.bind.annotation.RequestMapping;
|
|
||||||
import org.springframework.web.bind.annotation.RestController;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Legacy ingestion endpoint for route execution data (PostgreSQL path).
|
|
||||||
* <p>
|
|
||||||
* Accepts both single {@link RouteExecution} and arrays. Data is written
|
|
||||||
* synchronously to PostgreSQL via {@link IngestionService}.
|
|
||||||
* <p>
|
|
||||||
* Only active when ClickHouse is disabled — when ClickHouse is enabled,
|
|
||||||
* {@link ChunkIngestionController} takes over the {@code /executions} mapping.
|
|
||||||
*/
|
|
||||||
@RestController
|
|
||||||
@RequestMapping("/api/v1/data")
|
|
||||||
@ConditionalOnMissingBean(ChunkAccumulator.class)
|
|
||||||
@Tag(name = "Ingestion", description = "Data ingestion endpoints")
|
|
||||||
public class ExecutionController {
|
|
||||||
|
|
||||||
private final IngestionService ingestionService;
|
|
||||||
private final AgentRegistryService registryService;
|
|
||||||
private final ObjectMapper objectMapper;
|
|
||||||
|
|
||||||
public ExecutionController(IngestionService ingestionService,
|
|
||||||
AgentRegistryService registryService,
|
|
||||||
ObjectMapper objectMapper) {
|
|
||||||
this.ingestionService = ingestionService;
|
|
||||||
this.registryService = registryService;
|
|
||||||
this.objectMapper = objectMapper;
|
|
||||||
}
|
|
||||||
|
|
||||||
@PostMapping("/executions")
|
|
||||||
@Operation(summary = "Ingest route execution data",
|
|
||||||
description = "Accepts a single RouteExecution or an array of RouteExecutions")
|
|
||||||
@ApiResponse(responseCode = "202", description = "Data accepted for processing")
|
|
||||||
public ResponseEntity<Void> ingestExecutions(@RequestBody String body) throws JsonProcessingException {
|
|
||||||
String instanceId = extractAgentId();
|
|
||||||
String applicationId = resolveApplicationId(instanceId);
|
|
||||||
List<RouteExecution> executions = parsePayload(body);
|
|
||||||
|
|
||||||
for (RouteExecution execution : executions) {
|
|
||||||
ingestionService.ingestExecution(instanceId, applicationId, execution);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ResponseEntity.accepted().build();
|
|
||||||
}
|
|
||||||
|
|
||||||
private String extractAgentId() {
|
|
||||||
Authentication auth = SecurityContextHolder.getContext().getAuthentication();
|
|
||||||
return auth != null ? auth.getName() : "";
|
|
||||||
}
|
|
||||||
|
|
||||||
private String resolveApplicationId(String instanceId) {
|
|
||||||
AgentInfo agent = registryService.findById(instanceId);
|
|
||||||
return agent != null ? agent.applicationId() : "";
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<RouteExecution> parsePayload(String body) throws JsonProcessingException {
|
|
||||||
String trimmed = body.strip();
|
|
||||||
if (trimmed.startsWith("[")) {
|
|
||||||
return objectMapper.readValue(trimmed, new TypeReference<>() {});
|
|
||||||
} else {
|
|
||||||
RouteExecution single = objectMapper.readValue(trimmed, RouteExecution.class);
|
|
||||||
return List.of(single);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
package com.cameleer.server.app.controller;
|
|
||||||
|
|
||||||
import com.cameleer.server.core.license.LicenseGate;
|
|
||||||
import com.cameleer.server.core.license.LicenseInfo;
|
|
||||||
import com.cameleer.server.core.license.LicenseValidator;
|
|
||||||
import io.swagger.v3.oas.annotations.Operation;
|
|
||||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
|
||||||
import org.springframework.http.ResponseEntity;
|
|
||||||
import org.springframework.security.access.prepost.PreAuthorize;
|
|
||||||
import org.springframework.web.bind.annotation.*;
|
|
||||||
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
@RestController
|
|
||||||
@RequestMapping("/api/v1/admin/license")
|
|
||||||
@PreAuthorize("hasRole('ADMIN')")
|
|
||||||
@Tag(name = "License Admin", description = "License management")
|
|
||||||
public class LicenseAdminController {
|
|
||||||
|
|
||||||
private final LicenseGate licenseGate;
|
|
||||||
private final String licensePublicKey;
|
|
||||||
|
|
||||||
public LicenseAdminController(LicenseGate licenseGate,
|
|
||||||
@Value("${cameleer.server.license.publickey:}") String licensePublicKey) {
|
|
||||||
this.licenseGate = licenseGate;
|
|
||||||
this.licensePublicKey = licensePublicKey;
|
|
||||||
}
|
|
||||||
|
|
||||||
@GetMapping
|
|
||||||
@Operation(summary = "Get current license info")
|
|
||||||
public ResponseEntity<LicenseInfo> getCurrent() {
|
|
||||||
return ResponseEntity.ok(licenseGate.getCurrent());
|
|
||||||
}
|
|
||||||
|
|
||||||
record UpdateLicenseRequest(String token) {}
|
|
||||||
|
|
||||||
@PostMapping
|
|
||||||
@Operation(summary = "Update license token at runtime")
|
|
||||||
public ResponseEntity<?> update(@RequestBody UpdateLicenseRequest request) {
|
|
||||||
if (licensePublicKey == null || licensePublicKey.isBlank()) {
|
|
||||||
return ResponseEntity.badRequest().body(Map.of("error", "No license public key configured"));
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
LicenseValidator validator = new LicenseValidator(licensePublicKey);
|
|
||||||
LicenseInfo info = validator.validate(request.token());
|
|
||||||
licenseGate.load(info);
|
|
||||||
return ResponseEntity.ok(info);
|
|
||||||
} catch (Exception e) {
|
|
||||||
return ResponseEntity.badRequest().body(Map.of("error", e.getMessage()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
package com.cameleer.server.app.dto;
|
|
||||||
|
|
||||||
import io.swagger.v3.oas.annotations.media.Schema;
|
|
||||||
|
|
||||||
import java.time.Instant;
|
|
||||||
|
|
||||||
@Schema(description = "Search indexer pipeline statistics")
|
|
||||||
public record IndexerPipelineResponse(
|
|
||||||
int queueDepth,
|
|
||||||
int maxQueueSize,
|
|
||||||
long failedCount,
|
|
||||||
long indexedCount,
|
|
||||||
long debounceMs,
|
|
||||||
double indexingRate,
|
|
||||||
Instant lastIndexedAt
|
|
||||||
) {}
|
|
||||||
@@ -1,408 +0,0 @@
|
|||||||
package com.cameleer.server.app.runtime;
|
|
||||||
|
|
||||||
import com.cameleer.server.app.metrics.ServerMetrics;
|
|
||||||
import com.cameleer.server.app.storage.PostgresDeploymentRepository;
|
|
||||||
import com.cameleer.server.core.runtime.*;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
|
||||||
import org.springframework.scheduling.annotation.Async;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
@Service
|
|
||||||
public class DeploymentExecutor {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(DeploymentExecutor.class);
|
|
||||||
|
|
||||||
private final RuntimeOrchestrator orchestrator;
|
|
||||||
private final DeploymentService deploymentService;
|
|
||||||
private final AppService appService;
|
|
||||||
private final EnvironmentService envService;
|
|
||||||
private final DeploymentRepository deploymentRepository;
|
|
||||||
private final PostgresDeploymentRepository pgDeployRepo;
|
|
||||||
|
|
||||||
@Autowired(required = false)
|
|
||||||
private DockerNetworkManager networkManager;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.runtime.baseimage:gitea.siegeln.net/cameleer/cameleer-runtime-base:latest}")
|
|
||||||
private String baseImage;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.runtime.dockernetwork:cameleer}")
|
|
||||||
private String dockerNetwork;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.runtime.container.memorylimit:512m}")
|
|
||||||
private String globalMemoryLimit;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.runtime.container.cpushares:512}")
|
|
||||||
private int globalCpuShares;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.runtime.healthchecktimeout:60}")
|
|
||||||
private int healthCheckTimeout;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.runtime.agenthealthport:9464}")
|
|
||||||
private int agentHealthPort;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.security.bootstraptoken:}")
|
|
||||||
private String bootstrapToken;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.runtime.routingmode:path}")
|
|
||||||
private String globalRoutingMode;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.runtime.routingdomain:localhost}")
|
|
||||||
private String globalRoutingDomain;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.runtime.serverurl:}")
|
|
||||||
private String globalServerUrl;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.runtime.jardockervolume:}")
|
|
||||||
private String jarDockerVolume;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.runtime.jarstoragepath:/data/jars}")
|
|
||||||
private String jarStoragePath;
|
|
||||||
|
|
||||||
@Value("${cameleer.server.tenant.id:default}")
|
|
||||||
private String tenantId;
|
|
||||||
|
|
||||||
@Autowired
|
|
||||||
private ServerMetrics serverMetrics;
|
|
||||||
|
|
||||||
public DeploymentExecutor(RuntimeOrchestrator orchestrator,
|
|
||||||
DeploymentService deploymentService,
|
|
||||||
AppService appService,
|
|
||||||
EnvironmentService envService,
|
|
||||||
DeploymentRepository deploymentRepository) {
|
|
||||||
this.orchestrator = orchestrator;
|
|
||||||
this.deploymentService = deploymentService;
|
|
||||||
this.appService = appService;
|
|
||||||
this.envService = envService;
|
|
||||||
this.deploymentRepository = deploymentRepository;
|
|
||||||
this.pgDeployRepo = (PostgresDeploymentRepository) deploymentRepository;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Async("deploymentTaskExecutor")
|
|
||||||
public void executeAsync(Deployment deployment) {
|
|
||||||
long deployStart = System.currentTimeMillis();
|
|
||||||
try {
|
|
||||||
App app = appService.getById(deployment.appId());
|
|
||||||
Environment env = envService.getById(deployment.environmentId());
|
|
||||||
String jarPath = appService.resolveJarPath(deployment.appVersionId());
|
|
||||||
|
|
||||||
var globalDefaults = new ConfigMerger.GlobalRuntimeDefaults(
|
|
||||||
parseMemoryLimitMb(globalMemoryLimit),
|
|
||||||
globalCpuShares,
|
|
||||||
globalRoutingMode,
|
|
||||||
globalRoutingDomain,
|
|
||||||
globalServerUrl.isBlank() ? "http://cameleer-server:8081" : globalServerUrl
|
|
||||||
);
|
|
||||||
ResolvedContainerConfig config = ConfigMerger.resolve(
|
|
||||||
globalDefaults, env.defaultContainerConfig(), app.containerConfig());
|
|
||||||
|
|
||||||
pgDeployRepo.updateDeploymentStrategy(deployment.id(), config.deploymentStrategy());
|
|
||||||
pgDeployRepo.updateResolvedConfig(deployment.id(), resolvedConfigToMap(config));
|
|
||||||
|
|
||||||
// === PRE-FLIGHT ===
|
|
||||||
updateStage(deployment.id(), DeployStage.PRE_FLIGHT);
|
|
||||||
preFlightChecks(jarPath, config);
|
|
||||||
|
|
||||||
// Resolve runtime type
|
|
||||||
String resolvedRuntimeType = config.runtimeType();
|
|
||||||
String mainClass = null;
|
|
||||||
if ("auto".equalsIgnoreCase(resolvedRuntimeType)) {
|
|
||||||
AppVersion appVersion = appService.getVersion(deployment.appVersionId());
|
|
||||||
if (appVersion.detectedRuntimeType() == null) {
|
|
||||||
throw new IllegalStateException(
|
|
||||||
"Could not detect runtime type for JAR '" + appVersion.jarFilename() +
|
|
||||||
"'. Set runtimeType explicitly in app configuration.");
|
|
||||||
}
|
|
||||||
resolvedRuntimeType = appVersion.detectedRuntimeType();
|
|
||||||
mainClass = appVersion.detectedMainClass();
|
|
||||||
} else if ("plain-java".equals(resolvedRuntimeType)) {
|
|
||||||
AppVersion appVersion = appService.getVersion(deployment.appVersionId());
|
|
||||||
mainClass = appVersion.detectedMainClass();
|
|
||||||
if (mainClass == null) {
|
|
||||||
throw new IllegalStateException(
|
|
||||||
"Runtime type 'plain-java' requires a Main-Class in the JAR manifest, " +
|
|
||||||
"but none was detected for '" + appVersion.jarFilename() + "'.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// === PULL IMAGE ===
|
|
||||||
updateStage(deployment.id(), DeployStage.PULL_IMAGE);
|
|
||||||
orchestrator.pullImage(baseImage);
|
|
||||||
|
|
||||||
// === CREATE NETWORKS ===
|
|
||||||
updateStage(deployment.id(), DeployStage.CREATE_NETWORK);
|
|
||||||
// Primary network: use configured CAMELEER_DOCKER_NETWORK (tenant-isolated in SaaS mode)
|
|
||||||
String primaryNetwork = dockerNetwork;
|
|
||||||
String envNet = null;
|
|
||||||
List<String> additionalNets = new ArrayList<>();
|
|
||||||
if (networkManager != null) {
|
|
||||||
networkManager.ensureNetwork(primaryNetwork);
|
|
||||||
// Traefik network for routing (apps need to be reachable by Traefik)
|
|
||||||
networkManager.ensureNetwork(DockerNetworkManager.TRAEFIK_NETWORK);
|
|
||||||
additionalNets.add(DockerNetworkManager.TRAEFIK_NETWORK);
|
|
||||||
// Per-environment network scoped to tenant to prevent cross-tenant collisions
|
|
||||||
envNet = DockerNetworkManager.envNetworkName(tenantId, env.slug());
|
|
||||||
networkManager.ensureNetwork(envNet);
|
|
||||||
additionalNets.add(envNet);
|
|
||||||
}
|
|
||||||
|
|
||||||
// User-configured extra networks (e.g., monitoring)
|
|
||||||
if (networkManager != null && config.extraNetworks() != null) {
|
|
||||||
for (String net : config.extraNetworks()) {
|
|
||||||
if (!net.isBlank() && !additionalNets.contains(net)) {
|
|
||||||
networkManager.ensureNetwork(net);
|
|
||||||
additionalNets.add(net);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// === START REPLICAS ===
|
|
||||||
updateStage(deployment.id(), DeployStage.START_REPLICAS);
|
|
||||||
|
|
||||||
Map<String, String> baseEnvVars = buildEnvVars(app, env, config);
|
|
||||||
Map<String, String> prometheusLabels = PrometheusLabelBuilder.build(resolvedRuntimeType);
|
|
||||||
|
|
||||||
List<Map<String, Object>> replicaStates = new ArrayList<>();
|
|
||||||
List<String> newContainerIds = new ArrayList<>();
|
|
||||||
|
|
||||||
for (int i = 0; i < config.replicas(); i++) {
|
|
||||||
String instanceId = env.slug() + "-" + app.slug() + "-" + i;
|
|
||||||
String containerName = tenantId + "-" + instanceId;
|
|
||||||
|
|
||||||
// Per-replica labels (include replica index and instance-id)
|
|
||||||
Map<String, String> labels = TraefikLabelBuilder.build(app.slug(), env.slug(), tenantId, config, i);
|
|
||||||
labels.putAll(prometheusLabels);
|
|
||||||
|
|
||||||
// Per-replica env vars (set agent instance ID to match container log identity)
|
|
||||||
Map<String, String> replicaEnvVars = new LinkedHashMap<>(baseEnvVars);
|
|
||||||
replicaEnvVars.put("CAMELEER_AGENT_INSTANCEID", instanceId);
|
|
||||||
|
|
||||||
String volumeName = jarDockerVolume != null && !jarDockerVolume.isBlank() ? jarDockerVolume : null;
|
|
||||||
ContainerRequest request = new ContainerRequest(
|
|
||||||
containerName, baseImage, jarPath,
|
|
||||||
volumeName, jarStoragePath,
|
|
||||||
primaryNetwork,
|
|
||||||
additionalNets,
|
|
||||||
replicaEnvVars, labels,
|
|
||||||
config.memoryLimitBytes(), config.memoryReserveBytes(),
|
|
||||||
config.dockerCpuShares(), config.dockerCpuQuota(),
|
|
||||||
config.exposedPorts(), agentHealthPort,
|
|
||||||
"on-failure", 3,
|
|
||||||
resolvedRuntimeType, config.customArgs(), mainClass
|
|
||||||
);
|
|
||||||
|
|
||||||
String containerId = orchestrator.startContainer(request);
|
|
||||||
newContainerIds.add(containerId);
|
|
||||||
|
|
||||||
// Connect to additional networks after container is started
|
|
||||||
for (String net : additionalNets) {
|
|
||||||
if (networkManager != null) {
|
|
||||||
networkManager.connectContainer(containerId, net);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
orchestrator.startLogCapture(containerId, instanceId, app.slug(), env.slug(), tenantId);
|
|
||||||
|
|
||||||
replicaStates.add(Map.of(
|
|
||||||
"index", i,
|
|
||||||
"containerId", containerId,
|
|
||||||
"containerName", containerName,
|
|
||||||
"status", "STARTING"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
pgDeployRepo.updateReplicaStates(deployment.id(), replicaStates);
|
|
||||||
|
|
||||||
// === HEALTH CHECK ===
|
|
||||||
updateStage(deployment.id(), DeployStage.HEALTH_CHECK);
|
|
||||||
int healthyCount = waitForAnyHealthy(newContainerIds, healthCheckTimeout);
|
|
||||||
|
|
||||||
if (healthyCount == 0) {
|
|
||||||
for (String cid : newContainerIds) {
|
|
||||||
try { orchestrator.stopContainer(cid); orchestrator.removeContainer(cid); }
|
|
||||||
catch (Exception e) { log.warn("Cleanup failed for {}: {}", cid, e.getMessage()); }
|
|
||||||
}
|
|
||||||
pgDeployRepo.updateDeployStage(deployment.id(), null);
|
|
||||||
deploymentService.markFailed(deployment.id(), "No replicas passed health check within " + healthCheckTimeout + "s");
|
|
||||||
serverMetrics.recordDeploymentOutcome("FAILED");
|
|
||||||
serverMetrics.recordDeploymentDuration(deployStart);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
replicaStates = updateReplicaHealth(replicaStates, newContainerIds);
|
|
||||||
pgDeployRepo.updateReplicaStates(deployment.id(), replicaStates);
|
|
||||||
|
|
||||||
// === SWAP TRAFFIC ===
|
|
||||||
updateStage(deployment.id(), DeployStage.SWAP_TRAFFIC);
|
|
||||||
|
|
||||||
Optional<Deployment> existing = deploymentRepository.findActiveByAppIdAndEnvironmentId(
|
|
||||||
deployment.appId(), deployment.environmentId());
|
|
||||||
if (existing.isPresent() && !existing.get().id().equals(deployment.id())) {
|
|
||||||
stopDeploymentContainers(existing.get());
|
|
||||||
deploymentService.markStopped(existing.get().id());
|
|
||||||
log.info("Stopped previous deployment {} for replacement", existing.get().id());
|
|
||||||
}
|
|
||||||
|
|
||||||
// === COMPLETE ===
|
|
||||||
updateStage(deployment.id(), DeployStage.COMPLETE);
|
|
||||||
|
|
||||||
String primaryContainerId = newContainerIds.get(0);
|
|
||||||
DeploymentStatus finalStatus = healthyCount == config.replicas()
|
|
||||||
? DeploymentStatus.RUNNING : DeploymentStatus.DEGRADED;
|
|
||||||
deploymentService.markRunning(deployment.id(), primaryContainerId);
|
|
||||||
if (finalStatus == DeploymentStatus.DEGRADED) {
|
|
||||||
deploymentRepository.updateStatus(deployment.id(), DeploymentStatus.DEGRADED,
|
|
||||||
primaryContainerId, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
pgDeployRepo.updateDeployStage(deployment.id(), null);
|
|
||||||
serverMetrics.recordDeploymentOutcome(finalStatus.name());
|
|
||||||
serverMetrics.recordDeploymentDuration(deployStart);
|
|
||||||
log.info("Deployment {} is {} ({}/{} replicas healthy)",
|
|
||||||
deployment.id(), finalStatus, healthyCount, config.replicas());
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error("Deployment {} FAILED: {}", deployment.id(), e.getMessage(), e);
|
|
||||||
pgDeployRepo.updateDeployStage(deployment.id(), null);
|
|
||||||
deploymentService.markFailed(deployment.id(), e.getMessage());
|
|
||||||
serverMetrics.recordDeploymentOutcome("FAILED");
|
|
||||||
serverMetrics.recordDeploymentDuration(deployStart);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void stopDeployment(Deployment deployment) {
|
|
||||||
pgDeployRepo.updateTargetState(deployment.id(), "STOPPED");
|
|
||||||
deploymentRepository.updateStatus(deployment.id(), DeploymentStatus.STOPPING,
|
|
||||||
deployment.containerId(), null);
|
|
||||||
|
|
||||||
stopDeploymentContainers(deployment);
|
|
||||||
deploymentService.markStopped(deployment.id());
|
|
||||||
}
|
|
||||||
|
|
||||||
private void stopDeploymentContainers(Deployment deployment) {
|
|
||||||
List<Map<String, Object>> replicas = deployment.replicaStates() != null
|
|
||||||
? deployment.replicaStates() : List.of();
|
|
||||||
for (Map<String, Object> replica : replicas) {
|
|
||||||
String cid = (String) replica.get("containerId");
|
|
||||||
if (cid != null) {
|
|
||||||
try {
|
|
||||||
orchestrator.stopContainer(cid);
|
|
||||||
orchestrator.removeContainer(cid);
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.warn("Failed to stop replica container {}: {}", cid, e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (deployment.containerId() != null && replicas.isEmpty()) {
|
|
||||||
try {
|
|
||||||
orchestrator.stopContainer(deployment.containerId());
|
|
||||||
orchestrator.removeContainer(deployment.containerId());
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.warn("Failed to stop container {}: {}", deployment.containerId(), e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void preFlightChecks(String jarPath, ResolvedContainerConfig config) {
|
|
||||||
if (!Files.exists(Path.of(jarPath))) {
|
|
||||||
throw new IllegalStateException("JAR file not found: " + jarPath);
|
|
||||||
}
|
|
||||||
if (config.memoryLimitMb() <= 0) {
|
|
||||||
throw new IllegalStateException("Memory limit must be positive, got: " + config.memoryLimitMb());
|
|
||||||
}
|
|
||||||
if (config.appPort() <= 0 || config.appPort() > 65535) {
|
|
||||||
throw new IllegalStateException("Invalid app port: " + config.appPort());
|
|
||||||
}
|
|
||||||
if (config.replicas() < 1) {
|
|
||||||
throw new IllegalStateException("Replicas must be >= 1, got: " + config.replicas());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private Map<String, String> buildEnvVars(App app, Environment env, ResolvedContainerConfig config) {
|
|
||||||
Map<String, String> envVars = new LinkedHashMap<>();
|
|
||||||
envVars.put("CAMELEER_AGENT_EXPORT_TYPE", "HTTP");
|
|
||||||
envVars.put("CAMELEER_AGENT_APPLICATION", app.slug());
|
|
||||||
envVars.put("CAMELEER_AGENT_ENVIRONMENT", env.slug());
|
|
||||||
envVars.put("CAMELEER_AGENT_EXPORT_ENDPOINT", config.serverUrl());
|
|
||||||
envVars.put("CAMELEER_AGENT_ROUTECONTROL_ENABLED", String.valueOf(config.routeControlEnabled()));
|
|
||||||
envVars.put("CAMELEER_AGENT_REPLAY_ENABLED", String.valueOf(config.replayEnabled()));
|
|
||||||
envVars.put("CAMELEER_AGENT_HEALTH_ENABLED", "true");
|
|
||||||
envVars.put("CAMELEER_AGENT_HEALTH_PORT", String.valueOf(agentHealthPort));
|
|
||||||
if (bootstrapToken != null && !bootstrapToken.isBlank()) {
|
|
||||||
envVars.put("CAMELEER_AGENT_AUTH_TOKEN", bootstrapToken);
|
|
||||||
}
|
|
||||||
envVars.putAll(config.customEnvVars());
|
|
||||||
return envVars;
|
|
||||||
}
|
|
||||||
|
|
||||||
private int waitForAnyHealthy(List<String> containerIds, int timeoutSeconds) {
|
|
||||||
long deadline = System.currentTimeMillis() + (timeoutSeconds * 1000L);
|
|
||||||
int lastHealthy = 0;
|
|
||||||
while (System.currentTimeMillis() < deadline) {
|
|
||||||
int healthy = 0;
|
|
||||||
for (String cid : containerIds) {
|
|
||||||
ContainerStatus status = orchestrator.getContainerStatus(cid);
|
|
||||||
if ("healthy".equals(status.state())) healthy++;
|
|
||||||
}
|
|
||||||
lastHealthy = healthy;
|
|
||||||
if (healthy == containerIds.size()) return healthy;
|
|
||||||
try { Thread.sleep(2000); } catch (InterruptedException e) {
|
|
||||||
Thread.currentThread().interrupt();
|
|
||||||
return lastHealthy;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return lastHealthy;
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<Map<String, Object>> updateReplicaHealth(List<Map<String, Object>> replicas,
|
|
||||||
List<String> containerIds) {
|
|
||||||
List<Map<String, Object>> updated = new ArrayList<>();
|
|
||||||
for (Map<String, Object> replica : replicas) {
|
|
||||||
String cid = (String) replica.get("containerId");
|
|
||||||
ContainerStatus status = orchestrator.getContainerStatus(cid);
|
|
||||||
Map<String, Object> copy = new HashMap<>(replica);
|
|
||||||
copy.put("status", status.running() ? "RUNNING" : "DEAD");
|
|
||||||
updated.add(copy);
|
|
||||||
}
|
|
||||||
return updated;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void updateStage(UUID deploymentId, DeployStage stage) {
|
|
||||||
pgDeployRepo.updateDeployStage(deploymentId, stage.name());
|
|
||||||
}
|
|
||||||
|
|
||||||
private int parseMemoryLimitMb(String limit) {
|
|
||||||
limit = limit.trim().toLowerCase();
|
|
||||||
if (limit.endsWith("g")) return (int) (Double.parseDouble(limit.replace("g", "")) * 1024);
|
|
||||||
if (limit.endsWith("m")) return (int) Double.parseDouble(limit.replace("m", ""));
|
|
||||||
return Integer.parseInt(limit);
|
|
||||||
}
|
|
||||||
|
|
||||||
private Map<String, Object> resolvedConfigToMap(ResolvedContainerConfig config) {
|
|
||||||
Map<String, Object> map = new LinkedHashMap<>();
|
|
||||||
map.put("memoryLimitMb", config.memoryLimitMb());
|
|
||||||
if (config.memoryReserveMb() != null) map.put("memoryReserveMb", config.memoryReserveMb());
|
|
||||||
map.put("cpuRequest", config.cpuRequest());
|
|
||||||
if (config.cpuLimit() != null) map.put("cpuLimit", config.cpuLimit());
|
|
||||||
map.put("appPort", config.appPort());
|
|
||||||
map.put("exposedPorts", config.exposedPorts());
|
|
||||||
map.put("customEnvVars", config.customEnvVars());
|
|
||||||
map.put("stripPathPrefix", config.stripPathPrefix());
|
|
||||||
map.put("sslOffloading", config.sslOffloading());
|
|
||||||
map.put("routingMode", config.routingMode());
|
|
||||||
map.put("routingDomain", config.routingDomain());
|
|
||||||
map.put("serverUrl", config.serverUrl());
|
|
||||||
map.put("replicas", config.replicas());
|
|
||||||
map.put("deploymentStrategy", config.deploymentStrategy());
|
|
||||||
map.put("runtimeType", config.runtimeType());
|
|
||||||
map.put("customArgs", config.customArgs());
|
|
||||||
map.put("extraNetworks", config.extraNetworks());
|
|
||||||
return map;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,219 +0,0 @@
|
|||||||
package com.cameleer.server.app.runtime;
|
|
||||||
|
|
||||||
import com.cameleer.server.core.runtime.ContainerRequest;
|
|
||||||
import com.cameleer.server.core.runtime.ContainerStatus;
|
|
||||||
import com.cameleer.server.core.runtime.RuntimeOrchestrator;
|
|
||||||
import com.github.dockerjava.api.DockerClient;
|
|
||||||
import com.github.dockerjava.api.async.ResultCallback;
|
|
||||||
import com.github.dockerjava.api.model.AccessMode;
|
|
||||||
import com.github.dockerjava.api.model.Bind;
|
|
||||||
import com.github.dockerjava.api.model.Frame;
|
|
||||||
import com.github.dockerjava.api.model.HealthCheck;
|
|
||||||
import com.github.dockerjava.api.model.HostConfig;
|
|
||||||
import com.github.dockerjava.api.model.RestartPolicy;
|
|
||||||
import com.github.dockerjava.api.model.Volume;
|
|
||||||
import jakarta.annotation.PreDestroy;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
public class DockerRuntimeOrchestrator implements RuntimeOrchestrator {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(DockerRuntimeOrchestrator.class);
|
|
||||||
private final DockerClient dockerClient;
|
|
||||||
|
|
||||||
private ContainerLogForwarder logForwarder;
|
|
||||||
|
|
||||||
public DockerRuntimeOrchestrator(DockerClient dockerClient) {
|
|
||||||
this.dockerClient = dockerClient;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setLogForwarder(ContainerLogForwarder logForwarder) {
|
|
||||||
this.logForwarder = logForwarder;
|
|
||||||
}
|
|
||||||
|
|
||||||
@PreDestroy
|
|
||||||
public void close() throws IOException {
|
|
||||||
if (dockerClient != null) {
|
|
||||||
dockerClient.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isEnabled() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void pullImage(String image) {
|
|
||||||
try {
|
|
||||||
log.info("Pulling image {}", image);
|
|
||||||
dockerClient.pullImageCmd(image).start().awaitCompletion();
|
|
||||||
log.info("Image pulled: {}", image);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
Thread.currentThread().interrupt();
|
|
||||||
log.warn("Image pull interrupted for {}", image);
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.warn("Failed to pull image {} (will use local cache if available): {}", image, e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String startContainer(ContainerRequest request) {
|
|
||||||
List<String> envList = request.envVars().entrySet().stream()
|
|
||||||
.map(e -> e.getKey() + "=" + e.getValue()).toList();
|
|
||||||
|
|
||||||
HostConfig hostConfig = HostConfig.newHostConfig()
|
|
||||||
.withMemory(request.memoryLimitBytes())
|
|
||||||
.withMemorySwap(request.memoryLimitBytes())
|
|
||||||
.withCpuShares(request.cpuShares())
|
|
||||||
.withNetworkMode(request.network())
|
|
||||||
.withRestartPolicy(RestartPolicy.onFailureRestart(request.restartPolicyMaxRetries()));
|
|
||||||
|
|
||||||
// JAR mounting: volume mount (Docker-in-Docker) or bind mount (host path)
|
|
||||||
if (request.jarVolumeName() != null && !request.jarVolumeName().isBlank()) {
|
|
||||||
// Mount the named volume at the jar storage base path
|
|
||||||
Bind volumeBind = new Bind(request.jarVolumeName(), new Volume(request.jarVolumeMountPath()), AccessMode.ro);
|
|
||||||
hostConfig.withBinds(volumeBind);
|
|
||||||
} else {
|
|
||||||
Bind jarBind = new Bind(request.jarPath(), new Volume("/app/app.jar"), AccessMode.ro);
|
|
||||||
hostConfig.withBinds(jarBind);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (request.memoryReserveBytes() != null) {
|
|
||||||
hostConfig.withMemoryReservation(request.memoryReserveBytes());
|
|
||||||
}
|
|
||||||
if (request.cpuQuota() != null) {
|
|
||||||
hostConfig.withCpuQuota(request.cpuQuota());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resolve the JAR path for the entrypoint
|
|
||||||
String appJarPath;
|
|
||||||
if (request.jarVolumeName() != null && !request.jarVolumeName().isBlank()) {
|
|
||||||
appJarPath = request.jarPath();
|
|
||||||
} else {
|
|
||||||
appJarPath = "/app/app.jar";
|
|
||||||
}
|
|
||||||
|
|
||||||
var createCmd = dockerClient.createContainerCmd(request.baseImage())
|
|
||||||
.withName(request.containerName())
|
|
||||||
.withEnv(envList)
|
|
||||||
.withLabels(request.labels() != null ? request.labels() : Map.of())
|
|
||||||
.withHostConfig(hostConfig)
|
|
||||||
.withHealthcheck(new HealthCheck()
|
|
||||||
.withTest(List.of("CMD-SHELL",
|
|
||||||
"wget -qO- http://localhost:" + request.healthCheckPort() + "/cameleer/health || exit 1"))
|
|
||||||
.withInterval(10_000_000_000L)
|
|
||||||
.withTimeout(5_000_000_000L)
|
|
||||||
.withRetries(3)
|
|
||||||
.withStartPeriod(30_000_000_000L));
|
|
||||||
|
|
||||||
// Build entrypoint based on runtime type
|
|
||||||
String customArgs = request.customArgs() != null && !request.customArgs().isBlank()
|
|
||||||
? " " + request.customArgs() : "";
|
|
||||||
String entrypoint = switch (request.runtimeType()) {
|
|
||||||
case "plain-java" -> "exec java -javaagent:/app/agent.jar" + customArgs +
|
|
||||||
" -cp " + appJarPath + " " + request.mainClass();
|
|
||||||
case "native" -> "exec " + appJarPath + customArgs;
|
|
||||||
default -> // spring-boot, quarkus, and others all use -jar
|
|
||||||
"exec java -javaagent:/app/agent.jar" + customArgs + " -jar " + appJarPath;
|
|
||||||
};
|
|
||||||
createCmd.withEntrypoint("sh", "-c", entrypoint);
|
|
||||||
|
|
||||||
if (request.exposedPorts() != null && !request.exposedPorts().isEmpty()) {
|
|
||||||
var ports = request.exposedPorts().stream()
|
|
||||||
.map(p -> com.github.dockerjava.api.model.ExposedPort.tcp(p))
|
|
||||||
.toArray(com.github.dockerjava.api.model.ExposedPort[]::new);
|
|
||||||
createCmd.withExposedPorts(ports);
|
|
||||||
}
|
|
||||||
|
|
||||||
var container = createCmd.exec();
|
|
||||||
dockerClient.startContainerCmd(container.getId()).exec();
|
|
||||||
|
|
||||||
log.info("Started container {} ({})", request.containerName(), container.getId());
|
|
||||||
return container.getId();
|
|
||||||
}
|
|
||||||
|
|
||||||
public DockerClient getDockerClient() {
|
|
||||||
return dockerClient;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void stopContainer(String containerId) {
|
|
||||||
try {
|
|
||||||
dockerClient.stopContainerCmd(containerId).withTimeout(30).exec();
|
|
||||||
log.info("Stopped container {}", containerId);
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.warn("Failed to stop container {}: {}", containerId, e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void removeContainer(String containerId) {
|
|
||||||
try {
|
|
||||||
dockerClient.removeContainerCmd(containerId).withForce(true).exec();
|
|
||||||
log.info("Removed container {}", containerId);
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.warn("Failed to remove container {}: {}", containerId, e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public ContainerStatus getContainerStatus(String containerId) {
|
|
||||||
try {
|
|
||||||
var inspection = dockerClient.inspectContainerCmd(containerId).exec();
|
|
||||||
var state = inspection.getState();
|
|
||||||
var health = state.getHealth();
|
|
||||||
var healthStatus = health != null ? health.getStatus() : null;
|
|
||||||
// Use health status if available, otherwise fall back to container state
|
|
||||||
var effectiveState = healthStatus != null ? healthStatus : state.getStatus();
|
|
||||||
return new ContainerStatus(
|
|
||||||
effectiveState,
|
|
||||||
Boolean.TRUE.equals(state.getRunning()),
|
|
||||||
state.getExitCodeLong() != null ? state.getExitCodeLong().intValue() : 0,
|
|
||||||
state.getError());
|
|
||||||
} catch (Exception e) {
|
|
||||||
return new ContainerStatus("not_found", false, -1, e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Stream<String> getLogs(String containerId, int tailLines) {
|
|
||||||
List<String> logLines = new ArrayList<>();
|
|
||||||
try {
|
|
||||||
dockerClient.logContainerCmd(containerId)
|
|
||||||
.withStdOut(true)
|
|
||||||
.withStdErr(true)
|
|
||||||
.withTail(tailLines)
|
|
||||||
.withTimestamps(true)
|
|
||||||
.exec(new ResultCallback.Adapter<Frame>() {
|
|
||||||
@Override
|
|
||||||
public void onNext(Frame frame) {
|
|
||||||
logLines.add(new String(frame.getPayload()).trim());
|
|
||||||
}
|
|
||||||
}).awaitCompletion();
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.warn("Failed to get logs for container {}: {}", containerId, e.getMessage());
|
|
||||||
}
|
|
||||||
return logLines.stream();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void startLogCapture(String containerId, String instanceId, String appSlug, String envSlug, String tenantId) {
|
|
||||||
if (logForwarder != null) {
|
|
||||||
logForwarder.startCapture(containerId, instanceId, appSlug, envSlug, tenantId);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void stopLogCapture(String containerId) {
|
|
||||||
if (logForwarder != null) {
|
|
||||||
logForwarder.stopCapture(containerId);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,78 +0,0 @@
|
|||||||
package com.cameleer.server.app.storage;
|
|
||||||
|
|
||||||
import com.cameleer.server.core.agent.AgentEventRecord;
|
|
||||||
import com.cameleer.server.core.agent.AgentEventRepository;
|
|
||||||
import org.springframework.jdbc.core.JdbcTemplate;
|
|
||||||
|
|
||||||
import java.sql.Timestamp;
|
|
||||||
import java.time.Instant;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* ClickHouse implementation of {@link AgentEventRepository}.
|
|
||||||
* <p>
|
|
||||||
* The ClickHouse table has no {@code id} column (no BIGSERIAL equivalent),
|
|
||||||
* so all returned {@link AgentEventRecord} instances have {@code id = 0}.
|
|
||||||
*/
|
|
||||||
public class ClickHouseAgentEventRepository implements AgentEventRepository {
|
|
||||||
|
|
||||||
private static final String INSERT_SQL =
|
|
||||||
"INSERT INTO agent_events (tenant_id, instance_id, application_id, environment, event_type, detail) VALUES (?, ?, ?, ?, ?, ?)";
|
|
||||||
|
|
||||||
private static final String SELECT_BASE =
|
|
||||||
"SELECT 0 AS id, instance_id, application_id, event_type, detail, timestamp FROM agent_events WHERE tenant_id = ?";
|
|
||||||
|
|
||||||
private final String tenantId;
|
|
||||||
private final JdbcTemplate jdbc;
|
|
||||||
|
|
||||||
public ClickHouseAgentEventRepository(String tenantId, JdbcTemplate jdbc) {
|
|
||||||
this.tenantId = tenantId;
|
|
||||||
this.jdbc = jdbc;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void insert(String instanceId, String applicationId, String environment, String eventType, String detail) {
|
|
||||||
jdbc.update(INSERT_SQL, tenantId, instanceId, applicationId,
|
|
||||||
environment != null ? environment : "default", eventType, detail);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<AgentEventRecord> query(String applicationId, String instanceId, String environment, Instant from, Instant to, int limit) {
|
|
||||||
var sql = new StringBuilder(SELECT_BASE);
|
|
||||||
var params = new ArrayList<Object>();
|
|
||||||
params.add(tenantId);
|
|
||||||
|
|
||||||
if (applicationId != null) {
|
|
||||||
sql.append(" AND application_id = ?");
|
|
||||||
params.add(applicationId);
|
|
||||||
}
|
|
||||||
if (instanceId != null) {
|
|
||||||
sql.append(" AND instance_id = ?");
|
|
||||||
params.add(instanceId);
|
|
||||||
}
|
|
||||||
if (environment != null) {
|
|
||||||
sql.append(" AND environment = ?");
|
|
||||||
params.add(environment);
|
|
||||||
}
|
|
||||||
if (from != null) {
|
|
||||||
sql.append(" AND timestamp >= ?");
|
|
||||||
params.add(Timestamp.from(from));
|
|
||||||
}
|
|
||||||
if (to != null) {
|
|
||||||
sql.append(" AND timestamp < ?");
|
|
||||||
params.add(Timestamp.from(to));
|
|
||||||
}
|
|
||||||
sql.append(" ORDER BY timestamp DESC LIMIT ?");
|
|
||||||
params.add(limit);
|
|
||||||
|
|
||||||
return jdbc.query(sql.toString(), (rs, rowNum) -> new AgentEventRecord(
|
|
||||||
rs.getLong("id"),
|
|
||||||
rs.getString("instance_id"),
|
|
||||||
rs.getString("application_id"),
|
|
||||||
rs.getString("event_type"),
|
|
||||||
rs.getString("detail"),
|
|
||||||
rs.getTimestamp("timestamp").toInstant()
|
|
||||||
), params.toArray());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,21 +1,23 @@
|
|||||||
package com.cameleer.server.app;
|
package io.cameleer.server.app;
|
||||||
|
|
||||||
import com.cameleer.server.app.config.AgentRegistryConfig;
|
import io.cameleer.server.app.config.AgentRegistryConfig;
|
||||||
import com.cameleer.server.app.config.IngestionConfig;
|
import io.cameleer.server.app.config.IngestionConfig;
|
||||||
import org.springframework.boot.SpringApplication;
|
import org.springframework.boot.SpringApplication;
|
||||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||||
import org.springframework.scheduling.annotation.EnableAsync;
|
import org.springframework.scheduling.annotation.EnableAsync;
|
||||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||||
|
|
||||||
|
import java.util.TimeZone;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Main entry point for the Cameleer Server application.
|
* Main entry point for the Cameleer Server application.
|
||||||
* <p>
|
* <p>
|
||||||
* Scans {@code com.cameleer.server.app} and {@code com.cameleer.server.core} packages.
|
* Scans {@code io.cameleer.server.app} and {@code io.cameleer.server.core} packages.
|
||||||
*/
|
*/
|
||||||
@SpringBootApplication(scanBasePackages = {
|
@SpringBootApplication(scanBasePackages = {
|
||||||
"com.cameleer.server.app",
|
"io.cameleer.server.app",
|
||||||
"com.cameleer.server.core"
|
"io.cameleer.server.core"
|
||||||
})
|
})
|
||||||
@EnableAsync
|
@EnableAsync
|
||||||
@EnableScheduling
|
@EnableScheduling
|
||||||
@@ -23,6 +25,11 @@ import org.springframework.scheduling.annotation.EnableScheduling;
|
|||||||
public class CameleerServerApplication {
|
public class CameleerServerApplication {
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
// Pin JVM default TZ to UTC. The ClickHouse JDBC driver formats
|
||||||
|
// java.sql.Timestamp via toString() which uses JVM default TZ; a
|
||||||
|
// non-UTC JVM would then send CH timestamps off by the TZ offset.
|
||||||
|
// Standard practice for observability servers.
|
||||||
|
TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
|
||||||
SpringApplication.run(CameleerServerApplication.class, args);
|
SpringApplication.run(CameleerServerApplication.class, args);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
package com.cameleer.server.app.agent;
|
package io.cameleer.server.app.agent;
|
||||||
|
|
||||||
import com.cameleer.server.app.metrics.ServerMetrics;
|
import io.cameleer.server.app.metrics.ServerMetrics;
|
||||||
import com.cameleer.server.core.agent.AgentEventService;
|
import io.cameleer.server.core.agent.AgentEventService;
|
||||||
import com.cameleer.server.core.agent.AgentInfo;
|
import io.cameleer.server.core.agent.AgentInfo;
|
||||||
import com.cameleer.server.core.agent.AgentRegistryService;
|
import io.cameleer.server.core.agent.AgentRegistryService;
|
||||||
import com.cameleer.server.core.agent.AgentState;
|
import io.cameleer.server.core.agent.AgentState;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.scheduling.annotation.Scheduled;
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
@@ -70,7 +70,7 @@ public class AgentLifecycleMonitor {
|
|||||||
private String mapTransitionEvent(AgentState from, AgentState to) {
|
private String mapTransitionEvent(AgentState from, AgentState to) {
|
||||||
if (from == AgentState.LIVE && to == AgentState.STALE) return "WENT_STALE";
|
if (from == AgentState.LIVE && to == AgentState.STALE) return "WENT_STALE";
|
||||||
if (from == AgentState.STALE && to == AgentState.DEAD) return "WENT_DEAD";
|
if (from == AgentState.STALE && to == AgentState.DEAD) return "WENT_DEAD";
|
||||||
if (from == AgentState.STALE && to == AgentState.LIVE) return "RECOVERED";
|
if (to == AgentState.LIVE && (from == AgentState.STALE || from == AgentState.DEAD)) return "RECOVERED";
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
package com.cameleer.server.app.agent;
|
package io.cameleer.server.app.agent;
|
||||||
|
|
||||||
import com.cameleer.server.app.config.AgentRegistryConfig;
|
import io.cameleer.server.app.config.AgentRegistryConfig;
|
||||||
import com.cameleer.server.core.agent.AgentCommand;
|
import io.cameleer.server.core.agent.AgentCommand;
|
||||||
import com.cameleer.server.core.agent.AgentEventListener;
|
import io.cameleer.server.core.agent.AgentEventListener;
|
||||||
import com.cameleer.server.core.agent.AgentRegistryService;
|
import io.cameleer.server.core.agent.AgentRegistryService;
|
||||||
import com.fasterxml.jackson.databind.JsonNode;
|
import com.fasterxml.jackson.databind.JsonNode;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import jakarta.annotation.PostConstruct;
|
import jakarta.annotation.PostConstruct;
|
||||||
@@ -80,6 +80,17 @@ public class SseConnectionManager implements AgentEventListener {
|
|||||||
log.debug("SSE connection error for agent {}: {}", agentId, ex.getMessage());
|
log.debug("SSE connection error for agent {}: {}", agentId, ex.getMessage());
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Send an initial keepalive comment so Spring flushes the response
|
||||||
|
// headers immediately. Without this, clients blocking on the first
|
||||||
|
// body byte can hang for a full ping interval before observing the
|
||||||
|
// connection — surface symptom in ITs that assert awaitConnection().
|
||||||
|
try {
|
||||||
|
emitter.send(SseEmitter.event().comment("connected"));
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.debug("Initial keepalive failed for agent {}: {}", agentId, e.getMessage());
|
||||||
|
emitters.remove(agentId, emitter);
|
||||||
|
}
|
||||||
|
|
||||||
log.info("SSE connection established for agent {}", agentId);
|
log.info("SSE connection established for agent {}", agentId);
|
||||||
|
|
||||||
return emitter;
|
return emitter;
|
||||||
@@ -169,7 +180,7 @@ public class SseConnectionManager implements AgentEventListener {
|
|||||||
/**
|
/**
|
||||||
* Scheduled ping keepalive to all connected agents.
|
* Scheduled ping keepalive to all connected agents.
|
||||||
*/
|
*/
|
||||||
@Scheduled(fixedDelayString = "${agent-registry.ping-interval-ms:15000}")
|
@Scheduled(fixedDelayString = "${cameleer.server.agentregistry.ping-interval-ms:15000}")
|
||||||
void pingAll() {
|
void pingAll() {
|
||||||
if (!emitters.isEmpty()) {
|
if (!emitters.isEmpty()) {
|
||||||
sendPingToAll();
|
sendPingToAll();
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
package com.cameleer.server.app.agent;
|
package io.cameleer.server.app.agent;
|
||||||
|
|
||||||
import com.cameleer.server.core.security.Ed25519SigningService;
|
import io.cameleer.server.core.security.Ed25519SigningService;
|
||||||
import com.fasterxml.jackson.databind.JsonNode;
|
import com.fasterxml.jackson.databind.JsonNode;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||||
@@ -0,0 +1,78 @@
|
|||||||
|
package io.cameleer.server.app.alerting.config;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.eval.PerKindCircuitBreaker;
|
||||||
|
import io.cameleer.server.app.alerting.metrics.AlertingMetrics;
|
||||||
|
import io.cameleer.server.app.alerting.storage.*;
|
||||||
|
import io.cameleer.server.core.alerting.AlertInstanceRepository;
|
||||||
|
import io.cameleer.server.core.alerting.AlertNotificationRepository;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRuleRepository;
|
||||||
|
import io.cameleer.server.core.alerting.AlertSilenceRepository;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||||||
|
|
||||||
|
import java.net.InetAddress;
|
||||||
|
import java.time.Clock;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
@EnableConfigurationProperties(AlertingProperties.class)
|
||||||
|
public class AlertingBeanConfig {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(AlertingBeanConfig.class);
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public AlertRuleRepository alertRuleRepository(JdbcTemplate jdbc, ObjectMapper om) {
|
||||||
|
return new PostgresAlertRuleRepository(jdbc, om);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public AlertInstanceRepository alertInstanceRepository(JdbcTemplate jdbc, ObjectMapper om) {
|
||||||
|
return new PostgresAlertInstanceRepository(jdbc, om);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public AlertSilenceRepository alertSilenceRepository(JdbcTemplate jdbc, ObjectMapper om) {
|
||||||
|
return new PostgresAlertSilenceRepository(jdbc, om);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public AlertNotificationRepository alertNotificationRepository(JdbcTemplate jdbc, ObjectMapper om) {
|
||||||
|
return new PostgresAlertNotificationRepository(jdbc, om);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public Clock alertingClock() {
|
||||||
|
return Clock.systemDefaultZone();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Bean("alertingInstanceId")
|
||||||
|
public String alertingInstanceId() {
|
||||||
|
String hostname;
|
||||||
|
try {
|
||||||
|
hostname = InetAddress.getLocalHost().getHostName();
|
||||||
|
} catch (Exception e) {
|
||||||
|
hostname = "unknown";
|
||||||
|
}
|
||||||
|
return hostname + ":" + ProcessHandle.current().pid();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public PerKindCircuitBreaker perKindCircuitBreaker(AlertingProperties props,
|
||||||
|
AlertingMetrics alertingMetrics) {
|
||||||
|
if (props.evaluatorTickIntervalMs() != null
|
||||||
|
&& props.evaluatorTickIntervalMs() < 5000) {
|
||||||
|
log.warn("cameleer.server.alerting.evaluatorTickIntervalMs={} is below the 5000 ms floor; clamping to 5000 ms",
|
||||||
|
props.evaluatorTickIntervalMs());
|
||||||
|
}
|
||||||
|
PerKindCircuitBreaker breaker = new PerKindCircuitBreaker(
|
||||||
|
props.cbFailThreshold(),
|
||||||
|
props.cbWindowSeconds(),
|
||||||
|
props.cbCooldownSeconds());
|
||||||
|
breaker.setMetrics(alertingMetrics);
|
||||||
|
return breaker;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,79 @@
|
|||||||
|
package io.cameleer.server.app.alerting.config;
|
||||||
|
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
|
||||||
|
@ConfigurationProperties("cameleer.server.alerting")
|
||||||
|
public record AlertingProperties(
|
||||||
|
Integer evaluatorTickIntervalMs,
|
||||||
|
Integer evaluatorBatchSize,
|
||||||
|
Integer claimTtlSeconds,
|
||||||
|
Integer notificationTickIntervalMs,
|
||||||
|
Integer notificationBatchSize,
|
||||||
|
Boolean inTickCacheEnabled,
|
||||||
|
Integer circuitBreakerFailThreshold,
|
||||||
|
Integer circuitBreakerWindowSeconds,
|
||||||
|
Integer circuitBreakerCooldownSeconds,
|
||||||
|
Integer eventRetentionDays,
|
||||||
|
Integer notificationRetentionDays,
|
||||||
|
Integer webhookTimeoutMs,
|
||||||
|
Integer webhookMaxAttempts,
|
||||||
|
Integer perExchangeDeployBacklogCapSeconds) {
|
||||||
|
|
||||||
|
public int effectiveEvaluatorTickIntervalMs() {
|
||||||
|
int raw = evaluatorTickIntervalMs == null ? 5000 : evaluatorTickIntervalMs;
|
||||||
|
return Math.max(5000, raw); // floor: no faster than 5 s
|
||||||
|
}
|
||||||
|
|
||||||
|
public int effectiveEvaluatorBatchSize() {
|
||||||
|
return evaluatorBatchSize == null ? 20 : evaluatorBatchSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int effectiveClaimTtlSeconds() {
|
||||||
|
return claimTtlSeconds == null ? 30 : claimTtlSeconds;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int effectiveNotificationTickIntervalMs() {
|
||||||
|
return notificationTickIntervalMs == null ? 5000 : notificationTickIntervalMs;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int effectiveNotificationBatchSize() {
|
||||||
|
return notificationBatchSize == null ? 50 : notificationBatchSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean effectiveInTickCacheEnabled() {
|
||||||
|
return inTickCacheEnabled == null || inTickCacheEnabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int effectiveEventRetentionDays() {
|
||||||
|
return eventRetentionDays == null ? 90 : eventRetentionDays;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int effectiveNotificationRetentionDays() {
|
||||||
|
return notificationRetentionDays == null ? 30 : notificationRetentionDays;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int effectiveWebhookTimeoutMs() {
|
||||||
|
return webhookTimeoutMs == null ? 5000 : webhookTimeoutMs;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int effectiveWebhookMaxAttempts() {
|
||||||
|
return webhookMaxAttempts == null ? 3 : webhookMaxAttempts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int cbFailThreshold() {
|
||||||
|
return circuitBreakerFailThreshold == null ? 5 : circuitBreakerFailThreshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int cbWindowSeconds() {
|
||||||
|
return circuitBreakerWindowSeconds == null ? 30 : circuitBreakerWindowSeconds;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int cbCooldownSeconds() {
|
||||||
|
return circuitBreakerCooldownSeconds == null ? 60 : circuitBreakerCooldownSeconds;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int effectivePerExchangeDeployBacklogCapSeconds() {
|
||||||
|
// Default 24 h. Zero or negative = disabled (no clamp — first-run uses rule.createdAt as today).
|
||||||
|
return perExchangeDeployBacklogCapSeconds == null ? 86_400 : perExchangeDeployBacklogCapSeconds;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,165 @@
|
|||||||
|
package io.cameleer.server.app.alerting.controller;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.dto.AlertDto;
|
||||||
|
import io.cameleer.server.app.alerting.dto.BulkIdsRequest;
|
||||||
|
import io.cameleer.server.app.alerting.dto.UnreadCountResponse;
|
||||||
|
import io.cameleer.server.app.alerting.notify.InAppInboxQuery;
|
||||||
|
import io.cameleer.server.app.web.EnvPath;
|
||||||
|
import io.cameleer.server.core.alerting.AlertInstance;
|
||||||
|
import io.cameleer.server.core.alerting.AlertInstanceRepository;
|
||||||
|
import io.cameleer.server.core.alerting.AlertSeverity;
|
||||||
|
import io.cameleer.server.core.alerting.AlertState;
|
||||||
|
import io.cameleer.server.core.runtime.Environment;
|
||||||
|
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||||
|
import jakarta.validation.Valid;
|
||||||
|
import org.springframework.http.HttpStatus;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.security.access.prepost.PreAuthorize;
|
||||||
|
import org.springframework.security.core.context.SecurityContextHolder;
|
||||||
|
import org.springframework.web.bind.annotation.DeleteMapping;
|
||||||
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
import org.springframework.web.bind.annotation.PathVariable;
|
||||||
|
import org.springframework.web.bind.annotation.PostMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestBody;
|
||||||
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestParam;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
import org.springframework.web.server.ResponseStatusException;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* REST controller for the in-app alert inbox (env-scoped).
|
||||||
|
* VIEWER+ can read their own inbox; OPERATOR+ can soft-delete and restore alerts.
|
||||||
|
*/
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/api/v1/environments/{envSlug}/alerts")
|
||||||
|
@Tag(name = "Alerts Inbox", description = "In-app alert inbox, ack and read tracking (env-scoped)")
|
||||||
|
@PreAuthorize("hasAnyRole('VIEWER','OPERATOR','ADMIN')")
|
||||||
|
public class AlertController {
|
||||||
|
|
||||||
|
private final InAppInboxQuery inboxQuery;
|
||||||
|
private final AlertInstanceRepository instanceRepo;
|
||||||
|
|
||||||
|
public AlertController(InAppInboxQuery inboxQuery,
|
||||||
|
AlertInstanceRepository instanceRepo) {
|
||||||
|
this.inboxQuery = inboxQuery;
|
||||||
|
this.instanceRepo = instanceRepo;
|
||||||
|
}
|
||||||
|
|
||||||
|
@GetMapping
|
||||||
|
public List<AlertDto> list(
|
||||||
|
@EnvPath Environment env,
|
||||||
|
@RequestParam(defaultValue = "50") int limit,
|
||||||
|
@RequestParam(required = false) List<AlertState> state,
|
||||||
|
@RequestParam(required = false) List<AlertSeverity> severity,
|
||||||
|
@RequestParam(required = false) Boolean acked,
|
||||||
|
@RequestParam(required = false) Boolean read) {
|
||||||
|
String userId = currentUserId();
|
||||||
|
int effectiveLimit = Math.min(limit, 200);
|
||||||
|
return inboxQuery.listInbox(env.id(), userId, state, severity, acked, read, effectiveLimit)
|
||||||
|
.stream().map(AlertDto::from).toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@GetMapping("/unread-count")
|
||||||
|
public UnreadCountResponse unreadCount(@EnvPath Environment env) {
|
||||||
|
return inboxQuery.countUnread(env.id(), currentUserId());
|
||||||
|
}
|
||||||
|
|
||||||
|
@GetMapping("/{id}")
|
||||||
|
public AlertDto get(@EnvPath Environment env, @PathVariable UUID id) {
|
||||||
|
AlertInstance instance = requireLiveInstance(id, env.id());
|
||||||
|
return AlertDto.from(instance);
|
||||||
|
}
|
||||||
|
|
||||||
|
@PostMapping("/{id}/ack")
|
||||||
|
public AlertDto ack(@EnvPath Environment env, @PathVariable UUID id) {
|
||||||
|
AlertInstance instance = requireLiveInstance(id, env.id());
|
||||||
|
String userId = currentUserId();
|
||||||
|
instanceRepo.ack(id, userId, Instant.now());
|
||||||
|
// Re-fetch to return fresh state
|
||||||
|
return AlertDto.from(instanceRepo.findById(id)
|
||||||
|
.orElseThrow(() -> new ResponseStatusException(HttpStatus.NOT_FOUND)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@PostMapping("/{id}/read")
|
||||||
|
public void read(@EnvPath Environment env, @PathVariable UUID id) {
|
||||||
|
requireLiveInstance(id, env.id());
|
||||||
|
instanceRepo.markRead(id, Instant.now());
|
||||||
|
}
|
||||||
|
|
||||||
|
@PostMapping("/bulk-read")
|
||||||
|
public void bulkRead(@EnvPath Environment env,
|
||||||
|
@Valid @RequestBody BulkIdsRequest req) {
|
||||||
|
List<UUID> filtered = inEnvLiveIds(req.instanceIds(), env.id());
|
||||||
|
if (!filtered.isEmpty()) {
|
||||||
|
instanceRepo.bulkMarkRead(filtered, Instant.now());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@PostMapping("/bulk-ack")
|
||||||
|
public void bulkAck(@EnvPath Environment env,
|
||||||
|
@Valid @RequestBody BulkIdsRequest req) {
|
||||||
|
List<UUID> filtered = inEnvLiveIds(req.instanceIds(), env.id());
|
||||||
|
if (!filtered.isEmpty()) {
|
||||||
|
instanceRepo.bulkAck(filtered, currentUserId(), Instant.now());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DeleteMapping("/{id}")
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public ResponseEntity<Void> delete(@EnvPath Environment env, @PathVariable UUID id) {
|
||||||
|
requireLiveInstance(id, env.id());
|
||||||
|
instanceRepo.softDelete(id, Instant.now());
|
||||||
|
return ResponseEntity.noContent().build();
|
||||||
|
}
|
||||||
|
|
||||||
|
@PostMapping("/bulk-delete")
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public void bulkDelete(@EnvPath Environment env,
|
||||||
|
@Valid @RequestBody BulkIdsRequest req) {
|
||||||
|
List<UUID> filtered = inEnvLiveIds(req.instanceIds(), env.id());
|
||||||
|
if (!filtered.isEmpty()) {
|
||||||
|
instanceRepo.bulkSoftDelete(filtered, Instant.now());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@PostMapping("/{id}/restore")
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public ResponseEntity<Void> restore(@EnvPath Environment env, @PathVariable UUID id) {
|
||||||
|
// Unlike requireLiveInstance, restore explicitly targets soft-deleted rows
|
||||||
|
AlertInstance inst = instanceRepo.findById(id)
|
||||||
|
.orElseThrow(() -> new ResponseStatusException(HttpStatus.NOT_FOUND, "Alert not found"));
|
||||||
|
if (!inst.environmentId().equals(env.id()))
|
||||||
|
throw new ResponseStatusException(HttpStatus.NOT_FOUND, "Alert not found in env");
|
||||||
|
instanceRepo.restore(id);
|
||||||
|
return ResponseEntity.noContent().build();
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private AlertInstance requireLiveInstance(UUID id, UUID envId) {
|
||||||
|
AlertInstance i = instanceRepo.findById(id)
|
||||||
|
.orElseThrow(() -> new ResponseStatusException(HttpStatus.NOT_FOUND, "Alert not found"));
|
||||||
|
if (!i.environmentId().equals(envId) || i.deletedAt() != null)
|
||||||
|
throw new ResponseStatusException(HttpStatus.NOT_FOUND, "Alert not found in env");
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<UUID> inEnvLiveIds(List<UUID> ids, UUID envId) {
|
||||||
|
return instanceRepo.filterInEnvLive(ids, envId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String currentUserId() {
|
||||||
|
var auth = SecurityContextHolder.getContext().getAuthentication();
|
||||||
|
if (auth == null || auth.getName() == null) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.UNAUTHORIZED, "No authentication");
|
||||||
|
}
|
||||||
|
String name = auth.getName();
|
||||||
|
return name.startsWith("user:") ? name.substring(5) : name;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
package io.cameleer.server.app.alerting.controller;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.dto.AlertNotificationDto;
|
||||||
|
import io.cameleer.server.app.web.EnvPath;
|
||||||
|
import io.cameleer.server.core.alerting.AlertNotification;
|
||||||
|
import io.cameleer.server.core.alerting.AlertNotificationRepository;
|
||||||
|
import io.cameleer.server.core.alerting.NotificationStatus;
|
||||||
|
import io.cameleer.server.core.runtime.Environment;
|
||||||
|
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||||
|
import org.springframework.http.HttpStatus;
|
||||||
|
import org.springframework.security.access.prepost.PreAuthorize;
|
||||||
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
import org.springframework.web.bind.annotation.PathVariable;
|
||||||
|
import org.springframework.web.bind.annotation.PostMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
import org.springframework.web.server.ResponseStatusException;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* REST controller for alert notifications.
|
||||||
|
* <p>
|
||||||
|
* Env-scoped: GET /api/v1/environments/{envSlug}/alerts/{id}/notifications — lists outbound
|
||||||
|
* notifications for a given alert instance.
|
||||||
|
* <p>
|
||||||
|
* Flat: POST /api/v1/alerts/notifications/{id}/retry — globally unique notification IDs;
|
||||||
|
* flat path matches the /executions/{id} precedent. OPERATOR+ only.
|
||||||
|
*/
|
||||||
|
@RestController
|
||||||
|
@Tag(name = "Alert Notifications", description = "Outbound webhook notification management")
|
||||||
|
public class AlertNotificationController {
|
||||||
|
|
||||||
|
private final AlertNotificationRepository notificationRepo;
|
||||||
|
|
||||||
|
public AlertNotificationController(AlertNotificationRepository notificationRepo) {
|
||||||
|
this.notificationRepo = notificationRepo;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lists notifications for a specific alert instance (env-scoped).
|
||||||
|
* VIEWER+.
|
||||||
|
*/
|
||||||
|
@GetMapping("/api/v1/environments/{envSlug}/alerts/{alertId}/notifications")
|
||||||
|
@PreAuthorize("hasAnyRole('VIEWER','OPERATOR','ADMIN')")
|
||||||
|
public List<AlertNotificationDto> listForInstance(
|
||||||
|
@EnvPath Environment env,
|
||||||
|
@PathVariable UUID alertId) {
|
||||||
|
return notificationRepo.listForInstance(alertId)
|
||||||
|
.stream().map(AlertNotificationDto::from).toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retries a failed notification — resets attempts and schedules it for immediate retry.
|
||||||
|
* Notification IDs are globally unique (flat path, matches /executions/{id} precedent).
|
||||||
|
* OPERATOR+ only.
|
||||||
|
*/
|
||||||
|
@PostMapping("/api/v1/alerts/notifications/{id}/retry")
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public AlertNotificationDto retry(@PathVariable UUID id) {
|
||||||
|
AlertNotification notification = notificationRepo.findById(id)
|
||||||
|
.orElseThrow(() -> new ResponseStatusException(HttpStatus.NOT_FOUND,
|
||||||
|
"Notification not found: " + id));
|
||||||
|
|
||||||
|
if (notification.status() == NotificationStatus.PENDING) {
|
||||||
|
return AlertNotificationDto.from(notification);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset for retry: status -> PENDING, attempts -> 0, next_attempt_at -> now
|
||||||
|
notificationRepo.resetForRetry(id, Instant.now());
|
||||||
|
|
||||||
|
return AlertNotificationDto.from(notificationRepo.findById(id)
|
||||||
|
.orElseThrow(() -> new ResponseStatusException(HttpStatus.NOT_FOUND)));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,408 @@
|
|||||||
|
package io.cameleer.server.app.alerting.controller;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.dto.AlertRuleRequest;
|
||||||
|
import io.cameleer.server.app.alerting.dto.AlertRuleResponse;
|
||||||
|
import io.cameleer.server.app.alerting.dto.RenderPreviewRequest;
|
||||||
|
import io.cameleer.server.app.alerting.dto.RenderPreviewResponse;
|
||||||
|
import io.cameleer.server.app.alerting.dto.TestEvaluateRequest;
|
||||||
|
import io.cameleer.server.app.alerting.dto.TestEvaluateResponse;
|
||||||
|
import io.cameleer.server.app.alerting.dto.WebhookBindingRequest;
|
||||||
|
import io.cameleer.server.app.alerting.eval.ConditionEvaluator;
|
||||||
|
import io.cameleer.server.app.alerting.eval.EvalContext;
|
||||||
|
import io.cameleer.server.app.alerting.eval.EvalResult;
|
||||||
|
import io.cameleer.server.app.alerting.eval.TickCache;
|
||||||
|
import io.cameleer.server.app.alerting.notify.MustacheRenderer;
|
||||||
|
import io.cameleer.server.app.license.LicenseEnforcer;
|
||||||
|
import io.cameleer.server.app.web.EnvPath;
|
||||||
|
import io.cameleer.server.core.admin.AuditCategory;
|
||||||
|
import io.cameleer.server.core.admin.AuditResult;
|
||||||
|
import io.cameleer.server.core.admin.AuditService;
|
||||||
|
import io.cameleer.server.core.alerting.AlertCondition;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRuleRepository;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRuleTarget;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
import io.cameleer.server.core.alerting.ExchangeMatchCondition;
|
||||||
|
import io.cameleer.server.core.alerting.FireMode;
|
||||||
|
import io.cameleer.server.core.alerting.WebhookBinding;
|
||||||
|
import io.cameleer.server.core.outbound.OutboundConnection;
|
||||||
|
import io.cameleer.server.core.outbound.OutboundConnectionService;
|
||||||
|
import io.cameleer.server.core.runtime.Environment;
|
||||||
|
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||||
|
import jakarta.servlet.http.HttpServletRequest;
|
||||||
|
import jakarta.validation.Valid;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.http.HttpStatus;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.security.access.prepost.PreAuthorize;
|
||||||
|
import org.springframework.security.core.context.SecurityContextHolder;
|
||||||
|
import org.springframework.web.bind.annotation.DeleteMapping;
|
||||||
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
import org.springframework.web.bind.annotation.PathVariable;
|
||||||
|
import org.springframework.web.bind.annotation.PostMapping;
|
||||||
|
import org.springframework.web.bind.annotation.PutMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestBody;
|
||||||
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
import org.springframework.web.server.ResponseStatusException;
|
||||||
|
|
||||||
|
import java.time.Clock;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* REST controller for alert rules (env-scoped).
|
||||||
|
* <p>
|
||||||
|
* CRITICAL: {@link ExchangeMatchCondition#filter()} attribute KEYS are inlined into ClickHouse SQL.
|
||||||
|
* They are validated here at save time to match {@code ^[a-zA-Z0-9._-]+$} before any SQL is built.
|
||||||
|
*/
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/api/v1/environments/{envSlug}/alerts/rules")
|
||||||
|
@Tag(name = "Alert Rules", description = "Alert rule management (env-scoped)")
|
||||||
|
@PreAuthorize("hasAnyRole('VIEWER','OPERATOR','ADMIN')")
|
||||||
|
public class AlertRuleController {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Attribute KEY allowlist. Keys are inlined into ClickHouse SQL via
|
||||||
|
* {@code JSONExtractString(attributes, '<key>')}, so this pattern is a hard security gate.
|
||||||
|
* Values are always parameter-bound and safe.
|
||||||
|
*/
|
||||||
|
private static final Pattern ATTR_KEY = Pattern.compile("^[a-zA-Z0-9._-]+$");
|
||||||
|
|
||||||
|
private final AlertRuleRepository ruleRepo;
|
||||||
|
private final OutboundConnectionService connectionService;
|
||||||
|
private final AuditService auditService;
|
||||||
|
private final MustacheRenderer renderer;
|
||||||
|
private final Map<ConditionKind, ConditionEvaluator<?>> evaluators;
|
||||||
|
private final Clock clock;
|
||||||
|
private final String tenantId;
|
||||||
|
private final LicenseEnforcer licenseEnforcer;
|
||||||
|
|
||||||
|
@SuppressWarnings("SpringJavaInjectionPointsAutowiringInspection")
|
||||||
|
public AlertRuleController(AlertRuleRepository ruleRepo,
|
||||||
|
OutboundConnectionService connectionService,
|
||||||
|
AuditService auditService,
|
||||||
|
MustacheRenderer renderer,
|
||||||
|
List<ConditionEvaluator<?>> evaluatorList,
|
||||||
|
Clock alertingClock,
|
||||||
|
@Value("${cameleer.server.tenant.id:default}") String tenantId,
|
||||||
|
LicenseEnforcer licenseEnforcer) {
|
||||||
|
this.ruleRepo = ruleRepo;
|
||||||
|
this.connectionService = connectionService;
|
||||||
|
this.auditService = auditService;
|
||||||
|
this.renderer = renderer;
|
||||||
|
this.evaluators = new java.util.EnumMap<>(ConditionKind.class);
|
||||||
|
for (ConditionEvaluator<?> e : evaluatorList) {
|
||||||
|
this.evaluators.put(e.kind(), e);
|
||||||
|
}
|
||||||
|
this.clock = alertingClock;
|
||||||
|
this.tenantId = tenantId;
|
||||||
|
this.licenseEnforcer = licenseEnforcer;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// List / Get
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@GetMapping
|
||||||
|
public List<AlertRuleResponse> list(@EnvPath Environment env) {
|
||||||
|
return ruleRepo.listByEnvironment(env.id())
|
||||||
|
.stream().map(AlertRuleResponse::from).toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@GetMapping("/{id}")
|
||||||
|
public AlertRuleResponse get(@EnvPath Environment env, @PathVariable UUID id) {
|
||||||
|
AlertRule rule = requireRule(id, env.id());
|
||||||
|
return AlertRuleResponse.from(rule);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Create / Update / Delete
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@PostMapping
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public ResponseEntity<AlertRuleResponse> create(
|
||||||
|
@EnvPath Environment env,
|
||||||
|
@Valid @RequestBody AlertRuleRequest req,
|
||||||
|
HttpServletRequest httpRequest) {
|
||||||
|
|
||||||
|
licenseEnforcer.assertWithinCap("max_alert_rules", ruleRepo.count(), 1);
|
||||||
|
|
||||||
|
validateAttributeKeys(req.condition());
|
||||||
|
validateBusinessRules(req);
|
||||||
|
validateWebhooks(req.webhooks(), env.id());
|
||||||
|
|
||||||
|
AlertRule draft = buildRule(null, env.id(), req, currentUserId());
|
||||||
|
AlertRule saved = ruleRepo.save(draft);
|
||||||
|
|
||||||
|
auditService.log("ALERT_RULE_CREATE", AuditCategory.ALERT_RULE_CHANGE,
|
||||||
|
saved.id().toString(), Map.of("name", saved.name()), AuditResult.SUCCESS, httpRequest);
|
||||||
|
|
||||||
|
return ResponseEntity.status(HttpStatus.CREATED).body(AlertRuleResponse.from(saved));
|
||||||
|
}
|
||||||
|
|
||||||
|
@PutMapping("/{id}")
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public AlertRuleResponse update(
|
||||||
|
@EnvPath Environment env,
|
||||||
|
@PathVariable UUID id,
|
||||||
|
@Valid @RequestBody AlertRuleRequest req,
|
||||||
|
HttpServletRequest httpRequest) {
|
||||||
|
|
||||||
|
AlertRule existing = requireRule(id, env.id());
|
||||||
|
validateAttributeKeys(req.condition());
|
||||||
|
validateBusinessRules(req);
|
||||||
|
validateWebhooks(req.webhooks(), env.id());
|
||||||
|
|
||||||
|
AlertRule updated = buildRule(existing, env.id(), req, currentUserId());
|
||||||
|
AlertRule saved = ruleRepo.save(updated);
|
||||||
|
|
||||||
|
auditService.log("ALERT_RULE_UPDATE", AuditCategory.ALERT_RULE_CHANGE,
|
||||||
|
id.toString(), Map.of("name", saved.name()), AuditResult.SUCCESS, httpRequest);
|
||||||
|
|
||||||
|
return AlertRuleResponse.from(saved);
|
||||||
|
}
|
||||||
|
|
||||||
|
@DeleteMapping("/{id}")
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public ResponseEntity<Void> delete(
|
||||||
|
@EnvPath Environment env,
|
||||||
|
@PathVariable UUID id,
|
||||||
|
HttpServletRequest httpRequest) {
|
||||||
|
|
||||||
|
requireRule(id, env.id());
|
||||||
|
ruleRepo.delete(id);
|
||||||
|
|
||||||
|
auditService.log("ALERT_RULE_DELETE", AuditCategory.ALERT_RULE_CHANGE,
|
||||||
|
id.toString(), Map.of(), AuditResult.SUCCESS, httpRequest);
|
||||||
|
|
||||||
|
return ResponseEntity.noContent().build();
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Enable / Disable
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@PostMapping("/{id}/enable")
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public AlertRuleResponse enable(
|
||||||
|
@EnvPath Environment env,
|
||||||
|
@PathVariable UUID id,
|
||||||
|
HttpServletRequest httpRequest) {
|
||||||
|
|
||||||
|
AlertRule rule = requireRule(id, env.id());
|
||||||
|
AlertRule updated = withEnabled(rule, true);
|
||||||
|
AlertRule saved = ruleRepo.save(updated);
|
||||||
|
|
||||||
|
auditService.log("ALERT_RULE_ENABLE", AuditCategory.ALERT_RULE_CHANGE,
|
||||||
|
id.toString(), Map.of("name", saved.name()), AuditResult.SUCCESS, httpRequest);
|
||||||
|
|
||||||
|
return AlertRuleResponse.from(saved);
|
||||||
|
}
|
||||||
|
|
||||||
|
@PostMapping("/{id}/disable")
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public AlertRuleResponse disable(
|
||||||
|
@EnvPath Environment env,
|
||||||
|
@PathVariable UUID id,
|
||||||
|
HttpServletRequest httpRequest) {
|
||||||
|
|
||||||
|
AlertRule rule = requireRule(id, env.id());
|
||||||
|
AlertRule updated = withEnabled(rule, false);
|
||||||
|
AlertRule saved = ruleRepo.save(updated);
|
||||||
|
|
||||||
|
auditService.log("ALERT_RULE_DISABLE", AuditCategory.ALERT_RULE_CHANGE,
|
||||||
|
id.toString(), Map.of("name", saved.name()), AuditResult.SUCCESS, httpRequest);
|
||||||
|
|
||||||
|
return AlertRuleResponse.from(saved);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Render Preview
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@PostMapping("/{id}/render-preview")
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public RenderPreviewResponse renderPreview(
|
||||||
|
@EnvPath Environment env,
|
||||||
|
@PathVariable UUID id,
|
||||||
|
@RequestBody RenderPreviewRequest req) {
|
||||||
|
|
||||||
|
AlertRule rule = requireRule(id, env.id());
|
||||||
|
Map<String, Object> ctx = req.context();
|
||||||
|
String title = renderer.render(rule.notificationTitleTmpl(), ctx);
|
||||||
|
String message = renderer.render(rule.notificationMessageTmpl(), ctx);
|
||||||
|
return new RenderPreviewResponse(title, message);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Test Evaluate
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@PostMapping("/{id}/test-evaluate")
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||||
|
public TestEvaluateResponse testEvaluate(
|
||||||
|
@EnvPath Environment env,
|
||||||
|
@PathVariable UUID id,
|
||||||
|
@RequestBody TestEvaluateRequest req) {
|
||||||
|
|
||||||
|
AlertRule rule = requireRule(id, env.id());
|
||||||
|
ConditionEvaluator evaluator = evaluators.get(rule.conditionKind());
|
||||||
|
if (evaluator == null) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.UNPROCESSABLE_ENTITY,
|
||||||
|
"No evaluator registered for condition kind: " + rule.conditionKind());
|
||||||
|
}
|
||||||
|
|
||||||
|
EvalContext ctx = new EvalContext(tenantId, Instant.now(clock), new TickCache());
|
||||||
|
EvalResult result = evaluator.evaluate(rule.condition(), rule, ctx);
|
||||||
|
return TestEvaluateResponse.from(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cross-field business-rule validation for {@link AlertRuleRequest}.
|
||||||
|
*
|
||||||
|
* <p>PER_EXCHANGE rules: re-notify and for-duration are nonsensical (each fire is its own
|
||||||
|
* exchange — there's no "still firing" window and nothing to re-notify about). Reject 400
|
||||||
|
* if either is non-zero.
|
||||||
|
*
|
||||||
|
* <p>All rules: reject 400 if both webhooks and targets are empty — such a rule can never
|
||||||
|
* notify anyone and is a pure footgun.
|
||||||
|
*/
|
||||||
|
private void validateBusinessRules(AlertRuleRequest req) {
|
||||||
|
if (req.condition() instanceof ExchangeMatchCondition ex
|
||||||
|
&& ex.fireMode() == FireMode.PER_EXCHANGE) {
|
||||||
|
if (req.reNotifyMinutes() != null && req.reNotifyMinutes() != 0) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.BAD_REQUEST,
|
||||||
|
"reNotifyMinutes must be 0 for PER_EXCHANGE rules (re-notify does not apply)");
|
||||||
|
}
|
||||||
|
if (req.forDurationSeconds() != null && req.forDurationSeconds() != 0) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.BAD_REQUEST,
|
||||||
|
"forDurationSeconds must be 0 for PER_EXCHANGE rules");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
boolean noWebhooks = req.webhooks() == null || req.webhooks().isEmpty();
|
||||||
|
boolean noTargets = req.targets() == null || req.targets().isEmpty();
|
||||||
|
if (noWebhooks && noTargets) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.BAD_REQUEST,
|
||||||
|
"rule must have at least one webhook or target — otherwise it never notifies anyone");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates that all attribute keys in an {@link ExchangeMatchCondition} match
|
||||||
|
* {@code ^[a-zA-Z0-9._-]+$}. Keys are inlined into ClickHouse SQL, making this
|
||||||
|
* a mandatory SQL-injection prevention gate.
|
||||||
|
*/
|
||||||
|
private void validateAttributeKeys(AlertCondition condition) {
|
||||||
|
if (condition instanceof ExchangeMatchCondition emc && emc.filter() != null) {
|
||||||
|
for (String key : emc.filter().attributes().keySet()) {
|
||||||
|
if (!ATTR_KEY.matcher(key).matches()) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.UNPROCESSABLE_ENTITY,
|
||||||
|
"Invalid attribute key (must match [a-zA-Z0-9._-]+): " + key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates that each webhook outboundConnectionId exists and is allowed in this environment.
|
||||||
|
*/
|
||||||
|
private void validateWebhooks(List<WebhookBindingRequest> webhooks, UUID envId) {
|
||||||
|
for (WebhookBindingRequest wb : webhooks) {
|
||||||
|
OutboundConnection conn;
|
||||||
|
try {
|
||||||
|
conn = connectionService.get(wb.outboundConnectionId());
|
||||||
|
} catch (org.springframework.web.server.ResponseStatusException ex) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.UNPROCESSABLE_ENTITY,
|
||||||
|
"outboundConnectionId not found: " + wb.outboundConnectionId());
|
||||||
|
} catch (Exception ex) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.UNPROCESSABLE_ENTITY,
|
||||||
|
"outboundConnectionId not found: " + wb.outboundConnectionId());
|
||||||
|
}
|
||||||
|
if (!conn.isAllowedInEnvironment(envId)) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.UNPROCESSABLE_ENTITY,
|
||||||
|
"outboundConnection " + wb.outboundConnectionId()
|
||||||
|
+ " is not allowed in this environment");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private AlertRule requireRule(UUID id, UUID envId) {
|
||||||
|
AlertRule rule = ruleRepo.findById(id)
|
||||||
|
.orElseThrow(() -> new ResponseStatusException(HttpStatus.NOT_FOUND,
|
||||||
|
"Alert rule not found: " + id));
|
||||||
|
if (!rule.environmentId().equals(envId)) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.NOT_FOUND,
|
||||||
|
"Alert rule not found in this environment: " + id);
|
||||||
|
}
|
||||||
|
return rule;
|
||||||
|
}
|
||||||
|
|
||||||
|
private AlertRule buildRule(AlertRule existing, UUID envId, AlertRuleRequest req, String userId) {
|
||||||
|
UUID id = existing != null ? existing.id() : UUID.randomUUID();
|
||||||
|
Instant now = Instant.now(clock);
|
||||||
|
Instant createdAt = existing != null ? existing.createdAt() : now;
|
||||||
|
String createdBy = existing != null ? existing.createdBy() : userId;
|
||||||
|
boolean enabled = existing != null ? existing.enabled() : true;
|
||||||
|
|
||||||
|
List<WebhookBinding> webhooks = req.webhooks().stream()
|
||||||
|
.map(wb -> new WebhookBinding(
|
||||||
|
UUID.randomUUID(),
|
||||||
|
wb.outboundConnectionId(),
|
||||||
|
wb.bodyOverride(),
|
||||||
|
wb.headerOverrides()))
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
List<AlertRuleTarget> targets = req.targets() == null ? List.of() : req.targets();
|
||||||
|
|
||||||
|
int evalInterval = req.evaluationIntervalSeconds() != null
|
||||||
|
? req.evaluationIntervalSeconds() : 60;
|
||||||
|
int forDuration = req.forDurationSeconds() != null
|
||||||
|
? req.forDurationSeconds() : 0;
|
||||||
|
int reNotify = req.reNotifyMinutes() != null
|
||||||
|
? req.reNotifyMinutes() : 0;
|
||||||
|
|
||||||
|
String titleTmpl = req.notificationTitleTmpl() != null ? req.notificationTitleTmpl() : "";
|
||||||
|
String messageTmpl = req.notificationMessageTmpl() != null ? req.notificationMessageTmpl() : "";
|
||||||
|
|
||||||
|
return new AlertRule(
|
||||||
|
id, envId, req.name(), req.description(),
|
||||||
|
req.severity(), enabled,
|
||||||
|
req.conditionKind(), req.condition(),
|
||||||
|
evalInterval, forDuration, reNotify,
|
||||||
|
titleTmpl, messageTmpl,
|
||||||
|
webhooks, targets,
|
||||||
|
now, null, null, Map.of(),
|
||||||
|
createdAt, createdBy, now, userId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private AlertRule withEnabled(AlertRule r, boolean enabled) {
|
||||||
|
Instant now = Instant.now(clock);
|
||||||
|
return new AlertRule(
|
||||||
|
r.id(), r.environmentId(), r.name(), r.description(),
|
||||||
|
r.severity(), enabled, r.conditionKind(), r.condition(),
|
||||||
|
r.evaluationIntervalSeconds(), r.forDurationSeconds(), r.reNotifyMinutes(),
|
||||||
|
r.notificationTitleTmpl(), r.notificationMessageTmpl(),
|
||||||
|
r.webhooks(), r.targets(),
|
||||||
|
r.nextEvaluationAt(), r.claimedBy(), r.claimedUntil(), r.evalState(),
|
||||||
|
r.createdAt(), r.createdBy(), now, currentUserId());
|
||||||
|
}
|
||||||
|
|
||||||
|
private String currentUserId() {
|
||||||
|
var auth = SecurityContextHolder.getContext().getAuthentication();
|
||||||
|
if (auth == null || auth.getName() == null) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.UNAUTHORIZED, "No authentication");
|
||||||
|
}
|
||||||
|
String name = auth.getName();
|
||||||
|
return name.startsWith("user:") ? name.substring(5) : name;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,151 @@
|
|||||||
|
package io.cameleer.server.app.alerting.controller;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.dto.AlertSilenceRequest;
|
||||||
|
import io.cameleer.server.app.alerting.dto.AlertSilenceResponse;
|
||||||
|
import io.cameleer.server.app.web.EnvPath;
|
||||||
|
import io.cameleer.server.core.admin.AuditCategory;
|
||||||
|
import io.cameleer.server.core.admin.AuditResult;
|
||||||
|
import io.cameleer.server.core.admin.AuditService;
|
||||||
|
import io.cameleer.server.core.alerting.AlertSilence;
|
||||||
|
import io.cameleer.server.core.alerting.AlertSilenceRepository;
|
||||||
|
import io.cameleer.server.core.runtime.Environment;
|
||||||
|
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||||
|
import jakarta.servlet.http.HttpServletRequest;
|
||||||
|
import jakarta.validation.Valid;
|
||||||
|
import org.springframework.http.HttpStatus;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.security.access.prepost.PreAuthorize;
|
||||||
|
import org.springframework.security.core.context.SecurityContextHolder;
|
||||||
|
import org.springframework.web.bind.annotation.DeleteMapping;
|
||||||
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
import org.springframework.web.bind.annotation.PathVariable;
|
||||||
|
import org.springframework.web.bind.annotation.PostMapping;
|
||||||
|
import org.springframework.web.bind.annotation.PutMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestBody;
|
||||||
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
import org.springframework.web.server.ResponseStatusException;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* REST controller for alert silences (env-scoped).
|
||||||
|
* VIEWER+ can list; OPERATOR+ can create/update/delete.
|
||||||
|
*/
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/api/v1/environments/{envSlug}/alerts/silences")
|
||||||
|
@Tag(name = "Alert Silences", description = "Alert silence management (env-scoped)")
|
||||||
|
@PreAuthorize("hasAnyRole('VIEWER','OPERATOR','ADMIN')")
|
||||||
|
public class AlertSilenceController {
|
||||||
|
|
||||||
|
private final AlertSilenceRepository silenceRepo;
|
||||||
|
private final AuditService auditService;
|
||||||
|
|
||||||
|
public AlertSilenceController(AlertSilenceRepository silenceRepo,
|
||||||
|
AuditService auditService) {
|
||||||
|
this.silenceRepo = silenceRepo;
|
||||||
|
this.auditService = auditService;
|
||||||
|
}
|
||||||
|
|
||||||
|
@GetMapping
|
||||||
|
public List<AlertSilenceResponse> list(@EnvPath Environment env) {
|
||||||
|
return silenceRepo.listByEnvironment(env.id())
|
||||||
|
.stream().map(AlertSilenceResponse::from).toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@PostMapping
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public ResponseEntity<AlertSilenceResponse> create(
|
||||||
|
@EnvPath Environment env,
|
||||||
|
@Valid @RequestBody AlertSilenceRequest req,
|
||||||
|
HttpServletRequest httpRequest) {
|
||||||
|
|
||||||
|
validateTimeRange(req);
|
||||||
|
|
||||||
|
AlertSilence silence = new AlertSilence(
|
||||||
|
UUID.randomUUID(), env.id(), req.matcher(), req.reason(),
|
||||||
|
req.startsAt(), req.endsAt(),
|
||||||
|
currentUserId(), Instant.now());
|
||||||
|
|
||||||
|
AlertSilence saved = silenceRepo.save(silence);
|
||||||
|
|
||||||
|
auditService.log("ALERT_SILENCE_CREATE", AuditCategory.ALERT_SILENCE_CHANGE,
|
||||||
|
saved.id().toString(), Map.of(), AuditResult.SUCCESS, httpRequest);
|
||||||
|
|
||||||
|
return ResponseEntity.status(HttpStatus.CREATED).body(AlertSilenceResponse.from(saved));
|
||||||
|
}
|
||||||
|
|
||||||
|
@PutMapping("/{id}")
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public AlertSilenceResponse update(
|
||||||
|
@EnvPath Environment env,
|
||||||
|
@PathVariable UUID id,
|
||||||
|
@Valid @RequestBody AlertSilenceRequest req,
|
||||||
|
HttpServletRequest httpRequest) {
|
||||||
|
|
||||||
|
AlertSilence existing = requireSilence(id, env.id());
|
||||||
|
validateTimeRange(req);
|
||||||
|
|
||||||
|
AlertSilence updated = new AlertSilence(
|
||||||
|
existing.id(), env.id(), req.matcher(), req.reason(),
|
||||||
|
req.startsAt(), req.endsAt(),
|
||||||
|
existing.createdBy(), existing.createdAt());
|
||||||
|
|
||||||
|
AlertSilence saved = silenceRepo.save(updated);
|
||||||
|
|
||||||
|
auditService.log("ALERT_SILENCE_UPDATE", AuditCategory.ALERT_SILENCE_CHANGE,
|
||||||
|
id.toString(), Map.of(), AuditResult.SUCCESS, httpRequest);
|
||||||
|
|
||||||
|
return AlertSilenceResponse.from(saved);
|
||||||
|
}
|
||||||
|
|
||||||
|
@DeleteMapping("/{id}")
|
||||||
|
@PreAuthorize("hasAnyRole('OPERATOR','ADMIN')")
|
||||||
|
public ResponseEntity<Void> delete(
|
||||||
|
@EnvPath Environment env,
|
||||||
|
@PathVariable UUID id,
|
||||||
|
HttpServletRequest httpRequest) {
|
||||||
|
|
||||||
|
requireSilence(id, env.id());
|
||||||
|
silenceRepo.delete(id);
|
||||||
|
|
||||||
|
auditService.log("ALERT_SILENCE_DELETE", AuditCategory.ALERT_SILENCE_CHANGE,
|
||||||
|
id.toString(), Map.of(), AuditResult.SUCCESS, httpRequest);
|
||||||
|
|
||||||
|
return ResponseEntity.noContent().build();
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private void validateTimeRange(AlertSilenceRequest req) {
|
||||||
|
if (!req.endsAt().isAfter(req.startsAt())) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.UNPROCESSABLE_ENTITY,
|
||||||
|
"endsAt must be after startsAt");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private AlertSilence requireSilence(UUID id, UUID envId) {
|
||||||
|
AlertSilence silence = silenceRepo.findById(id)
|
||||||
|
.orElseThrow(() -> new ResponseStatusException(HttpStatus.NOT_FOUND,
|
||||||
|
"Alert silence not found: " + id));
|
||||||
|
if (!silence.environmentId().equals(envId)) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.NOT_FOUND,
|
||||||
|
"Alert silence not found in this environment: " + id);
|
||||||
|
}
|
||||||
|
return silence;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String currentUserId() {
|
||||||
|
var auth = SecurityContextHolder.getContext().getAuthentication();
|
||||||
|
if (auth == null || auth.getName() == null) {
|
||||||
|
throw new ResponseStatusException(HttpStatus.UNAUTHORIZED, "No authentication");
|
||||||
|
}
|
||||||
|
String name = auth.getName();
|
||||||
|
return name.startsWith("user:") ? name.substring(5) : name;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertInstance;
|
||||||
|
import io.cameleer.server.core.alerting.AlertSeverity;
|
||||||
|
import io.cameleer.server.core.alerting.AlertState;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
public record AlertDto(
|
||||||
|
UUID id,
|
||||||
|
UUID ruleId,
|
||||||
|
UUID environmentId,
|
||||||
|
AlertState state,
|
||||||
|
AlertSeverity severity,
|
||||||
|
String title,
|
||||||
|
String message,
|
||||||
|
Instant firedAt,
|
||||||
|
Instant ackedAt,
|
||||||
|
String ackedBy,
|
||||||
|
Instant resolvedAt,
|
||||||
|
Instant readAt, // global "has anyone read this"
|
||||||
|
boolean silenced,
|
||||||
|
Double currentValue,
|
||||||
|
Double threshold,
|
||||||
|
Map<String, Object> context
|
||||||
|
) {
|
||||||
|
public static AlertDto from(AlertInstance i) {
|
||||||
|
return new AlertDto(
|
||||||
|
i.id(), i.ruleId(), i.environmentId(), i.state(), i.severity(),
|
||||||
|
i.title(), i.message(), i.firedAt(), i.ackedAt(), i.ackedBy(),
|
||||||
|
i.resolvedAt(), i.readAt(), i.silenced(),
|
||||||
|
i.currentValue(), i.threshold(), i.context());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertNotification;
|
||||||
|
import io.cameleer.server.core.alerting.NotificationStatus;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
public record AlertNotificationDto(
|
||||||
|
UUID id,
|
||||||
|
UUID alertInstanceId,
|
||||||
|
UUID webhookId,
|
||||||
|
UUID outboundConnectionId,
|
||||||
|
NotificationStatus status,
|
||||||
|
int attempts,
|
||||||
|
Instant nextAttemptAt,
|
||||||
|
Integer lastResponseStatus,
|
||||||
|
String lastResponseSnippet,
|
||||||
|
Instant deliveredAt,
|
||||||
|
Instant createdAt
|
||||||
|
) {
|
||||||
|
public static AlertNotificationDto from(AlertNotification n) {
|
||||||
|
return new AlertNotificationDto(
|
||||||
|
n.id(), n.alertInstanceId(), n.webhookId(), n.outboundConnectionId(),
|
||||||
|
n.status(), n.attempts(), n.nextAttemptAt(),
|
||||||
|
n.lastResponseStatus(), n.lastResponseSnippet(),
|
||||||
|
n.deliveredAt(), n.createdAt());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertCondition;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRuleTarget;
|
||||||
|
import io.cameleer.server.core.alerting.AlertSeverity;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
import jakarta.validation.Valid;
|
||||||
|
import jakarta.validation.constraints.NotBlank;
|
||||||
|
import jakarta.validation.constraints.NotNull;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
public record AlertRuleRequest(
|
||||||
|
@NotBlank String name,
|
||||||
|
String description,
|
||||||
|
@NotNull AlertSeverity severity,
|
||||||
|
@NotNull ConditionKind conditionKind,
|
||||||
|
@NotNull @Valid AlertCondition condition,
|
||||||
|
Integer evaluationIntervalSeconds,
|
||||||
|
Integer forDurationSeconds,
|
||||||
|
Integer reNotifyMinutes,
|
||||||
|
String notificationTitleTmpl,
|
||||||
|
String notificationMessageTmpl,
|
||||||
|
List<WebhookBindingRequest> webhooks,
|
||||||
|
List<AlertRuleTarget> targets
|
||||||
|
) {
|
||||||
|
public AlertRuleRequest {
|
||||||
|
webhooks = webhooks == null ? List.of() : List.copyOf(webhooks);
|
||||||
|
targets = targets == null ? List.of() : List.copyOf(targets);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertCondition;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRuleTarget;
|
||||||
|
import io.cameleer.server.core.alerting.AlertSeverity;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
public record AlertRuleResponse(
|
||||||
|
UUID id,
|
||||||
|
UUID environmentId,
|
||||||
|
String name,
|
||||||
|
String description,
|
||||||
|
AlertSeverity severity,
|
||||||
|
boolean enabled,
|
||||||
|
ConditionKind conditionKind,
|
||||||
|
AlertCondition condition,
|
||||||
|
int evaluationIntervalSeconds,
|
||||||
|
int forDurationSeconds,
|
||||||
|
int reNotifyMinutes,
|
||||||
|
String notificationTitleTmpl,
|
||||||
|
String notificationMessageTmpl,
|
||||||
|
List<WebhookBindingResponse> webhooks,
|
||||||
|
List<AlertRuleTarget> targets,
|
||||||
|
Instant createdAt,
|
||||||
|
String createdBy,
|
||||||
|
Instant updatedAt,
|
||||||
|
String updatedBy
|
||||||
|
) {
|
||||||
|
public static AlertRuleResponse from(AlertRule r) {
|
||||||
|
List<WebhookBindingResponse> webhooks = r.webhooks().stream()
|
||||||
|
.map(WebhookBindingResponse::from)
|
||||||
|
.toList();
|
||||||
|
return new AlertRuleResponse(
|
||||||
|
r.id(), r.environmentId(), r.name(), r.description(),
|
||||||
|
r.severity(), r.enabled(), r.conditionKind(), r.condition(),
|
||||||
|
r.evaluationIntervalSeconds(), r.forDurationSeconds(), r.reNotifyMinutes(),
|
||||||
|
r.notificationTitleTmpl(), r.notificationMessageTmpl(),
|
||||||
|
webhooks, r.targets(),
|
||||||
|
r.createdAt(), r.createdBy(), r.updatedAt(), r.updatedBy());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.SilenceMatcher;
|
||||||
|
import jakarta.validation.Valid;
|
||||||
|
import jakarta.validation.constraints.NotNull;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
|
||||||
|
public record AlertSilenceRequest(
|
||||||
|
@NotNull @Valid SilenceMatcher matcher,
|
||||||
|
String reason,
|
||||||
|
@NotNull Instant startsAt,
|
||||||
|
@NotNull Instant endsAt
|
||||||
|
) {}
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertSilence;
|
||||||
|
import io.cameleer.server.core.alerting.SilenceMatcher;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
public record AlertSilenceResponse(
|
||||||
|
UUID id,
|
||||||
|
UUID environmentId,
|
||||||
|
SilenceMatcher matcher,
|
||||||
|
String reason,
|
||||||
|
Instant startsAt,
|
||||||
|
Instant endsAt,
|
||||||
|
String createdBy,
|
||||||
|
Instant createdAt
|
||||||
|
) {
|
||||||
|
public static AlertSilenceResponse from(AlertSilence s) {
|
||||||
|
return new AlertSilenceResponse(
|
||||||
|
s.id(), s.environmentId(), s.matcher(), s.reason(),
|
||||||
|
s.startsAt(), s.endsAt(), s.createdBy(), s.createdAt());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
import jakarta.validation.constraints.NotNull;
|
||||||
|
import jakarta.validation.constraints.Size;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
/** Shared body for bulk-read / bulk-ack / bulk-delete requests. */
|
||||||
|
public record BulkIdsRequest(@NotNull @Size(min = 1, max = 500) List<UUID> instanceIds) {}
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Canned context for rendering a Mustache template preview without firing a real alert.
|
||||||
|
* All fields are optional — missing context keys render as empty string.
|
||||||
|
*/
|
||||||
|
public record RenderPreviewRequest(Map<String, Object> context) {
|
||||||
|
public RenderPreviewRequest {
|
||||||
|
context = context == null ? Map.of() : Map.copyOf(context);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,3 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
public record RenderPreviewResponse(String title, String message) {}
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Request body for POST {id}/test-evaluate.
|
||||||
|
* Currently empty — the evaluator runs against live data using the saved rule definition.
|
||||||
|
* Reserved for future overrides (e.g., custom time window).
|
||||||
|
*/
|
||||||
|
public record TestEvaluateRequest() {}
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.eval.EvalResult;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of a one-shot evaluator run against live data (does not persist any state).
|
||||||
|
*/
|
||||||
|
public record TestEvaluateResponse(String resultKind, String detail) {
|
||||||
|
|
||||||
|
public static TestEvaluateResponse from(EvalResult result) {
|
||||||
|
if (result instanceof EvalResult.Firing f) {
|
||||||
|
return new TestEvaluateResponse("FIRING",
|
||||||
|
"currentValue=" + f.currentValue() + " threshold=" + f.threshold());
|
||||||
|
} else if (result instanceof EvalResult.Clear) {
|
||||||
|
return new TestEvaluateResponse("CLEAR", null);
|
||||||
|
} else if (result instanceof EvalResult.Error e) {
|
||||||
|
return new TestEvaluateResponse("ERROR",
|
||||||
|
e.cause() != null ? e.cause().getMessage() : "unknown error");
|
||||||
|
} else if (result instanceof EvalResult.Batch b) {
|
||||||
|
return new TestEvaluateResponse("BATCH", b.firings().size() + " firing(s)");
|
||||||
|
}
|
||||||
|
return new TestEvaluateResponse("UNKNOWN", result.getClass().getSimpleName());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertSeverity;
|
||||||
|
|
||||||
|
import java.util.EnumMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Response shape for {@code GET /alerts/unread-count}.
|
||||||
|
* <p>
|
||||||
|
* {@code total} is the sum of {@code bySeverity} values. The UI branches bell colour on
|
||||||
|
* the highest severity present, so callers can inspect the map directly.
|
||||||
|
*/
|
||||||
|
public record UnreadCountResponse(long total, Map<AlertSeverity, Long> bySeverity) {
|
||||||
|
|
||||||
|
public UnreadCountResponse {
|
||||||
|
// Defensive copy + fill in missing severities as 0 so the UI never sees null/undefined.
|
||||||
|
EnumMap<AlertSeverity, Long> normalized = new EnumMap<>(AlertSeverity.class);
|
||||||
|
for (AlertSeverity s : AlertSeverity.values()) normalized.put(s, 0L);
|
||||||
|
if (bySeverity != null) bySeverity.forEach((k, v) -> normalized.put(k, v == null ? 0L : v));
|
||||||
|
bySeverity = Map.copyOf(normalized);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static UnreadCountResponse from(Map<AlertSeverity, Long> counts) {
|
||||||
|
long total = counts == null ? 0L
|
||||||
|
: counts.values().stream().filter(v -> v != null).mapToLong(Long::longValue).sum();
|
||||||
|
return new UnreadCountResponse(total, counts == null ? Map.of() : counts);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
import jakarta.validation.constraints.NotNull;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
public record WebhookBindingRequest(
|
||||||
|
@NotNull UUID outboundConnectionId,
|
||||||
|
String bodyOverride,
|
||||||
|
Map<String, String> headerOverrides
|
||||||
|
) {
|
||||||
|
public WebhookBindingRequest {
|
||||||
|
headerOverrides = headerOverrides == null ? Map.of() : Map.copyOf(headerOverrides);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
package io.cameleer.server.app.alerting.dto;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.WebhookBinding;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
public record WebhookBindingResponse(
|
||||||
|
UUID id,
|
||||||
|
UUID outboundConnectionId,
|
||||||
|
String bodyOverride,
|
||||||
|
Map<String, String> headerOverrides
|
||||||
|
) {
|
||||||
|
public static WebhookBindingResponse from(WebhookBinding wb) {
|
||||||
|
return new WebhookBindingResponse(
|
||||||
|
wb.id(), wb.outboundConnectionId(), wb.bodyOverride(), wb.headerOverrides());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,95 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.agent.AgentEventRecord;
|
||||||
|
import io.cameleer.server.core.agent.AgentEventRepository;
|
||||||
|
import io.cameleer.server.core.alerting.AgentLifecycleCondition;
|
||||||
|
import io.cameleer.server.core.alerting.AgentLifecycleEventType;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.AlertScope;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
import io.cameleer.server.core.runtime.EnvironmentRepository;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Evaluator for {@link AgentLifecycleCondition}.
|
||||||
|
* <p>
|
||||||
|
* Each matching row in {@code agent_events} produces its own {@link EvalResult.Firing}
|
||||||
|
* in an {@link EvalResult.Batch}, so every {@code (agent, eventType, timestamp)}
|
||||||
|
* tuple gets its own {@code AlertInstance} — operationally distinct outages /
|
||||||
|
* restarts / shutdowns are independently ackable. Deduplication across ticks
|
||||||
|
* is enforced by {@code alert_instances_open_rule_uq} via the canonical
|
||||||
|
* {@code _subjectFingerprint} key in the instance context (see V16 migration).
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class AgentLifecycleEvaluator implements ConditionEvaluator<AgentLifecycleCondition> {
|
||||||
|
|
||||||
|
/** Hard cap on rows returned per tick — prevents a flood of stale events from overwhelming the job. */
|
||||||
|
private static final int MAX_EVENTS_PER_TICK = 500;
|
||||||
|
|
||||||
|
private final AgentEventRepository eventRepo;
|
||||||
|
private final EnvironmentRepository envRepo;
|
||||||
|
|
||||||
|
public AgentLifecycleEvaluator(AgentEventRepository eventRepo, EnvironmentRepository envRepo) {
|
||||||
|
this.eventRepo = eventRepo;
|
||||||
|
this.envRepo = envRepo;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ConditionKind kind() { return ConditionKind.AGENT_LIFECYCLE; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EvalResult evaluate(AgentLifecycleCondition c, AlertRule rule, EvalContext ctx) {
|
||||||
|
String envSlug = envRepo.findById(rule.environmentId())
|
||||||
|
.map(e -> e.slug())
|
||||||
|
.orElse(null);
|
||||||
|
if (envSlug == null) return EvalResult.Clear.INSTANCE;
|
||||||
|
|
||||||
|
AlertScope scope = c.scope();
|
||||||
|
String appSlug = scope != null ? scope.appSlug() : null;
|
||||||
|
String agentId = scope != null ? scope.agentId() : null;
|
||||||
|
|
||||||
|
List<String> typeNames = c.eventTypes().stream()
|
||||||
|
.map(AgentLifecycleEventType::name)
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
Instant from = ctx.now().minusSeconds(c.withinSeconds());
|
||||||
|
Instant to = ctx.now();
|
||||||
|
|
||||||
|
List<AgentEventRecord> matches = eventRepo.findInWindow(
|
||||||
|
envSlug, appSlug, agentId, typeNames, from, to, MAX_EVENTS_PER_TICK);
|
||||||
|
|
||||||
|
if (matches.isEmpty()) return new EvalResult.Batch(List.of(), Map.of());
|
||||||
|
|
||||||
|
List<EvalResult.Firing> firings = new ArrayList<>(matches.size());
|
||||||
|
for (AgentEventRecord ev : matches) {
|
||||||
|
firings.add(toFiring(ev));
|
||||||
|
}
|
||||||
|
return new EvalResult.Batch(firings, Map.of());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static EvalResult.Firing toFiring(AgentEventRecord ev) {
|
||||||
|
String fingerprint = (ev.instanceId() == null ? "" : ev.instanceId())
|
||||||
|
+ ":" + (ev.eventType() == null ? "" : ev.eventType())
|
||||||
|
+ ":" + (ev.timestamp() == null ? "0" : Long.toString(ev.timestamp().toEpochMilli()));
|
||||||
|
|
||||||
|
Map<String, Object> context = new LinkedHashMap<>();
|
||||||
|
context.put("agent", Map.of(
|
||||||
|
"id", ev.instanceId() == null ? "" : ev.instanceId(),
|
||||||
|
"app", ev.applicationId() == null ? "" : ev.applicationId()
|
||||||
|
));
|
||||||
|
context.put("event", Map.of(
|
||||||
|
"type", ev.eventType() == null ? "" : ev.eventType(),
|
||||||
|
"timestamp", ev.timestamp() == null ? "" : ev.timestamp().toString(),
|
||||||
|
"detail", ev.detail() == null ? "" : ev.detail()
|
||||||
|
));
|
||||||
|
context.put("_subjectFingerprint", fingerprint);
|
||||||
|
|
||||||
|
return new EvalResult.Firing(1.0, null, context);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,61 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.agent.AgentInfo;
|
||||||
|
import io.cameleer.server.core.agent.AgentRegistryService;
|
||||||
|
import io.cameleer.server.core.agent.AgentState;
|
||||||
|
import io.cameleer.server.core.alerting.AgentStateCondition;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.AlertScope;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class AgentStateEvaluator implements ConditionEvaluator<AgentStateCondition> {
|
||||||
|
|
||||||
|
private final AgentRegistryService registry;
|
||||||
|
|
||||||
|
public AgentStateEvaluator(AgentRegistryService registry) {
|
||||||
|
this.registry = registry;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ConditionKind kind() { return ConditionKind.AGENT_STATE; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EvalResult evaluate(AgentStateCondition c, AlertRule rule, EvalContext ctx) {
|
||||||
|
AgentState target = AgentState.valueOf(c.state());
|
||||||
|
Instant cutoff = ctx.now().minusSeconds(c.forSeconds());
|
||||||
|
|
||||||
|
List<AgentInfo> hits = registry.findAll().stream()
|
||||||
|
.filter(a -> matchesScope(a, c.scope()))
|
||||||
|
.filter(a -> a.state() == target)
|
||||||
|
.filter(a -> a.lastHeartbeat() != null && a.lastHeartbeat().isBefore(cutoff))
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
if (hits.isEmpty()) return EvalResult.Clear.INSTANCE;
|
||||||
|
|
||||||
|
AgentInfo first = hits.get(0);
|
||||||
|
return new EvalResult.Firing(
|
||||||
|
(double) hits.size(), null,
|
||||||
|
Map.of(
|
||||||
|
"agent", Map.of(
|
||||||
|
"id", first.instanceId(),
|
||||||
|
"name", first.displayName(),
|
||||||
|
"state", first.state().name()
|
||||||
|
),
|
||||||
|
"app", Map.of("slug", first.applicationId())
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean matchesScope(AgentInfo a, AlertScope s) {
|
||||||
|
if (s == null) return true;
|
||||||
|
if (s.appSlug() != null && !s.appSlug().equals(a.applicationId())) return false;
|
||||||
|
if (s.agentId() != null && !s.agentId().equals(a.instanceId())) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,315 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.config.AlertingProperties;
|
||||||
|
import io.cameleer.server.app.alerting.metrics.AlertingMetrics;
|
||||||
|
import io.cameleer.server.app.alerting.notify.MustacheRenderer;
|
||||||
|
import io.cameleer.server.app.alerting.notify.NotificationContextBuilder;
|
||||||
|
import io.cameleer.server.core.alerting.*;
|
||||||
|
import io.cameleer.server.core.runtime.Environment;
|
||||||
|
import io.cameleer.server.core.runtime.EnvironmentRepository;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.beans.factory.annotation.Qualifier;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.scheduling.annotation.SchedulingConfigurer;
|
||||||
|
import org.springframework.scheduling.config.ScheduledTaskRegistrar;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.time.Clock;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Claim-polling evaluator job.
|
||||||
|
* <p>
|
||||||
|
* On each tick, claims a batch of due {@link AlertRule}s via {@code FOR UPDATE SKIP LOCKED},
|
||||||
|
* invokes the matching {@link ConditionEvaluator}, applies the {@link AlertStateTransitions}
|
||||||
|
* state machine, persists any new/updated {@link AlertInstance}, enqueues webhook
|
||||||
|
* {@link AlertNotification}s on first-fire, and releases the claim.
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class AlertEvaluatorJob implements SchedulingConfigurer {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(AlertEvaluatorJob.class);
|
||||||
|
|
||||||
|
private final AlertingProperties props;
|
||||||
|
private final AlertRuleRepository ruleRepo;
|
||||||
|
private final AlertInstanceRepository instanceRepo;
|
||||||
|
private final AlertNotificationRepository notificationRepo;
|
||||||
|
private final Map<ConditionKind, ConditionEvaluator<?>> evaluators;
|
||||||
|
private final PerKindCircuitBreaker circuitBreaker;
|
||||||
|
private final MustacheRenderer renderer;
|
||||||
|
private final NotificationContextBuilder contextBuilder;
|
||||||
|
private final EnvironmentRepository environmentRepo;
|
||||||
|
private final ObjectMapper objectMapper;
|
||||||
|
private final BatchResultApplier batchResultApplier;
|
||||||
|
private final String instanceId;
|
||||||
|
private final String tenantId;
|
||||||
|
private final Clock clock;
|
||||||
|
private final AlertingMetrics metrics;
|
||||||
|
|
||||||
|
@SuppressWarnings("SpringJavaInjectionPointsAutowiringInspection")
|
||||||
|
public AlertEvaluatorJob(
|
||||||
|
AlertingProperties props,
|
||||||
|
AlertRuleRepository ruleRepo,
|
||||||
|
AlertInstanceRepository instanceRepo,
|
||||||
|
AlertNotificationRepository notificationRepo,
|
||||||
|
List<ConditionEvaluator<?>> evaluatorList,
|
||||||
|
PerKindCircuitBreaker circuitBreaker,
|
||||||
|
MustacheRenderer renderer,
|
||||||
|
NotificationContextBuilder contextBuilder,
|
||||||
|
EnvironmentRepository environmentRepo,
|
||||||
|
ObjectMapper objectMapper,
|
||||||
|
BatchResultApplier batchResultApplier,
|
||||||
|
@Qualifier("alertingInstanceId") String instanceId,
|
||||||
|
@Value("${cameleer.server.tenant.id:default}") String tenantId,
|
||||||
|
Clock alertingClock,
|
||||||
|
AlertingMetrics metrics) {
|
||||||
|
|
||||||
|
this.props = props;
|
||||||
|
this.ruleRepo = ruleRepo;
|
||||||
|
this.instanceRepo = instanceRepo;
|
||||||
|
this.notificationRepo = notificationRepo;
|
||||||
|
this.evaluators = evaluatorList.stream()
|
||||||
|
.collect(Collectors.toMap(ConditionEvaluator::kind, e -> e));
|
||||||
|
this.circuitBreaker = circuitBreaker;
|
||||||
|
this.renderer = renderer;
|
||||||
|
this.contextBuilder = contextBuilder;
|
||||||
|
this.environmentRepo = environmentRepo;
|
||||||
|
this.objectMapper = objectMapper;
|
||||||
|
this.batchResultApplier = batchResultApplier;
|
||||||
|
this.instanceId = instanceId;
|
||||||
|
this.tenantId = tenantId;
|
||||||
|
this.clock = alertingClock;
|
||||||
|
this.metrics = metrics;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// SchedulingConfigurer — register the tick as a fixed-delay task
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void configureTasks(ScheduledTaskRegistrar registrar) {
|
||||||
|
registrar.addFixedDelayTask(this::tick, props.effectiveEvaluatorTickIntervalMs());
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Tick — package-visible for same-package tests; also accessible cross-package for lifecycle ITs
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
public void tick() {
|
||||||
|
List<AlertRule> claimed = ruleRepo.claimDueRules(
|
||||||
|
instanceId,
|
||||||
|
props.effectiveEvaluatorBatchSize(),
|
||||||
|
props.effectiveClaimTtlSeconds());
|
||||||
|
|
||||||
|
if (claimed.isEmpty()) return;
|
||||||
|
|
||||||
|
TickCache cache = new TickCache();
|
||||||
|
EvalContext ctx = new EvalContext(tenantId, Instant.now(clock), cache);
|
||||||
|
|
||||||
|
for (AlertRule rule : claimed) {
|
||||||
|
Instant nextRun = Instant.now(clock).plusSeconds(rule.evaluationIntervalSeconds());
|
||||||
|
if (circuitBreaker.isOpen(rule.conditionKind())) {
|
||||||
|
log.debug("Circuit breaker open for {}; skipping rule {}", rule.conditionKind(), rule.id());
|
||||||
|
reschedule(rule, nextRun);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
EvalResult result;
|
||||||
|
try {
|
||||||
|
result = metrics.evalDuration(rule.conditionKind())
|
||||||
|
.recordCallable(() -> evaluateSafely(rule, ctx));
|
||||||
|
} catch (Exception e) {
|
||||||
|
metrics.evalError(rule.conditionKind(), rule.id());
|
||||||
|
circuitBreaker.recordFailure(rule.conditionKind());
|
||||||
|
log.warn("Evaluator error for rule {} ({}): {}", rule.id(), rule.conditionKind(), e.toString());
|
||||||
|
// Evaluation itself failed — release the claim so the rule can be
|
||||||
|
// retried on the next tick. Cursor stays put.
|
||||||
|
reschedule(rule, nextRun);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result instanceof EvalResult.Batch b) {
|
||||||
|
// Phase 2: the Batch path is atomic. The @Transactional apply() on
|
||||||
|
// BatchResultApplier wraps instance writes, notification enqueues,
|
||||||
|
// AND the cursor advance + releaseClaim into a single tx. A
|
||||||
|
// mid-batch fault rolls everything back — including the cursor —
|
||||||
|
// so the next tick replays the whole batch exactly once.
|
||||||
|
try {
|
||||||
|
batchResultApplier.apply(rule, b, nextRun);
|
||||||
|
circuitBreaker.recordSuccess(rule.conditionKind());
|
||||||
|
} catch (Exception e) {
|
||||||
|
metrics.evalError(rule.conditionKind(), rule.id());
|
||||||
|
circuitBreaker.recordFailure(rule.conditionKind());
|
||||||
|
log.warn("Batch apply failed for rule {} ({}): {} — rolling back; next tick will retry",
|
||||||
|
rule.id(), rule.conditionKind(), e.toString());
|
||||||
|
// The transaction rolled back. Do NOT call reschedule here —
|
||||||
|
// leaving claim + next_evaluation_at as they were means the
|
||||||
|
// claim TTL takes over and the rule becomes due on its own.
|
||||||
|
// Rethrowing is unnecessary for correctness — the cursor
|
||||||
|
// stayed put, so exactly-once-per-exchange is preserved.
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Non-Batch path (FIRING / Clear / Error): classic apply + rule
|
||||||
|
// reschedule. Not wrapped in a single tx — semantics unchanged
|
||||||
|
// from pre-Phase-2.
|
||||||
|
try {
|
||||||
|
applyResult(rule, result);
|
||||||
|
circuitBreaker.recordSuccess(rule.conditionKind());
|
||||||
|
} catch (Exception e) {
|
||||||
|
metrics.evalError(rule.conditionKind(), rule.id());
|
||||||
|
circuitBreaker.recordFailure(rule.conditionKind());
|
||||||
|
log.warn("applyResult failed for rule {} ({}): {}",
|
||||||
|
rule.id(), rule.conditionKind(), e.toString());
|
||||||
|
} finally {
|
||||||
|
reschedule(rule, nextRun);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sweepReNotify();
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Re-notification cadence sweep
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private void sweepReNotify() {
|
||||||
|
Instant now = Instant.now(clock);
|
||||||
|
List<AlertInstance> due = instanceRepo.listFiringDueForReNotify(now);
|
||||||
|
for (AlertInstance i : due) {
|
||||||
|
try {
|
||||||
|
AlertRule rule = i.ruleId() == null ? null : ruleRepo.findById(i.ruleId()).orElse(null);
|
||||||
|
if (rule == null || rule.reNotifyMinutes() <= 0) continue;
|
||||||
|
enqueueNotifications(rule, i, now);
|
||||||
|
instanceRepo.save(i.withLastNotifiedAt(now));
|
||||||
|
log.debug("Re-notify enqueued for instance {} (rule {})", i.id(), i.ruleId());
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Re-notify sweep error for instance {}: {}", i.id(), e.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Evaluation
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||||
|
private EvalResult evaluateSafely(AlertRule rule, EvalContext ctx) {
|
||||||
|
ConditionEvaluator evaluator = evaluators.get(rule.conditionKind());
|
||||||
|
if (evaluator == null) {
|
||||||
|
throw new IllegalStateException("No evaluator registered for " + rule.conditionKind());
|
||||||
|
}
|
||||||
|
return evaluator.evaluate(rule.condition(), rule, ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// State machine application
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private void applyResult(AlertRule rule, EvalResult result) {
|
||||||
|
// Note: the Batch path is handled by BatchResultApplier (transactional) —
|
||||||
|
// tick() routes Batch results there directly and never calls applyResult
|
||||||
|
// for them. This method only handles FIRING / Clear / Error state-machine
|
||||||
|
// transitions for the classic (non-PER_EXCHANGE) path.
|
||||||
|
AlertInstance current = instanceRepo.findOpenForRule(rule.id()).orElse(null);
|
||||||
|
Instant now = Instant.now(clock);
|
||||||
|
|
||||||
|
AlertStateTransitions.apply(current, result, rule, now).ifPresent(next -> {
|
||||||
|
// Determine whether this is a newly created instance transitioning to FIRING
|
||||||
|
boolean isFirstFire = current == null && next.state() == AlertState.FIRING;
|
||||||
|
boolean promotedFromPending = current != null
|
||||||
|
&& current.state() == AlertState.PENDING
|
||||||
|
&& next.state() == AlertState.FIRING;
|
||||||
|
|
||||||
|
AlertInstance withSnapshot = next.withRuleSnapshot(snapshotRule(rule));
|
||||||
|
AlertInstance enriched = enrichTitleMessage(rule, withSnapshot);
|
||||||
|
AlertInstance persisted = instanceRepo.save(enriched);
|
||||||
|
|
||||||
|
if (isFirstFire || promotedFromPending) {
|
||||||
|
enqueueNotifications(rule, persisted, now);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Title / message rendering
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private AlertInstance enrichTitleMessage(AlertRule rule, AlertInstance instance) {
|
||||||
|
Environment env = environmentRepo.findById(rule.environmentId()).orElse(null);
|
||||||
|
Map<String, Object> ctx = contextBuilder.build(rule, instance, env, null);
|
||||||
|
String title = renderer.render(rule.notificationTitleTmpl(), ctx);
|
||||||
|
String message = renderer.render(rule.notificationMessageTmpl(), ctx);
|
||||||
|
return instance.withTitleMessage(title, message);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Notification enqueue
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private void enqueueNotifications(AlertRule rule, AlertInstance instance, Instant now) {
|
||||||
|
for (WebhookBinding w : rule.webhooks()) {
|
||||||
|
Map<String, Object> payload = buildPayload(rule, instance);
|
||||||
|
notificationRepo.save(new AlertNotification(
|
||||||
|
UUID.randomUUID(),
|
||||||
|
instance.id(),
|
||||||
|
w.id(),
|
||||||
|
w.outboundConnectionId(),
|
||||||
|
NotificationStatus.PENDING,
|
||||||
|
0,
|
||||||
|
now,
|
||||||
|
null, null, null, null,
|
||||||
|
payload,
|
||||||
|
null,
|
||||||
|
now));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Object> buildPayload(AlertRule rule, AlertInstance instance) {
|
||||||
|
Environment env = environmentRepo.findById(rule.environmentId()).orElse(null);
|
||||||
|
return contextBuilder.build(rule, instance, env, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Claim release
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private void reschedule(AlertRule rule, Instant nextRun) {
|
||||||
|
ruleRepo.releaseClaim(rule.id(), nextRun, rule.evalState());
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Rule snapshot helper (used by tests / future extensions)
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
Map<String, Object> snapshotRule(AlertRule rule) {
|
||||||
|
try {
|
||||||
|
Map<String, Object> raw = objectMapper.convertValue(rule, Map.class);
|
||||||
|
// Map.copyOf (used in AlertInstance compact ctor) rejects null values —
|
||||||
|
// strip them so the snapshot is safe to store.
|
||||||
|
Map<String, Object> safe = new java.util.LinkedHashMap<>();
|
||||||
|
raw.forEach((k, v) -> { if (v != null) safe.put(k, v); });
|
||||||
|
return safe;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Failed to snapshot rule {}: {}", rule.id(), e.getMessage());
|
||||||
|
return Map.of("id", rule.id().toString(), "name", rule.name());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Visible for testing
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Returns the evaluator map (for inspection in tests). */
|
||||||
|
Map<ConditionKind, ConditionEvaluator<?>> evaluators() {
|
||||||
|
return evaluators;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,141 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertInstance;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRuleTarget;
|
||||||
|
import io.cameleer.server.core.alerting.AlertSeverity;
|
||||||
|
import io.cameleer.server.core.alerting.AlertState;
|
||||||
|
import io.cameleer.server.core.alerting.TargetKind;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pure, stateless state-machine for alert instance transitions.
|
||||||
|
* <p>
|
||||||
|
* Given the current open instance (nullable) and an EvalResult, returns the new/updated
|
||||||
|
* AlertInstance or {@link Optional#empty()} when no action is needed.
|
||||||
|
* <p>
|
||||||
|
* Batch results must be handled directly in the job; this helper returns empty for them.
|
||||||
|
*/
|
||||||
|
public final class AlertStateTransitions {
|
||||||
|
|
||||||
|
private AlertStateTransitions() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply an EvalResult to the current open AlertInstance.
|
||||||
|
*
|
||||||
|
* @param current the open instance for this rule (PENDING / FIRING), or null if none
|
||||||
|
* @param result the evaluator outcome
|
||||||
|
* @param rule the rule being evaluated
|
||||||
|
* @param now wall-clock instant for the current tick
|
||||||
|
* @return the new or updated AlertInstance, or empty when nothing should change
|
||||||
|
*/
|
||||||
|
public static Optional<AlertInstance> apply(
|
||||||
|
AlertInstance current, EvalResult result, AlertRule rule, Instant now) {
|
||||||
|
|
||||||
|
if (result instanceof EvalResult.Clear) return onClear(current, now);
|
||||||
|
if (result instanceof EvalResult.Firing f) return onFiring(current, f, rule, now);
|
||||||
|
// EvalResult.Error and EvalResult.Batch — no action (Batch handled by the job directly)
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Clear branch
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private static Optional<AlertInstance> onClear(AlertInstance current, Instant now) {
|
||||||
|
if (current == null) return Optional.empty(); // no open instance — no-op
|
||||||
|
if (current.state() == AlertState.RESOLVED) return Optional.empty(); // already resolved
|
||||||
|
// Any open state (PENDING / FIRING) → RESOLVED
|
||||||
|
return Optional.of(current
|
||||||
|
.withState(AlertState.RESOLVED)
|
||||||
|
.withResolvedAt(now));
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Firing branch
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private static Optional<AlertInstance> onFiring(
|
||||||
|
AlertInstance current, EvalResult.Firing f, AlertRule rule, Instant now) {
|
||||||
|
|
||||||
|
if (current == null) {
|
||||||
|
// No open instance — create a new one
|
||||||
|
AlertState initial = rule.forDurationSeconds() > 0
|
||||||
|
? AlertState.PENDING
|
||||||
|
: AlertState.FIRING;
|
||||||
|
return Optional.of(newInstance(rule, f, initial, now));
|
||||||
|
}
|
||||||
|
|
||||||
|
return switch (current.state()) {
|
||||||
|
case PENDING -> {
|
||||||
|
// Check whether the forDuration window has elapsed
|
||||||
|
Instant promoteAt = current.firedAt().plusSeconds(rule.forDurationSeconds());
|
||||||
|
if (!promoteAt.isAfter(now)) {
|
||||||
|
// Promote to FIRING; keep the original firedAt (that's when it first appeared)
|
||||||
|
yield Optional.of(current
|
||||||
|
.withState(AlertState.FIRING)
|
||||||
|
.withFiredAt(now));
|
||||||
|
}
|
||||||
|
// Still within forDuration — stay PENDING, nothing to persist
|
||||||
|
yield Optional.empty();
|
||||||
|
}
|
||||||
|
// FIRING — re-notification cadence handled by the dispatcher
|
||||||
|
case FIRING -> Optional.empty();
|
||||||
|
// RESOLVED should never appear as the "current open" instance, but guard anyway
|
||||||
|
case RESOLVED -> Optional.empty();
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Factory helpers
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a brand-new AlertInstance from a rule + Firing result.
|
||||||
|
* title/message are left empty here; the job enriches them via MustacheRenderer after.
|
||||||
|
*/
|
||||||
|
static AlertInstance newInstance(AlertRule rule, EvalResult.Firing f, AlertState state, Instant now) {
|
||||||
|
List<AlertRuleTarget> targets = rule.targets() != null ? rule.targets() : List.of();
|
||||||
|
List<String> targetUserIds = targets.stream()
|
||||||
|
.filter(t -> t.kind() == TargetKind.USER)
|
||||||
|
.map(AlertRuleTarget::targetId)
|
||||||
|
.toList();
|
||||||
|
List<UUID> targetGroupIds = targets.stream()
|
||||||
|
.filter(t -> t.kind() == TargetKind.GROUP)
|
||||||
|
.map(t -> UUID.fromString(t.targetId()))
|
||||||
|
.toList();
|
||||||
|
List<String> targetRoleNames = targets.stream()
|
||||||
|
.filter(t -> t.kind() == TargetKind.ROLE)
|
||||||
|
.map(AlertRuleTarget::targetId)
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
return new AlertInstance(
|
||||||
|
UUID.randomUUID(),
|
||||||
|
rule.id(),
|
||||||
|
Map.of(), // ruleSnapshot — caller (job) fills in via ObjectMapper
|
||||||
|
rule.environmentId(),
|
||||||
|
state,
|
||||||
|
rule.severity() != null ? rule.severity() : AlertSeverity.WARNING,
|
||||||
|
now, // firedAt
|
||||||
|
null, // ackedAt
|
||||||
|
null, // ackedBy
|
||||||
|
null, // resolvedAt
|
||||||
|
null, // lastNotifiedAt
|
||||||
|
null, // readAt
|
||||||
|
null, // deletedAt
|
||||||
|
false, // silenced
|
||||||
|
f.currentValue(),
|
||||||
|
f.threshold(),
|
||||||
|
f.context() != null ? f.context() : Map.of(),
|
||||||
|
"", // title — rendered by job
|
||||||
|
"", // message — rendered by job
|
||||||
|
targetUserIds,
|
||||||
|
targetGroupIds,
|
||||||
|
targetRoleNames);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,144 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.notify.MustacheRenderer;
|
||||||
|
import io.cameleer.server.app.alerting.notify.NotificationContextBuilder;
|
||||||
|
import io.cameleer.server.core.alerting.*;
|
||||||
|
import io.cameleer.server.core.runtime.Environment;
|
||||||
|
import io.cameleer.server.core.runtime.EnvironmentRepository;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
|
import java.time.Clock;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Applies a {@link EvalResult.Batch} result to persistent state inside a single
|
||||||
|
* transaction: instance writes, notification enqueues, and the rule's cursor
|
||||||
|
* advance + {@code releaseClaim} either all commit or all roll back together.
|
||||||
|
* <p>
|
||||||
|
* Lives in its own bean so the {@code @Transactional} annotation engages via the
|
||||||
|
* Spring proxy when invoked from {@link AlertEvaluatorJob#tick()}; calling it as
|
||||||
|
* {@code this.apply(...)} from {@code AlertEvaluatorJob} (a bean calling its own
|
||||||
|
* method) would bypass the proxy and silently disable the transaction.
|
||||||
|
* <p>
|
||||||
|
* Phase 2 of the per-exchange exactly-once plan (see
|
||||||
|
* {@code docs/superpowers/plans/2026-04-22-per-exchange-exactly-once.md}).
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class BatchResultApplier {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(BatchResultApplier.class);
|
||||||
|
|
||||||
|
private final AlertRuleRepository ruleRepo;
|
||||||
|
private final AlertInstanceRepository instanceRepo;
|
||||||
|
private final AlertNotificationRepository notificationRepo;
|
||||||
|
private final MustacheRenderer renderer;
|
||||||
|
private final NotificationContextBuilder contextBuilder;
|
||||||
|
private final EnvironmentRepository environmentRepo;
|
||||||
|
private final ObjectMapper objectMapper;
|
||||||
|
private final Clock clock;
|
||||||
|
|
||||||
|
public BatchResultApplier(
|
||||||
|
AlertRuleRepository ruleRepo,
|
||||||
|
AlertInstanceRepository instanceRepo,
|
||||||
|
AlertNotificationRepository notificationRepo,
|
||||||
|
MustacheRenderer renderer,
|
||||||
|
NotificationContextBuilder contextBuilder,
|
||||||
|
EnvironmentRepository environmentRepo,
|
||||||
|
ObjectMapper objectMapper,
|
||||||
|
Clock alertingClock) {
|
||||||
|
this.ruleRepo = ruleRepo;
|
||||||
|
this.instanceRepo = instanceRepo;
|
||||||
|
this.notificationRepo = notificationRepo;
|
||||||
|
this.renderer = renderer;
|
||||||
|
this.contextBuilder = contextBuilder;
|
||||||
|
this.environmentRepo = environmentRepo;
|
||||||
|
this.objectMapper = objectMapper;
|
||||||
|
this.clock = alertingClock;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Atomically apply a Batch result for a single rule:
|
||||||
|
* <ol>
|
||||||
|
* <li>persist a FIRING instance per firing + enqueue its notifications</li>
|
||||||
|
* <li>advance the rule's cursor ({@code evalState}) iff the batch supplied one</li>
|
||||||
|
* <li>release the claim with the new {@code nextRun} + {@code evalState}</li>
|
||||||
|
* </ol>
|
||||||
|
* Any exception thrown from the repo calls rolls back every write — including
|
||||||
|
* the cursor advance — so the rule is replayable on the next tick.
|
||||||
|
*/
|
||||||
|
@Transactional
|
||||||
|
public void apply(AlertRule rule, EvalResult.Batch batch, Instant nextRun) {
|
||||||
|
for (EvalResult.Firing f : batch.firings()) {
|
||||||
|
applyBatchFiring(rule, f);
|
||||||
|
}
|
||||||
|
Map<String, Object> nextEvalState =
|
||||||
|
batch.nextEvalState().isEmpty() ? rule.evalState() : batch.nextEvalState();
|
||||||
|
ruleRepo.releaseClaim(rule.id(), nextRun, nextEvalState);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Batch (PER_EXCHANGE) mode: always create a fresh FIRING instance per Firing entry.
|
||||||
|
* No forDuration check — each exchange is its own event.
|
||||||
|
*/
|
||||||
|
private void applyBatchFiring(AlertRule rule, EvalResult.Firing f) {
|
||||||
|
Instant now = Instant.now(clock);
|
||||||
|
AlertInstance instance = AlertStateTransitions.newInstance(rule, f, AlertState.FIRING, now)
|
||||||
|
.withRuleSnapshot(snapshotRule(rule));
|
||||||
|
AlertInstance enriched = enrichTitleMessage(rule, instance);
|
||||||
|
AlertInstance persisted = instanceRepo.save(enriched);
|
||||||
|
enqueueNotifications(rule, persisted, now);
|
||||||
|
}
|
||||||
|
|
||||||
|
private AlertInstance enrichTitleMessage(AlertRule rule, AlertInstance instance) {
|
||||||
|
Environment env = environmentRepo.findById(rule.environmentId()).orElse(null);
|
||||||
|
Map<String, Object> ctx = contextBuilder.build(rule, instance, env, null);
|
||||||
|
String title = renderer.render(rule.notificationTitleTmpl(), ctx);
|
||||||
|
String message = renderer.render(rule.notificationMessageTmpl(), ctx);
|
||||||
|
return instance.withTitleMessage(title, message);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void enqueueNotifications(AlertRule rule, AlertInstance instance, Instant now) {
|
||||||
|
for (WebhookBinding w : rule.webhooks()) {
|
||||||
|
Map<String, Object> payload = buildPayload(rule, instance);
|
||||||
|
notificationRepo.save(new AlertNotification(
|
||||||
|
UUID.randomUUID(),
|
||||||
|
instance.id(),
|
||||||
|
w.id(),
|
||||||
|
w.outboundConnectionId(),
|
||||||
|
NotificationStatus.PENDING,
|
||||||
|
0,
|
||||||
|
now,
|
||||||
|
null, null, null, null,
|
||||||
|
payload,
|
||||||
|
null,
|
||||||
|
now));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Object> buildPayload(AlertRule rule, AlertInstance instance) {
|
||||||
|
Environment env = environmentRepo.findById(rule.environmentId()).orElse(null);
|
||||||
|
return contextBuilder.build(rule, instance, env, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
private Map<String, Object> snapshotRule(AlertRule rule) {
|
||||||
|
try {
|
||||||
|
Map<String, Object> raw = objectMapper.convertValue(rule, Map.class);
|
||||||
|
// Map.copyOf (used in AlertInstance compact ctor) rejects null values —
|
||||||
|
// strip them so the snapshot is safe to store.
|
||||||
|
Map<String, Object> safe = new LinkedHashMap<>();
|
||||||
|
raw.forEach((k, v) -> { if (v != null) safe.put(k, v); });
|
||||||
|
return safe;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Failed to snapshot rule {}: {}", rule.id(), e.getMessage());
|
||||||
|
return Map.of("id", rule.id().toString(), "name", rule.name());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertCondition;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
|
||||||
|
public interface ConditionEvaluator<C extends AlertCondition> {
|
||||||
|
|
||||||
|
ConditionKind kind();
|
||||||
|
|
||||||
|
EvalResult evaluate(C condition, AlertRule rule, EvalContext ctx);
|
||||||
|
}
|
||||||
@@ -0,0 +1,58 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
import io.cameleer.server.core.alerting.DeploymentStateCondition;
|
||||||
|
import io.cameleer.server.core.runtime.App;
|
||||||
|
import io.cameleer.server.core.runtime.AppRepository;
|
||||||
|
import io.cameleer.server.core.runtime.Deployment;
|
||||||
|
import io.cameleer.server.core.runtime.DeploymentRepository;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class DeploymentStateEvaluator implements ConditionEvaluator<DeploymentStateCondition> {
|
||||||
|
|
||||||
|
private final AppRepository appRepo;
|
||||||
|
private final DeploymentRepository deploymentRepo;
|
||||||
|
|
||||||
|
public DeploymentStateEvaluator(AppRepository appRepo, DeploymentRepository deploymentRepo) {
|
||||||
|
this.appRepo = appRepo;
|
||||||
|
this.deploymentRepo = deploymentRepo;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ConditionKind kind() { return ConditionKind.DEPLOYMENT_STATE; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EvalResult evaluate(DeploymentStateCondition c, AlertRule rule, EvalContext ctx) {
|
||||||
|
String appSlug = c.scope() != null ? c.scope().appSlug() : null;
|
||||||
|
App app = (appSlug != null)
|
||||||
|
? appRepo.findByEnvironmentIdAndSlug(rule.environmentId(), appSlug).orElse(null)
|
||||||
|
: null;
|
||||||
|
|
||||||
|
if (app == null) return EvalResult.Clear.INSTANCE;
|
||||||
|
|
||||||
|
Set<String> wanted = Set.copyOf(c.states());
|
||||||
|
List<Deployment> hits = deploymentRepo.findByAppId(app.id()).stream()
|
||||||
|
.filter(d -> wanted.contains(d.status().name()))
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
if (hits.isEmpty()) return EvalResult.Clear.INSTANCE;
|
||||||
|
|
||||||
|
Deployment d = hits.get(0);
|
||||||
|
return new EvalResult.Firing(
|
||||||
|
(double) hits.size(), null,
|
||||||
|
Map.of(
|
||||||
|
"deployment", Map.of(
|
||||||
|
"id", d.id().toString(),
|
||||||
|
"status", d.status().name()
|
||||||
|
),
|
||||||
|
"app", Map.of("slug", app.slug())
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
|
||||||
|
public record EvalContext(String tenantId, Instant now, TickCache tickCache) {}
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public sealed interface EvalResult {
|
||||||
|
|
||||||
|
record Firing(Double currentValue, Double threshold, Map<String, Object> context) implements EvalResult {
|
||||||
|
public Firing {
|
||||||
|
context = context == null ? Map.of() : Map.copyOf(context);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
record Clear() implements EvalResult {
|
||||||
|
public static final Clear INSTANCE = new Clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
record Error(Throwable cause) implements EvalResult {}
|
||||||
|
|
||||||
|
record Batch(List<Firing> firings, Map<String, Object> nextEvalState) implements EvalResult {
|
||||||
|
public Batch {
|
||||||
|
firings = firings == null ? List.of() : List.copyOf(firings);
|
||||||
|
nextEvalState = nextEvalState == null ? Map.of() : Map.copyOf(nextEvalState);
|
||||||
|
}
|
||||||
|
/** Convenience: a Batch with no cursor update (first-run empty, or no matches). */
|
||||||
|
public static Batch empty() {
|
||||||
|
return new Batch(List.of(), Map.of());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,187 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.config.AlertingProperties;
|
||||||
|
import io.cameleer.server.app.search.ClickHouseSearchIndex;
|
||||||
|
import io.cameleer.server.core.alerting.AlertMatchSpec;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
import io.cameleer.server.core.alerting.ExchangeMatchCondition;
|
||||||
|
import io.cameleer.server.core.alerting.FireMode;
|
||||||
|
import io.cameleer.server.core.runtime.EnvironmentRepository;
|
||||||
|
import io.cameleer.server.core.search.ExecutionSummary;
|
||||||
|
import io.cameleer.server.core.search.SearchRequest;
|
||||||
|
import io.cameleer.server.core.search.SearchResult;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class ExchangeMatchEvaluator implements ConditionEvaluator<ExchangeMatchCondition> {
|
||||||
|
|
||||||
|
private final ClickHouseSearchIndex searchIndex;
|
||||||
|
private final EnvironmentRepository envRepo;
|
||||||
|
private final AlertingProperties alertingProperties;
|
||||||
|
|
||||||
|
public ExchangeMatchEvaluator(ClickHouseSearchIndex searchIndex,
|
||||||
|
EnvironmentRepository envRepo,
|
||||||
|
AlertingProperties alertingProperties) {
|
||||||
|
this.searchIndex = searchIndex;
|
||||||
|
this.envRepo = envRepo;
|
||||||
|
this.alertingProperties = alertingProperties;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ConditionKind kind() { return ConditionKind.EXCHANGE_MATCH; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EvalResult evaluate(ExchangeMatchCondition c, AlertRule rule, EvalContext ctx) {
|
||||||
|
String envSlug = envRepo.findById(rule.environmentId())
|
||||||
|
.map(e -> e.slug())
|
||||||
|
.orElse(null);
|
||||||
|
|
||||||
|
return switch (c.fireMode()) {
|
||||||
|
case COUNT_IN_WINDOW -> evaluateCount(c, rule, ctx, envSlug);
|
||||||
|
case PER_EXCHANGE -> evaluatePerExchange(c, rule, ctx, envSlug);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── COUNT_IN_WINDOW ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
private EvalResult evaluateCount(ExchangeMatchCondition c, AlertRule rule,
|
||||||
|
EvalContext ctx, String envSlug) {
|
||||||
|
String appSlug = c.scope() != null ? c.scope().appSlug() : null;
|
||||||
|
String routeId = c.scope() != null ? c.scope().routeId() : null;
|
||||||
|
ExchangeMatchCondition.ExchangeFilter filter = c.filter();
|
||||||
|
|
||||||
|
var spec = new AlertMatchSpec(
|
||||||
|
ctx.tenantId(),
|
||||||
|
envSlug,
|
||||||
|
appSlug,
|
||||||
|
routeId,
|
||||||
|
filter != null ? filter.status() : null,
|
||||||
|
filter != null ? filter.attributes() : Map.of(),
|
||||||
|
ctx.now().minusSeconds(c.windowSeconds()),
|
||||||
|
ctx.now(),
|
||||||
|
null
|
||||||
|
);
|
||||||
|
|
||||||
|
long count = searchIndex.countExecutionsForAlerting(spec);
|
||||||
|
if (count <= c.threshold()) return EvalResult.Clear.INSTANCE;
|
||||||
|
|
||||||
|
return new EvalResult.Firing(
|
||||||
|
(double) count,
|
||||||
|
c.threshold().doubleValue(),
|
||||||
|
Map.of(
|
||||||
|
"app", Map.of("slug", appSlug == null ? "" : appSlug),
|
||||||
|
"route", Map.of("id", routeId == null ? "" : routeId)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── PER_EXCHANGE ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
private EvalResult evaluatePerExchange(ExchangeMatchCondition c, AlertRule rule,
|
||||||
|
EvalContext ctx, String envSlug) {
|
||||||
|
String appSlug = c.scope() != null ? c.scope().appSlug() : null;
|
||||||
|
String routeId = c.scope() != null ? c.scope().routeId() : null;
|
||||||
|
ExchangeMatchCondition.ExchangeFilter filter = c.filter();
|
||||||
|
|
||||||
|
// Resolve composite cursor: (startTime, executionId)
|
||||||
|
Instant cursorTs;
|
||||||
|
String cursorId;
|
||||||
|
Object raw = rule.evalState().get("lastExchangeCursor");
|
||||||
|
if (raw instanceof String s && !s.isBlank()) {
|
||||||
|
int pipe = s.indexOf('|');
|
||||||
|
if (pipe < 0) {
|
||||||
|
// Malformed — treat as first-run (with deploy-backlog-cap clamp).
|
||||||
|
cursorTs = firstRunCursorTs(rule, ctx);
|
||||||
|
cursorId = "";
|
||||||
|
} else {
|
||||||
|
cursorTs = Instant.parse(s.substring(0, pipe));
|
||||||
|
cursorId = s.substring(pipe + 1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// First run — bounded by rule.createdAt, empty executionId so any real id sorts after it.
|
||||||
|
// Clamp to deploy-backlog-cap to avoid backlog flooding for long-lived rules on first
|
||||||
|
// post-deploy tick. Normal-advance path (valid cursor above) is intentionally unaffected.
|
||||||
|
cursorTs = firstRunCursorTs(rule, ctx);
|
||||||
|
cursorId = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
var req = new SearchRequest(
|
||||||
|
filter != null ? filter.status() : null,
|
||||||
|
cursorTs, // timeFrom
|
||||||
|
ctx.now(), // timeTo
|
||||||
|
null, null, null, // durationMin/Max, correlationId
|
||||||
|
null, null, null, null, // text variants
|
||||||
|
routeId,
|
||||||
|
null, // instanceId
|
||||||
|
null, // processorType
|
||||||
|
appSlug,
|
||||||
|
null, // instanceIds
|
||||||
|
0,
|
||||||
|
50,
|
||||||
|
"startTime",
|
||||||
|
"asc", // asc so we process oldest first
|
||||||
|
cursorId.isEmpty() ? null : cursorId, // afterExecutionId — null on first run enables >=
|
||||||
|
envSlug
|
||||||
|
);
|
||||||
|
|
||||||
|
SearchResult<ExecutionSummary> result = searchIndex.search(req);
|
||||||
|
List<ExecutionSummary> matches = result.data();
|
||||||
|
|
||||||
|
if (matches.isEmpty()) return EvalResult.Batch.empty();
|
||||||
|
|
||||||
|
// Ensure deterministic ordering for cursor advance
|
||||||
|
matches = new ArrayList<>(matches);
|
||||||
|
matches.sort(Comparator
|
||||||
|
.comparing(ExecutionSummary::startTime)
|
||||||
|
.thenComparing(ExecutionSummary::executionId));
|
||||||
|
|
||||||
|
ExecutionSummary last = matches.get(matches.size() - 1);
|
||||||
|
String nextCursorSerialized = last.startTime().toString() + "|" + last.executionId();
|
||||||
|
|
||||||
|
List<EvalResult.Firing> firings = new ArrayList<>();
|
||||||
|
for (ExecutionSummary ex : matches) {
|
||||||
|
Map<String, Object> ctx2 = new HashMap<>();
|
||||||
|
ctx2.put("exchange", Map.of(
|
||||||
|
"id", ex.executionId(),
|
||||||
|
"routeId", ex.routeId() == null ? "" : ex.routeId(),
|
||||||
|
"status", ex.status() == null ? "" : ex.status(),
|
||||||
|
"startTime", ex.startTime() == null ? "" : ex.startTime().toString()
|
||||||
|
));
|
||||||
|
ctx2.put("app", Map.of("slug", ex.applicationId() == null ? "" : ex.applicationId()));
|
||||||
|
firings.add(new EvalResult.Firing(1.0, null, ctx2));
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, Object> nextEvalState = new HashMap<>(rule.evalState());
|
||||||
|
nextEvalState.put("lastExchangeCursor", nextCursorSerialized);
|
||||||
|
return new EvalResult.Batch(firings, nextEvalState);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* First-run cursor timestamp: {@code rule.createdAt()}, clamped to
|
||||||
|
* {@code now - perExchangeDeployBacklogCapSeconds} so a long-lived PER_EXCHANGE rule
|
||||||
|
* doesn't scan from its creation date forward on first post-deploy tick.
|
||||||
|
* <p>
|
||||||
|
* Cap ≤ 0 disables the clamp (first-run falls back to {@code rule.createdAt()} verbatim).
|
||||||
|
* Applied only on first-run / malformed-cursor paths — the normal-advance path is
|
||||||
|
* intentionally unaffected so legitimate missed ticks are not silently skipped.
|
||||||
|
*/
|
||||||
|
private Instant firstRunCursorTs(AlertRule rule, EvalContext ctx) {
|
||||||
|
Instant cursorTs = rule.createdAt();
|
||||||
|
int capSeconds = alertingProperties.effectivePerExchangeDeployBacklogCapSeconds();
|
||||||
|
if (capSeconds > 0) {
|
||||||
|
Instant capFloor = ctx.now().minusSeconds(capSeconds);
|
||||||
|
if (cursorTs == null || cursorTs.isBefore(capFloor)) {
|
||||||
|
cursorTs = capFloor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cursorTs;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AggregationOp;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
import io.cameleer.server.core.alerting.JvmMetricCondition;
|
||||||
|
import io.cameleer.server.core.storage.MetricsQueryStore;
|
||||||
|
import io.cameleer.server.core.storage.model.MetricTimeSeries;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.OptionalDouble;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class JvmMetricEvaluator implements ConditionEvaluator<JvmMetricCondition> {
|
||||||
|
|
||||||
|
private final MetricsQueryStore metricsStore;
|
||||||
|
|
||||||
|
public JvmMetricEvaluator(MetricsQueryStore metricsStore) {
|
||||||
|
this.metricsStore = metricsStore;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ConditionKind kind() { return ConditionKind.JVM_METRIC; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EvalResult evaluate(JvmMetricCondition c, AlertRule rule, EvalContext ctx) {
|
||||||
|
String agentId = c.scope() != null ? c.scope().agentId() : null;
|
||||||
|
if (agentId == null) return EvalResult.Clear.INSTANCE;
|
||||||
|
|
||||||
|
Map<String, List<MetricTimeSeries.Bucket>> series = metricsStore.queryTimeSeries(
|
||||||
|
agentId,
|
||||||
|
List.of(c.metric()),
|
||||||
|
ctx.now().minusSeconds(c.windowSeconds()),
|
||||||
|
ctx.now(),
|
||||||
|
1
|
||||||
|
);
|
||||||
|
|
||||||
|
List<MetricTimeSeries.Bucket> buckets = series.get(c.metric());
|
||||||
|
if (buckets == null || buckets.isEmpty()) return EvalResult.Clear.INSTANCE;
|
||||||
|
|
||||||
|
OptionalDouble aggregated = aggregate(buckets, c.aggregation());
|
||||||
|
if (aggregated.isEmpty()) return EvalResult.Clear.INSTANCE;
|
||||||
|
|
||||||
|
double actual = aggregated.getAsDouble();
|
||||||
|
|
||||||
|
boolean fire = switch (c.comparator()) {
|
||||||
|
case GT -> actual > c.threshold();
|
||||||
|
case GTE -> actual >= c.threshold();
|
||||||
|
case LT -> actual < c.threshold();
|
||||||
|
case LTE -> actual <= c.threshold();
|
||||||
|
case EQ -> actual == c.threshold();
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!fire) return EvalResult.Clear.INSTANCE;
|
||||||
|
|
||||||
|
return new EvalResult.Firing(actual, c.threshold(),
|
||||||
|
Map.of(
|
||||||
|
"metric", c.metric(),
|
||||||
|
"agent", Map.of("id", agentId)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private OptionalDouble aggregate(List<MetricTimeSeries.Bucket> buckets, AggregationOp op) {
|
||||||
|
return switch (op) {
|
||||||
|
case MAX -> buckets.stream().mapToDouble(MetricTimeSeries.Bucket::value).max();
|
||||||
|
case MIN -> buckets.stream().mapToDouble(MetricTimeSeries.Bucket::value).min();
|
||||||
|
case AVG -> buckets.stream().mapToDouble(MetricTimeSeries.Bucket::value).average();
|
||||||
|
case LATEST -> buckets.stream()
|
||||||
|
.max(java.util.Comparator.comparing(MetricTimeSeries.Bucket::time))
|
||||||
|
.map(b -> OptionalDouble.of(b.value()))
|
||||||
|
.orElse(OptionalDouble.empty());
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,82 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.search.ClickHouseLogStore;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
import io.cameleer.server.core.alerting.LogPatternCondition;
|
||||||
|
import io.cameleer.server.core.runtime.EnvironmentRepository;
|
||||||
|
import io.cameleer.server.core.search.LogSearchRequest;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class LogPatternEvaluator implements ConditionEvaluator<LogPatternCondition> {
|
||||||
|
|
||||||
|
private final ClickHouseLogStore logStore;
|
||||||
|
private final EnvironmentRepository envRepo;
|
||||||
|
|
||||||
|
public LogPatternEvaluator(ClickHouseLogStore logStore, EnvironmentRepository envRepo) {
|
||||||
|
this.logStore = logStore;
|
||||||
|
this.envRepo = envRepo;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ConditionKind kind() { return ConditionKind.LOG_PATTERN; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EvalResult evaluate(LogPatternCondition c, AlertRule rule, EvalContext ctx) {
|
||||||
|
String envSlug = envRepo.findById(rule.environmentId())
|
||||||
|
.map(e -> e.slug())
|
||||||
|
.orElse(null);
|
||||||
|
|
||||||
|
String appSlug = c.scope() != null ? c.scope().appSlug() : null;
|
||||||
|
|
||||||
|
Instant from = ctx.now().minusSeconds(c.windowSeconds());
|
||||||
|
Instant to = ctx.now();
|
||||||
|
|
||||||
|
// Build a stable cache key so identical queries within the same tick are coalesced.
|
||||||
|
String cacheKey = String.join("|",
|
||||||
|
envSlug == null ? "" : envSlug,
|
||||||
|
appSlug == null ? "" : appSlug,
|
||||||
|
c.level() == null ? "" : c.level(),
|
||||||
|
c.pattern() == null ? "" : c.pattern(),
|
||||||
|
from.toString(),
|
||||||
|
to.toString()
|
||||||
|
);
|
||||||
|
|
||||||
|
long count = ctx.tickCache().getOrCompute(cacheKey, () -> {
|
||||||
|
var req = new LogSearchRequest(
|
||||||
|
c.pattern(),
|
||||||
|
c.level() != null ? List.of(c.level()) : List.of(),
|
||||||
|
appSlug,
|
||||||
|
null, // instanceId
|
||||||
|
null, // exchangeId
|
||||||
|
null, // logger
|
||||||
|
envSlug,
|
||||||
|
null, // sources
|
||||||
|
from,
|
||||||
|
to,
|
||||||
|
null, // cursor
|
||||||
|
1, // limit (count query; value irrelevant)
|
||||||
|
"desc", // sort
|
||||||
|
null // instanceIds
|
||||||
|
);
|
||||||
|
return logStore.countLogs(req);
|
||||||
|
});
|
||||||
|
|
||||||
|
if (count <= c.threshold()) return EvalResult.Clear.INSTANCE;
|
||||||
|
|
||||||
|
return new EvalResult.Firing(
|
||||||
|
(double) count,
|
||||||
|
(double) c.threshold(),
|
||||||
|
Map.of(
|
||||||
|
"app", Map.of("slug", appSlug == null ? "" : appSlug),
|
||||||
|
"pattern", c.pattern() == null ? "" : c.pattern(),
|
||||||
|
"level", c.level() == null ? "" : c.level()
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,72 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.metrics.AlertingMetrics;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
|
||||||
|
import java.time.Clock;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.ArrayDeque;
|
||||||
|
import java.util.Deque;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
|
public class PerKindCircuitBreaker {
|
||||||
|
|
||||||
|
private record State(Deque<Instant> failures, Instant openUntil) {}
|
||||||
|
|
||||||
|
private final int threshold;
|
||||||
|
private final Duration window;
|
||||||
|
private final Duration cooldown;
|
||||||
|
private final Clock clock;
|
||||||
|
private final ConcurrentHashMap<ConditionKind, State> byKind = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
/** Optional metrics — set via {@link #setMetrics} after construction (avoids circular bean deps). */
|
||||||
|
private volatile AlertingMetrics metrics;
|
||||||
|
|
||||||
|
/** Production constructor — uses system clock. */
|
||||||
|
public PerKindCircuitBreaker(int threshold, int windowSeconds, int cooldownSeconds) {
|
||||||
|
this(threshold, windowSeconds, cooldownSeconds, Clock.systemDefaultZone());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Test constructor — allows a fixed/controllable clock. */
|
||||||
|
public PerKindCircuitBreaker(int threshold, int windowSeconds, int cooldownSeconds, Clock clock) {
|
||||||
|
this.threshold = threshold;
|
||||||
|
this.window = Duration.ofSeconds(windowSeconds);
|
||||||
|
this.cooldown = Duration.ofSeconds(cooldownSeconds);
|
||||||
|
this.clock = clock;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Wire metrics after construction to avoid circular Spring dependency. */
|
||||||
|
public void setMetrics(AlertingMetrics metrics) {
|
||||||
|
this.metrics = metrics;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void recordFailure(ConditionKind kind) {
|
||||||
|
final boolean[] justOpened = {false};
|
||||||
|
byKind.compute(kind, (k, s) -> {
|
||||||
|
Deque<Instant> deque = (s == null) ? new ArrayDeque<>() : new ArrayDeque<>(s.failures());
|
||||||
|
Instant now = Instant.now(clock);
|
||||||
|
Instant cutoff = now.minus(window);
|
||||||
|
while (!deque.isEmpty() && deque.peekFirst().isBefore(cutoff)) deque.pollFirst();
|
||||||
|
deque.addLast(now);
|
||||||
|
boolean wasOpen = s != null && s.openUntil() != null && now.isBefore(s.openUntil());
|
||||||
|
Instant openUntil = (deque.size() >= threshold) ? now.plus(cooldown) : null;
|
||||||
|
if (openUntil != null && !wasOpen) {
|
||||||
|
justOpened[0] = true;
|
||||||
|
}
|
||||||
|
return new State(deque, openUntil);
|
||||||
|
});
|
||||||
|
if (justOpened[0] && metrics != null) {
|
||||||
|
metrics.circuitOpened(kind);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isOpen(ConditionKind kind) {
|
||||||
|
State s = byKind.get(kind);
|
||||||
|
return s != null && s.openUntil() != null && Instant.now(clock).isBefore(s.openUntil());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void recordSuccess(ConditionKind kind) {
|
||||||
|
byKind.compute(kind, (k, s) -> new State(new ArrayDeque<>(), null));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,79 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
import io.cameleer.server.core.alerting.RouteMetricCondition;
|
||||||
|
import io.cameleer.server.core.runtime.EnvironmentRepository;
|
||||||
|
import io.cameleer.server.core.search.ExecutionStats;
|
||||||
|
import io.cameleer.server.core.storage.StatsStore;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class RouteMetricEvaluator implements ConditionEvaluator<RouteMetricCondition> {
|
||||||
|
|
||||||
|
private final StatsStore statsStore;
|
||||||
|
private final EnvironmentRepository envRepo;
|
||||||
|
|
||||||
|
public RouteMetricEvaluator(StatsStore statsStore, EnvironmentRepository envRepo) {
|
||||||
|
this.statsStore = statsStore;
|
||||||
|
this.envRepo = envRepo;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ConditionKind kind() { return ConditionKind.ROUTE_METRIC; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EvalResult evaluate(RouteMetricCondition c, AlertRule rule, EvalContext ctx) {
|
||||||
|
Instant from = ctx.now().minusSeconds(c.windowSeconds());
|
||||||
|
Instant to = ctx.now();
|
||||||
|
|
||||||
|
String envSlug = envRepo.findById(rule.environmentId())
|
||||||
|
.map(e -> e.slug())
|
||||||
|
.orElse(null);
|
||||||
|
|
||||||
|
String appSlug = c.scope() != null ? c.scope().appSlug() : null;
|
||||||
|
String routeId = c.scope() != null ? c.scope().routeId() : null;
|
||||||
|
|
||||||
|
ExecutionStats stats;
|
||||||
|
if (routeId != null) {
|
||||||
|
stats = statsStore.statsForRoute(from, to, routeId, appSlug, envSlug);
|
||||||
|
} else if (appSlug != null) {
|
||||||
|
stats = statsStore.statsForApp(from, to, appSlug, envSlug);
|
||||||
|
} else {
|
||||||
|
stats = statsStore.stats(from, to, envSlug);
|
||||||
|
}
|
||||||
|
|
||||||
|
double actual = switch (c.metric()) {
|
||||||
|
case ERROR_RATE -> errorRate(stats);
|
||||||
|
case AVG_DURATION_MS -> (double) stats.avgDurationMs();
|
||||||
|
case P99_LATENCY_MS -> (double) stats.p99LatencyMs();
|
||||||
|
case THROUGHPUT -> (double) stats.totalCount();
|
||||||
|
case ERROR_COUNT -> (double) stats.failedCount();
|
||||||
|
};
|
||||||
|
|
||||||
|
boolean fire = switch (c.comparator()) {
|
||||||
|
case GT -> actual > c.threshold();
|
||||||
|
case GTE -> actual >= c.threshold();
|
||||||
|
case LT -> actual < c.threshold();
|
||||||
|
case LTE -> actual <= c.threshold();
|
||||||
|
case EQ -> actual == c.threshold();
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!fire) return EvalResult.Clear.INSTANCE;
|
||||||
|
|
||||||
|
return new EvalResult.Firing(actual, c.threshold(),
|
||||||
|
Map.of(
|
||||||
|
"route", Map.of("id", routeId == null ? "" : routeId),
|
||||||
|
"app", Map.of("slug", appSlug == null ? "" : appSlug)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private double errorRate(ExecutionStats s) {
|
||||||
|
long total = s.totalCount();
|
||||||
|
return total == 0 ? 0.0 : (double) s.failedCount() / total;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
package io.cameleer.server.app.alerting.eval;
|
||||||
|
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
|
||||||
|
public class TickCache {
|
||||||
|
|
||||||
|
private final ConcurrentHashMap<String, Object> map = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public <T> T getOrCompute(String key, Supplier<T> supplier) {
|
||||||
|
return (T) map.computeIfAbsent(key, k -> supplier.get());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,279 @@
|
|||||||
|
package io.cameleer.server.app.alerting.metrics;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertState;
|
||||||
|
import io.cameleer.server.core.alerting.ConditionKind;
|
||||||
|
import io.cameleer.server.core.alerting.NotificationStatus;
|
||||||
|
import io.micrometer.core.instrument.Counter;
|
||||||
|
import io.micrometer.core.instrument.Gauge;
|
||||||
|
import io.micrometer.core.instrument.MeterRegistry;
|
||||||
|
import io.micrometer.core.instrument.Timer;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.EnumMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.concurrent.ConcurrentMap;
|
||||||
|
import java.util.function.Supplier;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Micrometer-based metrics for the alerting subsystem.
|
||||||
|
* <p>
|
||||||
|
* Counters:
|
||||||
|
* <ul>
|
||||||
|
* <li>{@code alerting_eval_errors_total{kind}} — evaluation errors by condition kind</li>
|
||||||
|
* <li>{@code alerting_circuit_opened_total{kind}} — circuit breaker open transitions by kind</li>
|
||||||
|
* <li>{@code alerting_notifications_total{status}} — notification outcomes by status</li>
|
||||||
|
* </ul>
|
||||||
|
* Timers:
|
||||||
|
* <ul>
|
||||||
|
* <li>{@code alerting_eval_duration_seconds{kind}} — per-kind evaluation latency</li>
|
||||||
|
* <li>{@code alerting_webhook_delivery_duration_seconds} — webhook POST latency</li>
|
||||||
|
* </ul>
|
||||||
|
* Gauges (read from PostgreSQL, cached for {@link #DEFAULT_GAUGE_TTL} to amortise
|
||||||
|
* Prometheus scrapes that may fire every few seconds):
|
||||||
|
* <ul>
|
||||||
|
* <li>{@code alerting_rules_total{state=enabled|disabled}} — rule counts from {@code alert_rules}</li>
|
||||||
|
* <li>{@code alerting_instances_total{state}} — instance counts grouped from {@code alert_instances}</li>
|
||||||
|
* </ul>
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class AlertingMetrics {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(AlertingMetrics.class);
|
||||||
|
|
||||||
|
/** Default time-to-live for the gauge-supplier caches. */
|
||||||
|
static final Duration DEFAULT_GAUGE_TTL = Duration.ofSeconds(30);
|
||||||
|
|
||||||
|
private final MeterRegistry registry;
|
||||||
|
|
||||||
|
// Cached counters per kind (lazy-initialized)
|
||||||
|
private final ConcurrentMap<String, Counter> evalErrorCounters = new ConcurrentHashMap<>();
|
||||||
|
private final ConcurrentMap<String, Counter> circuitOpenCounters = new ConcurrentHashMap<>();
|
||||||
|
private final ConcurrentMap<String, Timer> evalDurationTimers = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
// Notification outcome counter per status
|
||||||
|
private final ConcurrentMap<String, Counter> notificationCounters = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
// Shared delivery timer
|
||||||
|
private final Timer webhookDeliveryTimer;
|
||||||
|
|
||||||
|
// TTL-cached gauge suppliers registered so tests can force a read cycle.
|
||||||
|
private final TtlCache enabledRulesCache;
|
||||||
|
private final TtlCache disabledRulesCache;
|
||||||
|
private final Map<AlertState, TtlCache> instancesByStateCaches;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Production constructor: wraps the Postgres-backed gauge suppliers in a
|
||||||
|
* 30-second TTL cache so Prometheus scrapes don't cause per-scrape DB queries.
|
||||||
|
*/
|
||||||
|
@Autowired
|
||||||
|
public AlertingMetrics(MeterRegistry registry, JdbcTemplate jdbc) {
|
||||||
|
this(registry,
|
||||||
|
() -> countRules(jdbc, true),
|
||||||
|
() -> countRules(jdbc, false),
|
||||||
|
state -> countInstances(jdbc, state),
|
||||||
|
DEFAULT_GAUGE_TTL,
|
||||||
|
Instant::now);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test-friendly constructor accepting the three gauge suppliers that are
|
||||||
|
* exercised in the {@link AlertingMetricsCachingTest} plan sketch. The
|
||||||
|
* {@code instancesSupplier} is used for every {@link AlertState}.
|
||||||
|
*/
|
||||||
|
AlertingMetrics(MeterRegistry registry,
|
||||||
|
Supplier<Long> enabledRulesSupplier,
|
||||||
|
Supplier<Long> disabledRulesSupplier,
|
||||||
|
Supplier<Long> instancesSupplier,
|
||||||
|
Duration gaugeTtl,
|
||||||
|
Supplier<Instant> clock) {
|
||||||
|
this(registry,
|
||||||
|
enabledRulesSupplier,
|
||||||
|
disabledRulesSupplier,
|
||||||
|
state -> instancesSupplier.get(),
|
||||||
|
gaugeTtl,
|
||||||
|
clock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Core constructor: accepts per-state instance supplier so production can
|
||||||
|
* query PostgreSQL with a different value per {@link AlertState}.
|
||||||
|
*/
|
||||||
|
private AlertingMetrics(MeterRegistry registry,
|
||||||
|
Supplier<Long> enabledRulesSupplier,
|
||||||
|
Supplier<Long> disabledRulesSupplier,
|
||||||
|
java.util.function.Function<AlertState, Long> instancesSupplier,
|
||||||
|
Duration gaugeTtl,
|
||||||
|
Supplier<Instant> clock) {
|
||||||
|
this.registry = registry;
|
||||||
|
|
||||||
|
// ── Static timers ───────────────────────────────────────────────
|
||||||
|
this.webhookDeliveryTimer = Timer.builder("alerting_webhook_delivery_duration_seconds")
|
||||||
|
.description("Latency of outbound webhook POST requests")
|
||||||
|
.register(registry);
|
||||||
|
|
||||||
|
// ── Gauge: rules by enabled/disabled (cached) ───────────────────
|
||||||
|
this.enabledRulesCache = new TtlCache(enabledRulesSupplier, gaugeTtl, clock);
|
||||||
|
this.disabledRulesCache = new TtlCache(disabledRulesSupplier, gaugeTtl, clock);
|
||||||
|
|
||||||
|
Gauge.builder("alerting_rules_total", enabledRulesCache, TtlCache::getAsDouble)
|
||||||
|
.tag("state", "enabled")
|
||||||
|
.description("Number of enabled alert rules")
|
||||||
|
.register(registry);
|
||||||
|
Gauge.builder("alerting_rules_total", disabledRulesCache, TtlCache::getAsDouble)
|
||||||
|
.tag("state", "disabled")
|
||||||
|
.description("Number of disabled alert rules")
|
||||||
|
.register(registry);
|
||||||
|
|
||||||
|
// ── Gauges: alert instances by state (cached) ───────────────────
|
||||||
|
this.instancesByStateCaches = new EnumMap<>(AlertState.class);
|
||||||
|
for (AlertState state : AlertState.values()) {
|
||||||
|
AlertState captured = state;
|
||||||
|
TtlCache cache = new TtlCache(() -> instancesSupplier.apply(captured), gaugeTtl, clock);
|
||||||
|
this.instancesByStateCaches.put(state, cache);
|
||||||
|
Gauge.builder("alerting_instances_total", cache, TtlCache::getAsDouble)
|
||||||
|
.tag("state", state.name().toLowerCase())
|
||||||
|
.description("Number of alert instances by state")
|
||||||
|
.register(registry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Public API ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increment the evaluation error counter for the given condition kind and rule.
|
||||||
|
*/
|
||||||
|
public void evalError(ConditionKind kind, UUID ruleId) {
|
||||||
|
String key = kind.name();
|
||||||
|
evalErrorCounters.computeIfAbsent(key, k ->
|
||||||
|
Counter.builder("alerting_eval_errors_total")
|
||||||
|
.tag("kind", kind.name())
|
||||||
|
.description("Alerting evaluation errors by condition kind")
|
||||||
|
.register(registry))
|
||||||
|
.increment();
|
||||||
|
log.debug("Alerting eval error for kind={} ruleId={}", kind, ruleId);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increment the circuit-breaker opened counter for the given condition kind.
|
||||||
|
*/
|
||||||
|
public void circuitOpened(ConditionKind kind) {
|
||||||
|
String key = kind.name();
|
||||||
|
circuitOpenCounters.computeIfAbsent(key, k ->
|
||||||
|
Counter.builder("alerting_circuit_opened_total")
|
||||||
|
.tag("kind", kind.name())
|
||||||
|
.description("Circuit breaker open transitions by condition kind")
|
||||||
|
.register(registry))
|
||||||
|
.increment();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the eval duration timer for the given condition kind (creates lazily if absent).
|
||||||
|
*/
|
||||||
|
public Timer evalDuration(ConditionKind kind) {
|
||||||
|
return evalDurationTimers.computeIfAbsent(kind.name(), k ->
|
||||||
|
Timer.builder("alerting_eval_duration_seconds")
|
||||||
|
.tag("kind", kind.name())
|
||||||
|
.description("Alerting condition evaluation latency by kind")
|
||||||
|
.register(registry));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The shared webhook delivery duration timer.
|
||||||
|
*/
|
||||||
|
public Timer webhookDeliveryDuration() {
|
||||||
|
return webhookDeliveryTimer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increment the notification outcome counter for the given status.
|
||||||
|
*/
|
||||||
|
public void notificationOutcome(NotificationStatus status) {
|
||||||
|
String key = status.name();
|
||||||
|
notificationCounters.computeIfAbsent(key, k ->
|
||||||
|
Counter.builder("alerting_notifications_total")
|
||||||
|
.tag("status", status.name().toLowerCase())
|
||||||
|
.description("Alerting notification outcomes by status")
|
||||||
|
.register(registry))
|
||||||
|
.increment();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Force a read of every TTL-cached gauge supplier. Used by tests to simulate
|
||||||
|
* a Prometheus scrape without needing a real registry scrape pipeline.
|
||||||
|
*/
|
||||||
|
void snapshotAllGauges() {
|
||||||
|
List<TtlCache> all = new ArrayList<>();
|
||||||
|
all.add(enabledRulesCache);
|
||||||
|
all.add(disabledRulesCache);
|
||||||
|
all.addAll(instancesByStateCaches.values());
|
||||||
|
for (TtlCache c : all) {
|
||||||
|
c.getAsDouble();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Gauge suppliers (queried at most once per TTL) ──────────────────
|
||||||
|
|
||||||
|
private static long countRules(JdbcTemplate jdbc, boolean enabled) {
|
||||||
|
try {
|
||||||
|
Long count = jdbc.queryForObject(
|
||||||
|
"SELECT COUNT(*) FROM alert_rules WHERE enabled = ?", Long.class, enabled);
|
||||||
|
return count == null ? 0L : count;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("alerting_rules gauge query failed: {}", e.getMessage());
|
||||||
|
return 0L;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static long countInstances(JdbcTemplate jdbc, AlertState state) {
|
||||||
|
try {
|
||||||
|
Long count = jdbc.queryForObject(
|
||||||
|
"SELECT COUNT(*) FROM alert_instances WHERE state = ?::alert_state_enum",
|
||||||
|
Long.class, state.name());
|
||||||
|
return count == null ? 0L : count;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("alerting_instances gauge query failed: {}", e.getMessage());
|
||||||
|
return 0L;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lightweight TTL cache around a {@code Supplier<Long>}. Every call to
|
||||||
|
* {@link #getAsDouble()} either returns the cached value (if {@code clock.get()
|
||||||
|
* - lastRead < ttl}) or invokes the delegate and refreshes the cache.
|
||||||
|
*
|
||||||
|
* <p>Used to amortise Postgres queries behind Prometheus gauges over a
|
||||||
|
* 30-second TTL (see {@link AlertingMetrics#DEFAULT_GAUGE_TTL}).
|
||||||
|
*/
|
||||||
|
static final class TtlCache {
|
||||||
|
private final Supplier<Long> delegate;
|
||||||
|
private final Duration ttl;
|
||||||
|
private final Supplier<Instant> clock;
|
||||||
|
private volatile Instant lastRead = Instant.MIN;
|
||||||
|
private volatile long cached = 0L;
|
||||||
|
|
||||||
|
TtlCache(Supplier<Long> delegate, Duration ttl, Supplier<Instant> clock) {
|
||||||
|
this.delegate = delegate;
|
||||||
|
this.ttl = ttl;
|
||||||
|
this.clock = clock;
|
||||||
|
}
|
||||||
|
|
||||||
|
synchronized double getAsDouble() {
|
||||||
|
Instant now = clock.get();
|
||||||
|
if (lastRead == Instant.MIN || Duration.between(lastRead, now).compareTo(ttl) >= 0) {
|
||||||
|
cached = delegate.get();
|
||||||
|
lastRead = now;
|
||||||
|
}
|
||||||
|
return cached;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
package io.cameleer.server.app.alerting.notify;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import javax.crypto.Mac;
|
||||||
|
import javax.crypto.spec.SecretKeySpec;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.HexFormat;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes HMAC-SHA256 webhook signatures.
|
||||||
|
* <p>
|
||||||
|
* Output format: {@code sha256=<lowercase hex>}
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class HmacSigner {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Signs {@code body} with {@code secret} using HmacSHA256.
|
||||||
|
*
|
||||||
|
* @param secret plain-text secret (UTF-8 encoded)
|
||||||
|
* @param body request body bytes to sign
|
||||||
|
* @return {@code "sha256=" + hex(hmac)}
|
||||||
|
*/
|
||||||
|
public String sign(String secret, byte[] body) {
|
||||||
|
try {
|
||||||
|
Mac mac = Mac.getInstance("HmacSHA256");
|
||||||
|
mac.init(new SecretKeySpec(secret.getBytes(StandardCharsets.UTF_8), "HmacSHA256"));
|
||||||
|
byte[] digest = mac.doFinal(body);
|
||||||
|
return "sha256=" + HexFormat.of().formatHex(digest);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IllegalStateException("HMAC signing failed", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,107 @@
|
|||||||
|
package io.cameleer.server.app.alerting.notify;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.dto.UnreadCountResponse;
|
||||||
|
import io.cameleer.server.core.alerting.AlertInstance;
|
||||||
|
import io.cameleer.server.core.alerting.AlertInstanceRepository;
|
||||||
|
import io.cameleer.server.core.alerting.AlertSeverity;
|
||||||
|
import io.cameleer.server.core.alerting.AlertState;
|
||||||
|
import io.cameleer.server.core.rbac.RbacService;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.time.Clock;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Server-side query helper for the in-app alert inbox.
|
||||||
|
* <p>
|
||||||
|
* {@link #listInbox} returns alerts the user is allowed to see (targeted directly or via group/role).
|
||||||
|
* {@link #countUnread} is memoized per {@code (envId, userId)} for 5 seconds to avoid hammering
|
||||||
|
* the database on every page render. The memo caches the full per-severity breakdown so
|
||||||
|
* the UI can branch bell colour on the highest unread severity without a second call.
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class InAppInboxQuery {
|
||||||
|
|
||||||
|
private static final long MEMO_TTL_MS = 5_000L;
|
||||||
|
|
||||||
|
private final AlertInstanceRepository instanceRepo;
|
||||||
|
private final RbacService rbacService;
|
||||||
|
private final Clock clock;
|
||||||
|
|
||||||
|
/** Cache key for the unread count memo. */
|
||||||
|
private record Key(UUID envId, String userId) {}
|
||||||
|
|
||||||
|
/** Cache entry: cached response + expiry timestamp. */
|
||||||
|
private record Entry(UnreadCountResponse response, Instant expiresAt) {}
|
||||||
|
|
||||||
|
private final ConcurrentHashMap<Key, Entry> memo = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
public InAppInboxQuery(AlertInstanceRepository instanceRepo,
|
||||||
|
RbacService rbacService,
|
||||||
|
Clock alertingClock) {
|
||||||
|
this.instanceRepo = instanceRepo;
|
||||||
|
this.rbacService = rbacService;
|
||||||
|
this.clock = alertingClock;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Full filtered variant: optional {@code states}, {@code severities}, {@code acked},
|
||||||
|
* and {@code read} narrow the result set. {@code null} or empty lists mean
|
||||||
|
* "no filter on that dimension". {@code acked}/{@code read} are tri-state:
|
||||||
|
* {@code null} = no filter, {@code TRUE} = only acked/read, {@code FALSE} = only unacked/unread.
|
||||||
|
*/
|
||||||
|
public List<AlertInstance> listInbox(UUID envId,
|
||||||
|
String userId,
|
||||||
|
List<AlertState> states,
|
||||||
|
List<AlertSeverity> severities,
|
||||||
|
Boolean acked,
|
||||||
|
Boolean read,
|
||||||
|
int limit) {
|
||||||
|
List<String> groupIds = resolveGroupIds(userId);
|
||||||
|
List<String> roleNames = resolveRoleNames(userId);
|
||||||
|
return instanceRepo.listForInbox(envId, groupIds, userId, roleNames,
|
||||||
|
states, severities, acked, read, limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the unread (un-acked) alert count for the user, broken down by severity.
|
||||||
|
* <p>
|
||||||
|
* Memoized for 5 seconds per {@code (envId, userId)}.
|
||||||
|
*/
|
||||||
|
public UnreadCountResponse countUnread(UUID envId, String userId) {
|
||||||
|
Key key = new Key(envId, userId);
|
||||||
|
Instant now = Instant.now(clock);
|
||||||
|
Entry cached = memo.get(key);
|
||||||
|
if (cached != null && now.isBefore(cached.expiresAt())) {
|
||||||
|
return cached.response();
|
||||||
|
}
|
||||||
|
List<String> groupIds = resolveGroupIds(userId);
|
||||||
|
List<String> roleNames = resolveRoleNames(userId);
|
||||||
|
Map<AlertSeverity, Long> bySeverity = instanceRepo.countUnreadBySeverity(envId, userId, groupIds, roleNames);
|
||||||
|
UnreadCountResponse response = UnreadCountResponse.from(bySeverity);
|
||||||
|
memo.put(key, new Entry(response, now.plusMillis(MEMO_TTL_MS)));
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private List<String> resolveGroupIds(String userId) {
|
||||||
|
return rbacService.getEffectiveGroupsForUser(userId)
|
||||||
|
.stream()
|
||||||
|
.map(g -> g.id().toString())
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> resolveRoleNames(String userId) {
|
||||||
|
return rbacService.getEffectiveRolesForUser(userId)
|
||||||
|
.stream()
|
||||||
|
.map(r -> r.name())
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
package io.cameleer.server.app.alerting.notify;
|
||||||
|
|
||||||
|
import com.samskivert.mustache.Mustache;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Renders Mustache templates against a context map.
|
||||||
|
* <p>
|
||||||
|
* Contract:
|
||||||
|
* <ul>
|
||||||
|
* <li>Unresolved {@code {{x.y.z}}} tokens render as the literal {@code {{x.y.z}}} and log WARN.</li>
|
||||||
|
* <li>Malformed templates (e.g. unclosed {@code {{}) return the original template string and log WARN.</li>
|
||||||
|
* <li>Never throws on template content.</li>
|
||||||
|
* </ul>
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class MustacheRenderer {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(MustacheRenderer.class);
|
||||||
|
|
||||||
|
/** Matches {{path}} tokens, capturing the trimmed path. Ignores triple-mustache and comments. */
|
||||||
|
private static final Pattern TOKEN = Pattern.compile("\\{\\{\\s*([^#/!>{\\s][^}]*)\\s*\\}\\}");
|
||||||
|
|
||||||
|
/** Sentinel prefix/suffix to survive Mustache compilation so we can post-replace. */
|
||||||
|
private static final String SENTINEL_PREFIX = "\u0000TPL\u0001";
|
||||||
|
private static final String SENTINEL_SUFFIX = "\u0001LPT\u0000";
|
||||||
|
|
||||||
|
public String render(String template, Map<String, Object> ctx) {
|
||||||
|
if (template == null) return "";
|
||||||
|
try {
|
||||||
|
// 1) Walk all {{path}} tokens. Those unresolved get replaced with a unique sentinel.
|
||||||
|
Map<String, String> literals = new LinkedHashMap<>();
|
||||||
|
StringBuilder pre = new StringBuilder();
|
||||||
|
Matcher m = TOKEN.matcher(template);
|
||||||
|
int sentinelIdx = 0;
|
||||||
|
boolean anyUnresolved = false;
|
||||||
|
while (m.find()) {
|
||||||
|
String path = m.group(1).trim();
|
||||||
|
if (resolvePath(ctx, path) == null) {
|
||||||
|
anyUnresolved = true;
|
||||||
|
String sentinelKey = SENTINEL_PREFIX + sentinelIdx++ + SENTINEL_SUFFIX;
|
||||||
|
literals.put(sentinelKey, "{{" + path + "}}");
|
||||||
|
m.appendReplacement(pre, Matcher.quoteReplacement(sentinelKey));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.appendTail(pre);
|
||||||
|
if (anyUnresolved) {
|
||||||
|
log.warn("MustacheRenderer: unresolved template variables; rendering as literals. template={}",
|
||||||
|
template.length() > 200 ? template.substring(0, 200) + "..." : template);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2) Compile & render the pre-processed template (sentinels are plain text — not Mustache tags).
|
||||||
|
String rendered = Mustache.compiler()
|
||||||
|
.defaultValue("")
|
||||||
|
.escapeHTML(false)
|
||||||
|
.compile(pre.toString())
|
||||||
|
.execute(ctx);
|
||||||
|
|
||||||
|
// 3) Restore the sentinel placeholders back to their original {{path}} literals.
|
||||||
|
for (Map.Entry<String, String> e : literals.entrySet()) {
|
||||||
|
rendered = rendered.replace(e.getKey(), e.getValue());
|
||||||
|
}
|
||||||
|
return rendered;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("MustacheRenderer: template render failed, returning raw template: {}", e.getMessage());
|
||||||
|
return template;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolves a dotted path like "alert.state" against a nested Map context.
|
||||||
|
* Returns null if any segment is missing or the value is null.
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
Object resolvePath(Map<String, Object> ctx, String path) {
|
||||||
|
if (ctx == null || path == null || path.isBlank()) return null;
|
||||||
|
String[] parts = path.split("\\.");
|
||||||
|
Object current = ctx.get(parts[0]);
|
||||||
|
for (int i = 1; i < parts.length; i++) {
|
||||||
|
if (!(current instanceof Map)) return null;
|
||||||
|
current = ((Map<String, Object>) current).get(parts[i]);
|
||||||
|
}
|
||||||
|
return current;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,126 @@
|
|||||||
|
package io.cameleer.server.app.alerting.notify;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertInstance;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.runtime.Environment;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds the Mustache template context map from an AlertRule + AlertInstance + Environment.
|
||||||
|
* <p>
|
||||||
|
* Always present: {@code env}, {@code rule}, {@code alert}.
|
||||||
|
* Conditionally present based on {@code rule.conditionKind()}:
|
||||||
|
* <ul>
|
||||||
|
* <li>AGENT_STATE → {@code agent}, {@code app}</li>
|
||||||
|
* <li>DEPLOYMENT_STATE → {@code deployment}, {@code app}</li>
|
||||||
|
* <li>ROUTE_METRIC → {@code route}, {@code app}</li>
|
||||||
|
* <li>EXCHANGE_MATCH → {@code exchange}, {@code app}, {@code route}</li>
|
||||||
|
* <li>LOG_PATTERN → {@code log}, {@code app}</li>
|
||||||
|
* <li>JVM_METRIC → {@code metric}, {@code agent}, {@code app}</li>
|
||||||
|
* </ul>
|
||||||
|
* Values absent from {@code instance.context()} render as empty string so Mustache templates
|
||||||
|
* remain valid even for env-wide rules that have no app/route scope.
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class NotificationContextBuilder {
|
||||||
|
|
||||||
|
public Map<String, Object> build(AlertRule rule, AlertInstance instance, Environment env, String uiOrigin) {
|
||||||
|
Map<String, Object> ctx = new LinkedHashMap<>();
|
||||||
|
|
||||||
|
// --- env subtree ---
|
||||||
|
ctx.put("env", Map.of(
|
||||||
|
"slug", env.slug(),
|
||||||
|
"id", env.id().toString()
|
||||||
|
));
|
||||||
|
|
||||||
|
// --- rule subtree ---
|
||||||
|
ctx.put("rule", Map.of(
|
||||||
|
"id", rule.id().toString(),
|
||||||
|
"name", rule.name(),
|
||||||
|
"severity", rule.severity().name(),
|
||||||
|
"description", rule.description() == null ? "" : rule.description()
|
||||||
|
));
|
||||||
|
|
||||||
|
// --- alert subtree ---
|
||||||
|
String base = uiOrigin == null ? "" : uiOrigin;
|
||||||
|
ctx.put("alert", Map.of(
|
||||||
|
"id", instance.id().toString(),
|
||||||
|
"state", instance.state().name(),
|
||||||
|
"firedAt", instance.firedAt().toString(),
|
||||||
|
"resolvedAt", instance.resolvedAt() == null ? "" : instance.resolvedAt().toString(),
|
||||||
|
"ackedBy", instance.ackedBy() == null ? "" : instance.ackedBy(),
|
||||||
|
"link", base + "/alerts/inbox/" + instance.id(),
|
||||||
|
"currentValue", instance.currentValue() == null ? "" : instance.currentValue().toString(),
|
||||||
|
"threshold", instance.threshold() == null ? "" : instance.threshold().toString()
|
||||||
|
));
|
||||||
|
|
||||||
|
// --- per-kind conditional subtrees ---
|
||||||
|
if (rule.conditionKind() != null) {
|
||||||
|
switch (rule.conditionKind()) {
|
||||||
|
case AGENT_STATE -> {
|
||||||
|
ctx.put("agent", subtree(instance, "agent.id", "agent.name", "agent.state"));
|
||||||
|
ctx.put("app", subtree(instance, "app.slug", "app.id"));
|
||||||
|
}
|
||||||
|
case AGENT_LIFECYCLE -> {
|
||||||
|
ctx.put("agent", subtree(instance, "agent.id", "agent.app"));
|
||||||
|
ctx.put("event", subtree(instance, "event.type", "event.timestamp", "event.detail"));
|
||||||
|
}
|
||||||
|
case DEPLOYMENT_STATE -> {
|
||||||
|
ctx.put("deployment", subtree(instance, "deployment.id", "deployment.status"));
|
||||||
|
ctx.put("app", subtree(instance, "app.slug", "app.id"));
|
||||||
|
}
|
||||||
|
case ROUTE_METRIC -> {
|
||||||
|
ctx.put("route", subtree(instance, "route.id", "route.uri"));
|
||||||
|
ctx.put("app", subtree(instance, "app.slug", "app.id"));
|
||||||
|
}
|
||||||
|
case EXCHANGE_MATCH -> {
|
||||||
|
ctx.put("exchange", subtree(instance, "exchange.id", "exchange.status"));
|
||||||
|
ctx.put("app", subtree(instance, "app.slug", "app.id"));
|
||||||
|
ctx.put("route", subtree(instance, "route.id", "route.uri"));
|
||||||
|
}
|
||||||
|
case LOG_PATTERN -> {
|
||||||
|
ctx.put("log", subtree(instance, "log.pattern", "log.matchCount"));
|
||||||
|
ctx.put("app", subtree(instance, "app.slug", "app.id"));
|
||||||
|
}
|
||||||
|
case JVM_METRIC -> {
|
||||||
|
ctx.put("metric", subtree(instance, "metric.name", "metric.value"));
|
||||||
|
ctx.put("agent", subtree(instance, "agent.id", "agent.name"));
|
||||||
|
ctx.put("app", subtree(instance, "app.slug", "app.id"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ctx;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts a flat subtree from {@code instance.context()} using dotted key paths.
|
||||||
|
* Each path like {@code "agent.id"} becomes the leaf key {@code "id"} in the returned map.
|
||||||
|
* Missing or null values are stored as empty string.
|
||||||
|
*/
|
||||||
|
private Map<String, Object> subtree(AlertInstance instance, String... dottedPaths) {
|
||||||
|
Map<String, Object> sub = new LinkedHashMap<>();
|
||||||
|
Map<String, Object> ic = instance.context();
|
||||||
|
for (String path : dottedPaths) {
|
||||||
|
String leafKey = path.contains(".") ? path.substring(path.lastIndexOf('.') + 1) : path;
|
||||||
|
Object val = resolveContext(ic, path);
|
||||||
|
sub.put(leafKey, val == null ? "" : val.toString());
|
||||||
|
}
|
||||||
|
return sub;
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
private Object resolveContext(Map<String, Object> ctx, String path) {
|
||||||
|
if (ctx == null) return null;
|
||||||
|
String[] parts = path.split("\\.");
|
||||||
|
Object current = ctx.get(parts[0]);
|
||||||
|
for (int i = 1; i < parts.length; i++) {
|
||||||
|
if (!(current instanceof Map)) return null;
|
||||||
|
current = ((Map<String, Object>) current).get(parts[i]);
|
||||||
|
}
|
||||||
|
return current;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,181 @@
|
|||||||
|
package io.cameleer.server.app.alerting.notify;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.config.AlertingProperties;
|
||||||
|
import io.cameleer.server.app.alerting.metrics.AlertingMetrics;
|
||||||
|
import io.cameleer.server.core.alerting.*;
|
||||||
|
import io.cameleer.server.core.outbound.OutboundConnectionRepository;
|
||||||
|
import io.cameleer.server.core.runtime.Environment;
|
||||||
|
import io.cameleer.server.core.runtime.EnvironmentRepository;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.beans.factory.annotation.Qualifier;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.scheduling.annotation.SchedulingConfigurer;
|
||||||
|
import org.springframework.scheduling.config.ScheduledTaskRegistrar;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.time.Clock;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Claim-polling outbox loop that dispatches {@link AlertNotification} records.
|
||||||
|
* <p>
|
||||||
|
* On each tick, claims a batch of due notifications, resolves the backing
|
||||||
|
* {@link AlertInstance} and {@link io.cameleer.server.core.outbound.OutboundConnection},
|
||||||
|
* checks active silences, delegates to {@link WebhookDispatcher}, and persists the outcome.
|
||||||
|
* <p>
|
||||||
|
* Retry backoff: {@code retryAfter × attempts} (30 s, 60 s, 90 s, …).
|
||||||
|
* After {@link AlertingProperties#effectiveWebhookMaxAttempts()} retries the notification
|
||||||
|
* is marked FAILED permanently.
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class NotificationDispatchJob implements SchedulingConfigurer {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(NotificationDispatchJob.class);
|
||||||
|
|
||||||
|
private final AlertingProperties props;
|
||||||
|
private final AlertNotificationRepository notificationRepo;
|
||||||
|
private final AlertInstanceRepository instanceRepo;
|
||||||
|
private final AlertRuleRepository ruleRepo;
|
||||||
|
private final AlertSilenceRepository silenceRepo;
|
||||||
|
private final OutboundConnectionRepository outboundRepo;
|
||||||
|
private final EnvironmentRepository envRepo;
|
||||||
|
private final WebhookDispatcher dispatcher;
|
||||||
|
private final SilenceMatcherService silenceMatcher;
|
||||||
|
private final NotificationContextBuilder contextBuilder;
|
||||||
|
private final String instanceId;
|
||||||
|
private final String tenantId;
|
||||||
|
private final Clock clock;
|
||||||
|
private final String uiOrigin;
|
||||||
|
private final AlertingMetrics metrics;
|
||||||
|
|
||||||
|
@SuppressWarnings("SpringJavaInjectionPointsAutowiringInspection")
|
||||||
|
public NotificationDispatchJob(
|
||||||
|
AlertingProperties props,
|
||||||
|
AlertNotificationRepository notificationRepo,
|
||||||
|
AlertInstanceRepository instanceRepo,
|
||||||
|
AlertRuleRepository ruleRepo,
|
||||||
|
AlertSilenceRepository silenceRepo,
|
||||||
|
OutboundConnectionRepository outboundRepo,
|
||||||
|
EnvironmentRepository envRepo,
|
||||||
|
WebhookDispatcher dispatcher,
|
||||||
|
SilenceMatcherService silenceMatcher,
|
||||||
|
NotificationContextBuilder contextBuilder,
|
||||||
|
@Qualifier("alertingInstanceId") String instanceId,
|
||||||
|
@Value("${cameleer.server.tenant.id:default}") String tenantId,
|
||||||
|
Clock alertingClock,
|
||||||
|
@Value("${cameleer.server.ui-origin:#{null}}") String uiOrigin,
|
||||||
|
AlertingMetrics metrics) {
|
||||||
|
|
||||||
|
this.props = props;
|
||||||
|
this.notificationRepo = notificationRepo;
|
||||||
|
this.instanceRepo = instanceRepo;
|
||||||
|
this.ruleRepo = ruleRepo;
|
||||||
|
this.silenceRepo = silenceRepo;
|
||||||
|
this.outboundRepo = outboundRepo;
|
||||||
|
this.envRepo = envRepo;
|
||||||
|
this.dispatcher = dispatcher;
|
||||||
|
this.silenceMatcher = silenceMatcher;
|
||||||
|
this.contextBuilder = contextBuilder;
|
||||||
|
this.instanceId = instanceId;
|
||||||
|
this.tenantId = tenantId;
|
||||||
|
this.clock = alertingClock;
|
||||||
|
this.uiOrigin = uiOrigin;
|
||||||
|
this.metrics = metrics;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// SchedulingConfigurer
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void configureTasks(ScheduledTaskRegistrar registrar) {
|
||||||
|
registrar.addFixedDelayTask(this::tick, props.effectiveNotificationTickIntervalMs());
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Tick — accessible for tests across packages
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
public void tick() {
|
||||||
|
List<AlertNotification> claimed = notificationRepo.claimDueNotifications(
|
||||||
|
instanceId,
|
||||||
|
props.effectiveNotificationBatchSize(),
|
||||||
|
props.effectiveClaimTtlSeconds());
|
||||||
|
|
||||||
|
for (AlertNotification n : claimed) {
|
||||||
|
try {
|
||||||
|
processOne(n);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Notification dispatch error for {}: {}", n.id(), e.toString());
|
||||||
|
notificationRepo.scheduleRetry(n.id(), Instant.now(clock).plusSeconds(30), -1, e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Per-notification processing
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private void processOne(AlertNotification n) {
|
||||||
|
// 1. Resolve alert instance
|
||||||
|
AlertInstance instance = instanceRepo.findById(n.alertInstanceId()).orElse(null);
|
||||||
|
if (instance == null) {
|
||||||
|
notificationRepo.markFailed(n.id(), 0, "instance deleted");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Resolve outbound connection
|
||||||
|
var conn = outboundRepo.findById(tenantId, n.outboundConnectionId()).orElse(null);
|
||||||
|
if (conn == null) {
|
||||||
|
notificationRepo.markFailed(n.id(), 0, "outbound connection deleted");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Resolve rule and environment (may be null after deletion)
|
||||||
|
AlertRule rule = instance.ruleId() == null ? null
|
||||||
|
: ruleRepo.findById(instance.ruleId()).orElse(null);
|
||||||
|
Environment env = envRepo.findById(instance.environmentId()).orElse(null);
|
||||||
|
|
||||||
|
// 4. Build Mustache context (guard: rule or env may be null after deletion)
|
||||||
|
Map<String, Object> context = (rule != null && env != null)
|
||||||
|
? contextBuilder.build(rule, instance, env, uiOrigin)
|
||||||
|
: Map.of();
|
||||||
|
|
||||||
|
// 5. Silence check
|
||||||
|
List<AlertSilence> activeSilences = silenceRepo.listActive(instance.environmentId(), Instant.now(clock));
|
||||||
|
for (AlertSilence s : activeSilences) {
|
||||||
|
if (silenceMatcher.matches(s.matcher(), instance, rule)) {
|
||||||
|
instanceRepo.markSilenced(instance.id(), true);
|
||||||
|
notificationRepo.markFailed(n.id(), 0, "silenced");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 6. Dispatch
|
||||||
|
WebhookDispatcher.Outcome outcome = dispatcher.dispatch(n, rule, instance, conn, context);
|
||||||
|
|
||||||
|
NotificationStatus outcomeStatus = outcome.status();
|
||||||
|
if (outcomeStatus == NotificationStatus.DELIVERED) {
|
||||||
|
Instant now = Instant.now(clock);
|
||||||
|
notificationRepo.markDelivered(n.id(), outcome.httpStatus(), outcome.snippet(), now);
|
||||||
|
instanceRepo.save(instance.withLastNotifiedAt(now));
|
||||||
|
metrics.notificationOutcome(NotificationStatus.DELIVERED);
|
||||||
|
} else if (outcomeStatus == NotificationStatus.FAILED) {
|
||||||
|
notificationRepo.markFailed(n.id(), outcome.httpStatus(), outcome.snippet());
|
||||||
|
metrics.notificationOutcome(NotificationStatus.FAILED);
|
||||||
|
} else {
|
||||||
|
// null status = transient failure (5xx / network / timeout) → retry
|
||||||
|
int attempts = n.attempts() + 1;
|
||||||
|
if (attempts >= props.effectiveWebhookMaxAttempts()) {
|
||||||
|
notificationRepo.markFailed(n.id(), outcome.httpStatus(), outcome.snippet());
|
||||||
|
metrics.notificationOutcome(NotificationStatus.FAILED);
|
||||||
|
} else {
|
||||||
|
Instant next = Instant.now(clock).plus(outcome.retryAfter().multipliedBy(attempts));
|
||||||
|
notificationRepo.scheduleRetry(n.id(), next, outcome.httpStatus(), outcome.snippet());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,58 @@
|
|||||||
|
package io.cameleer.server.app.alerting.notify;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertInstance;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.SilenceMatcher;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Evaluates whether an active silence matches an alert instance at notification-dispatch time.
|
||||||
|
* <p>
|
||||||
|
* Each non-null field on the matcher is an additional AND constraint. A null field is a wildcard.
|
||||||
|
* Matching is purely in-process — no I/O.
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class SilenceMatcherService {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns {@code true} if the silence covers this alert instance.
|
||||||
|
*
|
||||||
|
* @param matcher the silence's matching spec (never null)
|
||||||
|
* @param instance the alert instance to test (never null)
|
||||||
|
* @param rule the alert rule; may be null when the rule was deleted after instance creation.
|
||||||
|
* Scope-based matchers (appSlug, routeId, agentId) return false when rule is null
|
||||||
|
* because the scope cannot be verified.
|
||||||
|
*/
|
||||||
|
public boolean matches(SilenceMatcher matcher, AlertInstance instance, AlertRule rule) {
|
||||||
|
// ruleId constraint
|
||||||
|
if (matcher.ruleId() != null && !matcher.ruleId().equals(instance.ruleId())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// severity constraint
|
||||||
|
if (matcher.severity() != null && matcher.severity() != instance.severity()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// scope-based constraints require the rule to derive scope from
|
||||||
|
boolean needsScope = matcher.appSlug() != null || matcher.routeId() != null || matcher.agentId() != null;
|
||||||
|
if (needsScope && rule == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rule != null && rule.condition() != null) {
|
||||||
|
var scope = rule.condition().scope();
|
||||||
|
if (matcher.appSlug() != null && !matcher.appSlug().equals(scope.appSlug())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (matcher.routeId() != null && !matcher.routeId().equals(scope.routeId())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (matcher.agentId() != null && !matcher.agentId().equals(scope.agentId())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,213 @@
|
|||||||
|
package io.cameleer.server.app.alerting.notify;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.config.AlertingProperties;
|
||||||
|
import io.cameleer.server.app.outbound.crypto.SecretCipher;
|
||||||
|
import io.cameleer.server.core.alerting.AlertInstance;
|
||||||
|
import io.cameleer.server.core.alerting.AlertNotification;
|
||||||
|
import io.cameleer.server.core.alerting.AlertRule;
|
||||||
|
import io.cameleer.server.core.alerting.NotificationStatus;
|
||||||
|
import io.cameleer.server.core.alerting.WebhookBinding;
|
||||||
|
import io.cameleer.server.core.http.OutboundHttpClientFactory;
|
||||||
|
import io.cameleer.server.core.http.OutboundHttpRequestContext;
|
||||||
|
import io.cameleer.server.core.outbound.OutboundConnection;
|
||||||
|
import io.cameleer.server.core.outbound.OutboundMethod;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import org.apache.hc.client5.http.classic.methods.HttpPatch;
|
||||||
|
import org.apache.hc.client5.http.classic.methods.HttpPost;
|
||||||
|
import org.apache.hc.client5.http.classic.methods.HttpPut;
|
||||||
|
import org.apache.hc.client5.http.classic.methods.HttpUriRequestBase;
|
||||||
|
import org.apache.hc.core5.http.io.entity.EntityUtils;
|
||||||
|
import org.apache.hc.core5.http.io.entity.StringEntity;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Renders, signs, and dispatches webhook notifications over HTTP.
|
||||||
|
* <p>
|
||||||
|
* Classification:
|
||||||
|
* <ul>
|
||||||
|
* <li>2xx → {@link NotificationStatus#DELIVERED}</li>
|
||||||
|
* <li>4xx → {@link NotificationStatus#FAILED} (retry won't help)</li>
|
||||||
|
* <li>5xx / network / timeout → {@code null} status (caller retries up to max attempts)</li>
|
||||||
|
* </ul>
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class WebhookDispatcher {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(WebhookDispatcher.class);
|
||||||
|
|
||||||
|
/** baseDelay that callers multiply by attempt count: 30s, 60s, 90s, … */
|
||||||
|
static final Duration BASE_RETRY_DELAY = Duration.ofSeconds(30);
|
||||||
|
|
||||||
|
private static final int SNIPPET_LIMIT = 512;
|
||||||
|
private static final String DEFAULT_CONTENT_TYPE = "application/json";
|
||||||
|
|
||||||
|
private final OutboundHttpClientFactory clientFactory;
|
||||||
|
private final SecretCipher secretCipher;
|
||||||
|
private final MustacheRenderer renderer;
|
||||||
|
private final AlertingProperties props;
|
||||||
|
private final ObjectMapper objectMapper;
|
||||||
|
|
||||||
|
public WebhookDispatcher(OutboundHttpClientFactory clientFactory,
|
||||||
|
SecretCipher secretCipher,
|
||||||
|
MustacheRenderer renderer,
|
||||||
|
AlertingProperties props,
|
||||||
|
ObjectMapper objectMapper) {
|
||||||
|
this.clientFactory = clientFactory;
|
||||||
|
this.secretCipher = secretCipher;
|
||||||
|
this.renderer = renderer;
|
||||||
|
this.props = props;
|
||||||
|
this.objectMapper = objectMapper;
|
||||||
|
}
|
||||||
|
|
||||||
|
public record Outcome(
|
||||||
|
NotificationStatus status,
|
||||||
|
int httpStatus,
|
||||||
|
String snippet,
|
||||||
|
Duration retryAfter) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dispatch a single webhook notification.
|
||||||
|
*
|
||||||
|
* @param notif the outbox record (contains webhookId used to find per-rule overrides)
|
||||||
|
* @param rule the alert rule (may be null when rule was deleted)
|
||||||
|
* @param instance the alert instance
|
||||||
|
* @param conn the resolved outbound connection
|
||||||
|
* @param context the Mustache rendering context
|
||||||
|
*/
|
||||||
|
public Outcome dispatch(AlertNotification notif,
|
||||||
|
AlertRule rule,
|
||||||
|
AlertInstance instance,
|
||||||
|
OutboundConnection conn,
|
||||||
|
Map<String, Object> context) {
|
||||||
|
try {
|
||||||
|
// 1. Determine per-binding overrides
|
||||||
|
WebhookBinding binding = findBinding(rule, notif);
|
||||||
|
|
||||||
|
// 2. Render URL
|
||||||
|
String url = renderer.render(conn.url(), context);
|
||||||
|
|
||||||
|
// 3. Build body
|
||||||
|
String body = buildBody(conn, binding, context);
|
||||||
|
|
||||||
|
// 4. Build headers
|
||||||
|
Map<String, String> headers = buildHeaders(conn, binding, context);
|
||||||
|
|
||||||
|
// 5. HMAC sign if configured
|
||||||
|
if (conn.hmacSecretCiphertext() != null) {
|
||||||
|
String secret = secretCipher.decrypt(conn.hmacSecretCiphertext());
|
||||||
|
String sig = new HmacSigner().sign(secret, body.getBytes(StandardCharsets.UTF_8));
|
||||||
|
headers.put("X-Cameleer-Signature", sig);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 6. Build HTTP request
|
||||||
|
Duration timeout = Duration.ofMillis(props.effectiveWebhookTimeoutMs());
|
||||||
|
OutboundHttpRequestContext ctx = new OutboundHttpRequestContext(
|
||||||
|
conn.tlsTrustMode(), conn.tlsCaPemPaths(), timeout, timeout);
|
||||||
|
|
||||||
|
var client = clientFactory.clientFor(ctx);
|
||||||
|
HttpUriRequestBase request = buildRequest(conn.method(), url);
|
||||||
|
for (var e : headers.entrySet()) {
|
||||||
|
request.setHeader(e.getKey(), e.getValue());
|
||||||
|
}
|
||||||
|
request.setEntity(new StringEntity(body, StandardCharsets.UTF_8));
|
||||||
|
|
||||||
|
// 7. Execute and classify
|
||||||
|
try (var response = client.execute(request)) {
|
||||||
|
int code = response.getCode();
|
||||||
|
String snippet = snippet(response.getEntity() != null
|
||||||
|
? EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8)
|
||||||
|
: "");
|
||||||
|
|
||||||
|
if (code >= 200 && code < 300) {
|
||||||
|
return new Outcome(NotificationStatus.DELIVERED, code, snippet, null);
|
||||||
|
} else if (code >= 400 && code < 500) {
|
||||||
|
return new Outcome(NotificationStatus.FAILED, code, snippet, null);
|
||||||
|
} else {
|
||||||
|
return new Outcome(null, code, snippet, BASE_RETRY_DELAY);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("WebhookDispatcher: network/timeout error dispatching notification {}: {}",
|
||||||
|
notif.id(), e.getMessage());
|
||||||
|
return new Outcome(null, 0, snippet(e.getMessage()), BASE_RETRY_DELAY);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private WebhookBinding findBinding(AlertRule rule, AlertNotification notif) {
|
||||||
|
if (rule == null || notif.webhookId() == null) return null;
|
||||||
|
return rule.webhooks().stream()
|
||||||
|
.filter(w -> w.id().equals(notif.webhookId()))
|
||||||
|
.findFirst()
|
||||||
|
.orElse(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildBody(OutboundConnection conn, WebhookBinding binding, Map<String, Object> context) {
|
||||||
|
// Priority: per-binding override > connection default > built-in JSON envelope
|
||||||
|
String tmpl = null;
|
||||||
|
if (binding != null && binding.bodyOverride() != null) {
|
||||||
|
tmpl = binding.bodyOverride();
|
||||||
|
} else if (conn.defaultBodyTmpl() != null) {
|
||||||
|
tmpl = conn.defaultBodyTmpl();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tmpl != null) {
|
||||||
|
return renderer.render(tmpl, context);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Built-in default: serialize the entire context map as JSON
|
||||||
|
try {
|
||||||
|
return objectMapper.writeValueAsString(context);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("WebhookDispatcher: failed to serialize context as JSON, using empty object", e);
|
||||||
|
return "{}";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, String> buildHeaders(OutboundConnection conn, WebhookBinding binding,
|
||||||
|
Map<String, Object> context) {
|
||||||
|
Map<String, String> headers = new LinkedHashMap<>();
|
||||||
|
|
||||||
|
// Default content-type
|
||||||
|
headers.put("Content-Type", DEFAULT_CONTENT_TYPE);
|
||||||
|
|
||||||
|
// Connection-level default headers (keys are literal, values are Mustache-rendered)
|
||||||
|
for (var e : conn.defaultHeaders().entrySet()) {
|
||||||
|
headers.put(e.getKey(), renderer.render(e.getValue(), context));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per-binding overrides (also Mustache-rendered values)
|
||||||
|
if (binding != null) {
|
||||||
|
for (var e : binding.headerOverrides().entrySet()) {
|
||||||
|
headers.put(e.getKey(), renderer.render(e.getValue(), context));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return headers;
|
||||||
|
}
|
||||||
|
|
||||||
|
private HttpUriRequestBase buildRequest(OutboundMethod method, String url) {
|
||||||
|
if (method == null) method = OutboundMethod.POST;
|
||||||
|
return switch (method) {
|
||||||
|
case PUT -> new HttpPut(url);
|
||||||
|
case PATCH -> new HttpPatch(url);
|
||||||
|
default -> new HttpPost(url);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private String snippet(String text) {
|
||||||
|
if (text == null) return "";
|
||||||
|
return text.length() <= SNIPPET_LIMIT ? text : text.substring(0, SNIPPET_LIMIT);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,63 @@
|
|||||||
|
package io.cameleer.server.app.alerting.retention;
|
||||||
|
|
||||||
|
import io.cameleer.server.app.alerting.config.AlertingProperties;
|
||||||
|
import io.cameleer.server.core.alerting.AlertInstanceRepository;
|
||||||
|
import io.cameleer.server.core.alerting.AlertNotificationRepository;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.time.Clock;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.time.temporal.ChronoUnit;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Nightly retention job for alerting data.
|
||||||
|
* <p>
|
||||||
|
* Deletes RESOLVED {@link io.cameleer.server.core.alerting.AlertInstance} rows older than
|
||||||
|
* {@code cameleer.server.alerting.eventRetentionDays} and DELIVERED/FAILED
|
||||||
|
* {@link io.cameleer.server.core.alerting.AlertNotification} rows older than
|
||||||
|
* {@code cameleer.server.alerting.notificationRetentionDays}.
|
||||||
|
* <p>
|
||||||
|
* Duplicate runs across replicas are tolerable — the DELETEs are idempotent.
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class AlertingRetentionJob {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(AlertingRetentionJob.class);
|
||||||
|
|
||||||
|
private final AlertingProperties props;
|
||||||
|
private final AlertInstanceRepository alertInstanceRepo;
|
||||||
|
private final AlertNotificationRepository alertNotificationRepo;
|
||||||
|
private final Clock clock;
|
||||||
|
|
||||||
|
public AlertingRetentionJob(AlertingProperties props,
|
||||||
|
AlertInstanceRepository alertInstanceRepo,
|
||||||
|
AlertNotificationRepository alertNotificationRepo,
|
||||||
|
Clock alertingClock) {
|
||||||
|
this.props = props;
|
||||||
|
this.alertInstanceRepo = alertInstanceRepo;
|
||||||
|
this.alertNotificationRepo = alertNotificationRepo;
|
||||||
|
this.clock = alertingClock;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Scheduled(cron = "0 0 3 * * *") // 03:00 every day
|
||||||
|
public void cleanup() {
|
||||||
|
log.info("Alerting retention job started");
|
||||||
|
|
||||||
|
Instant now = Instant.now(clock);
|
||||||
|
|
||||||
|
Instant instanceCutoff = now.minus(props.effectiveEventRetentionDays(), ChronoUnit.DAYS);
|
||||||
|
alertInstanceRepo.deleteResolvedBefore(instanceCutoff);
|
||||||
|
log.info("Alerting retention: deleted RESOLVED instances older than {} ({} days)",
|
||||||
|
instanceCutoff, props.effectiveEventRetentionDays());
|
||||||
|
|
||||||
|
Instant notificationCutoff = now.minus(props.effectiveNotificationRetentionDays(), ChronoUnit.DAYS);
|
||||||
|
alertNotificationRepo.deleteSettledBefore(notificationCutoff);
|
||||||
|
log.info("Alerting retention: deleted settled notifications older than {} ({} days)",
|
||||||
|
notificationCutoff, props.effectiveNotificationRetentionDays());
|
||||||
|
|
||||||
|
log.info("Alerting retention job completed");
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,377 @@
|
|||||||
|
package io.cameleer.server.app.alerting.storage;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.*;
|
||||||
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.springframework.dao.DuplicateKeyException;
|
||||||
|
import org.springframework.jdbc.core.ConnectionCallback;
|
||||||
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||||||
|
import org.springframework.jdbc.core.RowMapper;
|
||||||
|
|
||||||
|
import java.sql.Array;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.sql.Timestamp;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class PostgresAlertInstanceRepository implements AlertInstanceRepository {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(PostgresAlertInstanceRepository.class);
|
||||||
|
|
||||||
|
private final JdbcTemplate jdbc;
|
||||||
|
private final ObjectMapper om;
|
||||||
|
|
||||||
|
public PostgresAlertInstanceRepository(JdbcTemplate jdbc, ObjectMapper om) {
|
||||||
|
this.jdbc = jdbc;
|
||||||
|
this.om = om;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AlertInstance save(AlertInstance i) {
|
||||||
|
String sql = """
|
||||||
|
INSERT INTO alert_instances (
|
||||||
|
id, rule_id, rule_snapshot, environment_id, state, severity,
|
||||||
|
fired_at, acked_at, acked_by, resolved_at, last_notified_at,
|
||||||
|
read_at, deleted_at,
|
||||||
|
silenced, current_value, threshold, context, title, message,
|
||||||
|
target_user_ids, target_group_ids, target_role_names)
|
||||||
|
VALUES (?, ?, ?::jsonb, ?, ?::alert_state_enum, ?::severity_enum,
|
||||||
|
?, ?, ?, ?, ?,
|
||||||
|
?, ?,
|
||||||
|
?, ?, ?, ?::jsonb, ?, ?,
|
||||||
|
?, ?, ?)
|
||||||
|
ON CONFLICT (id) DO UPDATE SET
|
||||||
|
state = EXCLUDED.state,
|
||||||
|
acked_at = EXCLUDED.acked_at,
|
||||||
|
acked_by = EXCLUDED.acked_by,
|
||||||
|
resolved_at = EXCLUDED.resolved_at,
|
||||||
|
last_notified_at = EXCLUDED.last_notified_at,
|
||||||
|
read_at = EXCLUDED.read_at,
|
||||||
|
deleted_at = EXCLUDED.deleted_at,
|
||||||
|
silenced = EXCLUDED.silenced,
|
||||||
|
current_value = EXCLUDED.current_value,
|
||||||
|
threshold = EXCLUDED.threshold,
|
||||||
|
context = EXCLUDED.context,
|
||||||
|
title = EXCLUDED.title,
|
||||||
|
message = EXCLUDED.message,
|
||||||
|
target_user_ids = EXCLUDED.target_user_ids,
|
||||||
|
target_group_ids = EXCLUDED.target_group_ids,
|
||||||
|
target_role_names = EXCLUDED.target_role_names
|
||||||
|
""";
|
||||||
|
Array userIds = toTextArray(i.targetUserIds());
|
||||||
|
Array groupIds = toUuidArray(i.targetGroupIds());
|
||||||
|
Array roleNames = toTextArray(i.targetRoleNames());
|
||||||
|
|
||||||
|
try {
|
||||||
|
jdbc.update(sql,
|
||||||
|
i.id(), i.ruleId(), writeJson(i.ruleSnapshot()),
|
||||||
|
i.environmentId(), i.state().name(), i.severity().name(),
|
||||||
|
ts(i.firedAt()), ts(i.ackedAt()), i.ackedBy(),
|
||||||
|
ts(i.resolvedAt()), ts(i.lastNotifiedAt()),
|
||||||
|
ts(i.readAt()), ts(i.deletedAt()),
|
||||||
|
i.silenced(), i.currentValue(), i.threshold(),
|
||||||
|
writeJson(i.context()), i.title(), i.message(),
|
||||||
|
userIds, groupIds, roleNames);
|
||||||
|
} catch (DuplicateKeyException e) {
|
||||||
|
log.info("Skipped duplicate open alert_instance for rule {}: {}", i.ruleId(), e.getMessage());
|
||||||
|
return findOpenForRule(i.ruleId()).orElse(i);
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<AlertInstance> findById(UUID id) {
|
||||||
|
var list = jdbc.query("SELECT * FROM alert_instances WHERE id = ?", rowMapper(), id);
|
||||||
|
return list.isEmpty() ? Optional.empty() : Optional.of(list.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<AlertInstance> findOpenForRule(UUID ruleId) {
|
||||||
|
var list = jdbc.query("""
|
||||||
|
SELECT * FROM alert_instances
|
||||||
|
WHERE rule_id = ?
|
||||||
|
AND state IN ('PENDING','FIRING')
|
||||||
|
AND deleted_at IS NULL
|
||||||
|
LIMIT 1
|
||||||
|
""", rowMapper(), ruleId);
|
||||||
|
return list.isEmpty() ? Optional.empty() : Optional.of(list.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<AlertInstance> listForInbox(UUID environmentId,
|
||||||
|
List<String> userGroupIdFilter,
|
||||||
|
String userId,
|
||||||
|
List<String> userRoleNames,
|
||||||
|
List<AlertState> states,
|
||||||
|
List<AlertSeverity> severities,
|
||||||
|
Boolean acked,
|
||||||
|
Boolean read,
|
||||||
|
int limit) {
|
||||||
|
Array groupArray = toUuidArrayFromStrings(userGroupIdFilter);
|
||||||
|
Array roleArray = toTextArray(userRoleNames);
|
||||||
|
|
||||||
|
StringBuilder sql = new StringBuilder("""
|
||||||
|
SELECT * FROM alert_instances
|
||||||
|
WHERE environment_id = ?
|
||||||
|
AND (
|
||||||
|
? = ANY(target_user_ids)
|
||||||
|
OR target_group_ids && ?
|
||||||
|
OR target_role_names && ?
|
||||||
|
)
|
||||||
|
""");
|
||||||
|
List<Object> args = new ArrayList<>(List.of(environmentId, userId, groupArray, roleArray));
|
||||||
|
|
||||||
|
if (states != null && !states.isEmpty()) {
|
||||||
|
Array stateArray = toTextArray(states.stream().map(Enum::name).toList());
|
||||||
|
sql.append(" AND state::text = ANY(?)");
|
||||||
|
args.add(stateArray);
|
||||||
|
}
|
||||||
|
if (severities != null && !severities.isEmpty()) {
|
||||||
|
Array severityArray = toTextArray(severities.stream().map(Enum::name).toList());
|
||||||
|
sql.append(" AND severity::text = ANY(?)");
|
||||||
|
args.add(severityArray);
|
||||||
|
}
|
||||||
|
if (acked != null) {
|
||||||
|
sql.append(acked ? " AND acked_at IS NOT NULL" : " AND acked_at IS NULL");
|
||||||
|
}
|
||||||
|
if (read != null) {
|
||||||
|
sql.append(read ? " AND read_at IS NOT NULL" : " AND read_at IS NULL");
|
||||||
|
}
|
||||||
|
sql.append(" AND deleted_at IS NULL");
|
||||||
|
sql.append(" ORDER BY fired_at DESC LIMIT ?");
|
||||||
|
args.add(limit);
|
||||||
|
|
||||||
|
return jdbc.query(sql.toString(), rowMapper(), args.toArray());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<AlertSeverity, Long> countUnreadBySeverity(UUID environmentId,
|
||||||
|
String userId,
|
||||||
|
List<String> groupIds,
|
||||||
|
List<String> roleNames) {
|
||||||
|
Array groupArray = toUuidArrayFromStrings(groupIds);
|
||||||
|
Array roleArray = toTextArray(roleNames);
|
||||||
|
String sql = """
|
||||||
|
SELECT severity::text AS severity, COUNT(*) AS cnt
|
||||||
|
FROM alert_instances
|
||||||
|
WHERE environment_id = ?
|
||||||
|
AND read_at IS NULL
|
||||||
|
AND deleted_at IS NULL
|
||||||
|
AND (
|
||||||
|
? = ANY(target_user_ids)
|
||||||
|
OR target_group_ids && ?
|
||||||
|
OR target_role_names && ?
|
||||||
|
)
|
||||||
|
GROUP BY severity
|
||||||
|
""";
|
||||||
|
EnumMap<AlertSeverity, Long> counts = new EnumMap<>(AlertSeverity.class);
|
||||||
|
for (AlertSeverity s : AlertSeverity.values()) counts.put(s, 0L);
|
||||||
|
jdbc.query(sql, (org.springframework.jdbc.core.RowCallbackHandler) rs -> counts.put(
|
||||||
|
AlertSeverity.valueOf(rs.getString("severity")), rs.getLong("cnt")
|
||||||
|
), environmentId, userId, groupArray, roleArray);
|
||||||
|
return counts;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void ack(UUID id, String userId, Instant when) {
|
||||||
|
jdbc.update("""
|
||||||
|
UPDATE alert_instances
|
||||||
|
SET acked_at = ?, acked_by = ?
|
||||||
|
WHERE id = ? AND acked_at IS NULL AND deleted_at IS NULL
|
||||||
|
""", Timestamp.from(when), userId, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void markRead(UUID id, Instant when) {
|
||||||
|
jdbc.update("UPDATE alert_instances SET read_at = ? WHERE id = ? AND read_at IS NULL",
|
||||||
|
Timestamp.from(when), id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void bulkMarkRead(List<UUID> ids, Instant when) {
|
||||||
|
if (ids == null || ids.isEmpty()) return;
|
||||||
|
Array idArray = jdbc.execute((ConnectionCallback<Array>) c ->
|
||||||
|
c.createArrayOf("uuid", ids.toArray()));
|
||||||
|
jdbc.update("""
|
||||||
|
UPDATE alert_instances SET read_at = ?
|
||||||
|
WHERE id = ANY(?) AND read_at IS NULL AND deleted_at IS NULL
|
||||||
|
""", Timestamp.from(when), idArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void softDelete(UUID id, Instant when) {
|
||||||
|
jdbc.update("UPDATE alert_instances SET deleted_at = ? WHERE id = ? AND deleted_at IS NULL",
|
||||||
|
Timestamp.from(when), id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void bulkSoftDelete(List<UUID> ids, Instant when) {
|
||||||
|
if (ids == null || ids.isEmpty()) return;
|
||||||
|
Array idArray = jdbc.execute((ConnectionCallback<Array>) c ->
|
||||||
|
c.createArrayOf("uuid", ids.toArray()));
|
||||||
|
jdbc.update("""
|
||||||
|
UPDATE alert_instances SET deleted_at = ?
|
||||||
|
WHERE id = ANY(?) AND deleted_at IS NULL
|
||||||
|
""", Timestamp.from(when), idArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void restore(UUID id) {
|
||||||
|
jdbc.update("UPDATE alert_instances SET deleted_at = NULL WHERE id = ?", id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void bulkAck(List<UUID> ids, String userId, Instant when) {
|
||||||
|
if (ids == null || ids.isEmpty()) return;
|
||||||
|
Array idArray = jdbc.execute((ConnectionCallback<Array>) c ->
|
||||||
|
c.createArrayOf("uuid", ids.toArray()));
|
||||||
|
jdbc.update("""
|
||||||
|
UPDATE alert_instances SET acked_at = ?, acked_by = ?
|
||||||
|
WHERE id = ANY(?) AND acked_at IS NULL AND deleted_at IS NULL
|
||||||
|
""", Timestamp.from(when), userId, idArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resolve(UUID id, Instant when) {
|
||||||
|
jdbc.update("""
|
||||||
|
UPDATE alert_instances
|
||||||
|
SET state = 'RESOLVED'::alert_state_enum,
|
||||||
|
resolved_at = ?
|
||||||
|
WHERE id = ?
|
||||||
|
""", Timestamp.from(when), id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void markSilenced(UUID id, boolean silenced) {
|
||||||
|
jdbc.update("UPDATE alert_instances SET silenced = ? WHERE id = ?", silenced, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<AlertInstance> listFiringDueForReNotify(Instant now) {
|
||||||
|
return jdbc.query("""
|
||||||
|
SELECT ai.* FROM alert_instances ai
|
||||||
|
JOIN alert_rules ar ON ar.id = ai.rule_id
|
||||||
|
WHERE ai.state = 'FIRING'::alert_state_enum
|
||||||
|
AND ai.silenced = false
|
||||||
|
AND ar.enabled = true
|
||||||
|
AND ar.re_notify_minutes > 0
|
||||||
|
AND ai.last_notified_at IS NOT NULL
|
||||||
|
AND ai.last_notified_at + make_interval(mins => ar.re_notify_minutes) <= ?
|
||||||
|
""", rowMapper(), Timestamp.from(now));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<UUID> filterInEnvLive(List<UUID> ids, UUID environmentId) {
|
||||||
|
if (ids == null || ids.isEmpty()) return List.of();
|
||||||
|
Array idArray = jdbc.execute((ConnectionCallback<Array>) c ->
|
||||||
|
c.createArrayOf("uuid", ids.toArray()));
|
||||||
|
return jdbc.query("""
|
||||||
|
SELECT id FROM alert_instances
|
||||||
|
WHERE id = ANY(?) AND environment_id = ? AND deleted_at IS NULL
|
||||||
|
""", (rs, i) -> (UUID) rs.getObject("id"), idArray, environmentId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void deleteResolvedBefore(Instant cutoff) {
|
||||||
|
jdbc.update("""
|
||||||
|
DELETE FROM alert_instances
|
||||||
|
WHERE state = 'RESOLVED'::alert_state_enum
|
||||||
|
AND resolved_at < ?
|
||||||
|
""", Timestamp.from(cutoff));
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private RowMapper<AlertInstance> rowMapper() {
|
||||||
|
return (rs, i) -> {
|
||||||
|
try {
|
||||||
|
Map<String, Object> snapshot = om.readValue(
|
||||||
|
rs.getString("rule_snapshot"), new TypeReference<>() {});
|
||||||
|
Map<String, Object> context = om.readValue(
|
||||||
|
rs.getString("context"), new TypeReference<>() {});
|
||||||
|
|
||||||
|
Timestamp ackedAt = rs.getTimestamp("acked_at");
|
||||||
|
Timestamp resolvedAt = rs.getTimestamp("resolved_at");
|
||||||
|
Timestamp lastNotifiedAt = rs.getTimestamp("last_notified_at");
|
||||||
|
Timestamp readAt = rs.getTimestamp("read_at");
|
||||||
|
Timestamp deletedAt = rs.getTimestamp("deleted_at");
|
||||||
|
|
||||||
|
Object cvObj = rs.getObject("current_value");
|
||||||
|
Double currentValue = cvObj == null ? null : ((Number) cvObj).doubleValue();
|
||||||
|
Object thObj = rs.getObject("threshold");
|
||||||
|
Double threshold = thObj == null ? null : ((Number) thObj).doubleValue();
|
||||||
|
|
||||||
|
UUID ruleId = rs.getObject("rule_id") == null ? null : (UUID) rs.getObject("rule_id");
|
||||||
|
|
||||||
|
return new AlertInstance(
|
||||||
|
(UUID) rs.getObject("id"),
|
||||||
|
ruleId,
|
||||||
|
snapshot,
|
||||||
|
(UUID) rs.getObject("environment_id"),
|
||||||
|
AlertState.valueOf(rs.getString("state")),
|
||||||
|
AlertSeverity.valueOf(rs.getString("severity")),
|
||||||
|
rs.getTimestamp("fired_at").toInstant(),
|
||||||
|
ackedAt == null ? null : ackedAt.toInstant(),
|
||||||
|
rs.getString("acked_by"),
|
||||||
|
resolvedAt == null ? null : resolvedAt.toInstant(),
|
||||||
|
lastNotifiedAt == null ? null : lastNotifiedAt.toInstant(),
|
||||||
|
readAt == null ? null : readAt.toInstant(),
|
||||||
|
deletedAt == null ? null : deletedAt.toInstant(),
|
||||||
|
rs.getBoolean("silenced"),
|
||||||
|
currentValue,
|
||||||
|
threshold,
|
||||||
|
context,
|
||||||
|
rs.getString("title"),
|
||||||
|
rs.getString("message"),
|
||||||
|
readTextArray(rs.getArray("target_user_ids")),
|
||||||
|
readUuidArray(rs.getArray("target_group_ids")),
|
||||||
|
readTextArray(rs.getArray("target_role_names")));
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IllegalStateException("Failed to map alert_instances row", e);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private String writeJson(Object o) {
|
||||||
|
try { return om.writeValueAsString(o); }
|
||||||
|
catch (Exception e) { throw new IllegalStateException("Failed to serialize JSON", e); }
|
||||||
|
}
|
||||||
|
|
||||||
|
private Timestamp ts(Instant instant) {
|
||||||
|
return instant == null ? null : Timestamp.from(instant);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Array toTextArray(List<String> items) {
|
||||||
|
return jdbc.execute((ConnectionCallback<Array>) conn ->
|
||||||
|
conn.createArrayOf("text", items.toArray()));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Array toUuidArray(List<UUID> ids) {
|
||||||
|
return jdbc.execute((ConnectionCallback<Array>) conn ->
|
||||||
|
conn.createArrayOf("uuid", ids.toArray()));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Array toUuidArrayFromStrings(List<String> ids) {
|
||||||
|
return jdbc.execute((ConnectionCallback<Array>) conn ->
|
||||||
|
conn.createArrayOf("uuid",
|
||||||
|
ids.stream().map(UUID::fromString).toArray()));
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> readTextArray(Array arr) throws SQLException {
|
||||||
|
if (arr == null) return List.of();
|
||||||
|
Object[] raw = (Object[]) arr.getArray();
|
||||||
|
List<String> out = new ArrayList<>(raw.length);
|
||||||
|
for (Object o : raw) out.add((String) o);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<UUID> readUuidArray(Array arr) throws SQLException {
|
||||||
|
if (arr == null) return List.of();
|
||||||
|
Object[] raw = (Object[]) arr.getArray();
|
||||||
|
List<UUID> out = new ArrayList<>(raw.length);
|
||||||
|
for (Object o : raw) out.add((UUID) o);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,200 @@
|
|||||||
|
package io.cameleer.server.app.alerting.storage;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertNotification;
|
||||||
|
import io.cameleer.server.core.alerting.AlertNotificationRepository;
|
||||||
|
import io.cameleer.server.core.alerting.NotificationStatus;
|
||||||
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||||||
|
import org.springframework.jdbc.core.RowMapper;
|
||||||
|
|
||||||
|
import java.sql.Timestamp;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
public class PostgresAlertNotificationRepository implements AlertNotificationRepository {
|
||||||
|
|
||||||
|
private final JdbcTemplate jdbc;
|
||||||
|
private final ObjectMapper om;
|
||||||
|
|
||||||
|
public PostgresAlertNotificationRepository(JdbcTemplate jdbc, ObjectMapper om) {
|
||||||
|
this.jdbc = jdbc;
|
||||||
|
this.om = om;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AlertNotification save(AlertNotification n) {
|
||||||
|
jdbc.update("""
|
||||||
|
INSERT INTO alert_notifications (
|
||||||
|
id, alert_instance_id, webhook_id, outbound_connection_id,
|
||||||
|
status, attempts, next_attempt_at, claimed_by, claimed_until,
|
||||||
|
last_response_status, last_response_snippet, payload, delivered_at, created_at)
|
||||||
|
VALUES (?, ?, ?, ?,
|
||||||
|
?::notification_status_enum, ?, ?, ?, ?,
|
||||||
|
?, ?, ?::jsonb, ?, ?)
|
||||||
|
ON CONFLICT (id) DO UPDATE SET
|
||||||
|
status = EXCLUDED.status,
|
||||||
|
attempts = EXCLUDED.attempts,
|
||||||
|
next_attempt_at = EXCLUDED.next_attempt_at,
|
||||||
|
claimed_by = EXCLUDED.claimed_by,
|
||||||
|
claimed_until = EXCLUDED.claimed_until,
|
||||||
|
last_response_status = EXCLUDED.last_response_status,
|
||||||
|
last_response_snippet = EXCLUDED.last_response_snippet,
|
||||||
|
payload = EXCLUDED.payload,
|
||||||
|
delivered_at = EXCLUDED.delivered_at
|
||||||
|
""",
|
||||||
|
n.id(), n.alertInstanceId(), n.webhookId(), n.outboundConnectionId(),
|
||||||
|
n.status().name(), n.attempts(), Timestamp.from(n.nextAttemptAt()),
|
||||||
|
n.claimedBy(), n.claimedUntil() == null ? null : Timestamp.from(n.claimedUntil()),
|
||||||
|
n.lastResponseStatus(), n.lastResponseSnippet(),
|
||||||
|
writeJson(n.payload()),
|
||||||
|
n.deliveredAt() == null ? null : Timestamp.from(n.deliveredAt()),
|
||||||
|
Timestamp.from(n.createdAt()));
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<AlertNotification> findById(UUID id) {
|
||||||
|
var list = jdbc.query("SELECT * FROM alert_notifications WHERE id = ?", rowMapper(), id);
|
||||||
|
return list.isEmpty() ? Optional.empty() : Optional.of(list.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<AlertNotification> listForInstance(UUID alertInstanceId) {
|
||||||
|
return jdbc.query("""
|
||||||
|
SELECT * FROM alert_notifications
|
||||||
|
WHERE alert_instance_id = ?
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
""", rowMapper(), alertInstanceId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<AlertNotification> claimDueNotifications(String instanceId, int batchSize, int claimTtlSeconds) {
|
||||||
|
String sql = """
|
||||||
|
UPDATE alert_notifications
|
||||||
|
SET claimed_by = ?, claimed_until = now() + (? || ' seconds')::interval
|
||||||
|
WHERE id IN (
|
||||||
|
SELECT id FROM alert_notifications
|
||||||
|
WHERE status = 'PENDING'::notification_status_enum
|
||||||
|
AND next_attempt_at <= now()
|
||||||
|
AND (claimed_until IS NULL OR claimed_until < now())
|
||||||
|
ORDER BY next_attempt_at
|
||||||
|
LIMIT ?
|
||||||
|
FOR UPDATE SKIP LOCKED
|
||||||
|
)
|
||||||
|
RETURNING *
|
||||||
|
""";
|
||||||
|
return jdbc.query(sql, rowMapper(), instanceId, claimTtlSeconds, batchSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void markDelivered(UUID id, int status, String snippet, Instant when) {
|
||||||
|
jdbc.update("""
|
||||||
|
UPDATE alert_notifications
|
||||||
|
SET status = 'DELIVERED'::notification_status_enum,
|
||||||
|
last_response_status = ?,
|
||||||
|
last_response_snippet = ?,
|
||||||
|
delivered_at = ?,
|
||||||
|
claimed_by = NULL,
|
||||||
|
claimed_until = NULL
|
||||||
|
WHERE id = ?
|
||||||
|
""", status, snippet, Timestamp.from(when), id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void scheduleRetry(UUID id, Instant nextAttemptAt, int status, String snippet) {
|
||||||
|
jdbc.update("""
|
||||||
|
UPDATE alert_notifications
|
||||||
|
SET attempts = attempts + 1,
|
||||||
|
next_attempt_at = ?,
|
||||||
|
last_response_status = ?,
|
||||||
|
last_response_snippet = ?,
|
||||||
|
claimed_by = NULL,
|
||||||
|
claimed_until = NULL
|
||||||
|
WHERE id = ?
|
||||||
|
""", Timestamp.from(nextAttemptAt), status, snippet, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void resetForRetry(UUID id, Instant nextAttemptAt) {
|
||||||
|
jdbc.update("""
|
||||||
|
UPDATE alert_notifications
|
||||||
|
SET attempts = 0,
|
||||||
|
status = 'PENDING'::notification_status_enum,
|
||||||
|
next_attempt_at = ?,
|
||||||
|
claimed_by = NULL,
|
||||||
|
claimed_until = NULL,
|
||||||
|
last_response_status = NULL,
|
||||||
|
last_response_snippet = NULL
|
||||||
|
WHERE id = ?
|
||||||
|
""", Timestamp.from(nextAttemptAt), id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void markFailed(UUID id, int status, String snippet) {
|
||||||
|
jdbc.update("""
|
||||||
|
UPDATE alert_notifications
|
||||||
|
SET status = 'FAILED'::notification_status_enum,
|
||||||
|
attempts = attempts + 1,
|
||||||
|
last_response_status = ?,
|
||||||
|
last_response_snippet = ?,
|
||||||
|
claimed_by = NULL,
|
||||||
|
claimed_until = NULL
|
||||||
|
WHERE id = ?
|
||||||
|
""", status, snippet, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void deleteSettledBefore(Instant cutoff) {
|
||||||
|
jdbc.update("""
|
||||||
|
DELETE FROM alert_notifications
|
||||||
|
WHERE status IN ('DELIVERED'::notification_status_enum, 'FAILED'::notification_status_enum)
|
||||||
|
AND created_at < ?
|
||||||
|
""", Timestamp.from(cutoff));
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private RowMapper<AlertNotification> rowMapper() {
|
||||||
|
return (rs, i) -> {
|
||||||
|
try {
|
||||||
|
Map<String, Object> payload = om.readValue(
|
||||||
|
rs.getString("payload"), new TypeReference<>() {});
|
||||||
|
Timestamp claimedUntil = rs.getTimestamp("claimed_until");
|
||||||
|
Timestamp deliveredAt = rs.getTimestamp("delivered_at");
|
||||||
|
Object lastStatus = rs.getObject("last_response_status");
|
||||||
|
|
||||||
|
Object webhookIdObj = rs.getObject("webhook_id");
|
||||||
|
UUID webhookId = webhookIdObj == null ? null : (UUID) webhookIdObj;
|
||||||
|
Object connIdObj = rs.getObject("outbound_connection_id");
|
||||||
|
UUID connId = connIdObj == null ? null : (UUID) connIdObj;
|
||||||
|
|
||||||
|
return new AlertNotification(
|
||||||
|
(UUID) rs.getObject("id"),
|
||||||
|
(UUID) rs.getObject("alert_instance_id"),
|
||||||
|
webhookId,
|
||||||
|
connId,
|
||||||
|
NotificationStatus.valueOf(rs.getString("status")),
|
||||||
|
rs.getInt("attempts"),
|
||||||
|
rs.getTimestamp("next_attempt_at").toInstant(),
|
||||||
|
rs.getString("claimed_by"),
|
||||||
|
claimedUntil == null ? null : claimedUntil.toInstant(),
|
||||||
|
lastStatus == null ? null : ((Number) lastStatus).intValue(),
|
||||||
|
rs.getString("last_response_snippet"),
|
||||||
|
payload,
|
||||||
|
deliveredAt == null ? null : deliveredAt.toInstant(),
|
||||||
|
rs.getTimestamp("created_at").toInstant());
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IllegalStateException("Failed to map alert_notifications row", e);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private String writeJson(Object o) {
|
||||||
|
try { return om.writeValueAsString(o); }
|
||||||
|
catch (Exception e) { throw new IllegalStateException("Failed to serialize JSON", e); }
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,229 @@
|
|||||||
|
package io.cameleer.server.app.alerting.storage;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.*;
|
||||||
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||||||
|
import org.springframework.jdbc.core.RowMapper;
|
||||||
|
|
||||||
|
import java.sql.Timestamp;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class PostgresAlertRuleRepository implements AlertRuleRepository {
|
||||||
|
|
||||||
|
private final JdbcTemplate jdbc;
|
||||||
|
private final ObjectMapper om;
|
||||||
|
|
||||||
|
public PostgresAlertRuleRepository(JdbcTemplate jdbc, ObjectMapper om) {
|
||||||
|
this.jdbc = jdbc;
|
||||||
|
this.om = om;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AlertRule save(AlertRule r) {
|
||||||
|
String sql = """
|
||||||
|
INSERT INTO alert_rules (id, environment_id, name, description, severity, enabled,
|
||||||
|
condition_kind, condition, evaluation_interval_seconds, for_duration_seconds,
|
||||||
|
re_notify_minutes, notification_title_tmpl, notification_message_tmpl,
|
||||||
|
webhooks, next_evaluation_at, claimed_by, claimed_until, eval_state,
|
||||||
|
created_at, created_by, updated_at, updated_by)
|
||||||
|
VALUES (?, ?, ?, ?, ?::severity_enum, ?, ?::condition_kind_enum, ?::jsonb, ?, ?, ?, ?, ?, ?::jsonb,
|
||||||
|
?, ?, ?, ?::jsonb, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT (id) DO UPDATE SET
|
||||||
|
name = EXCLUDED.name, description = EXCLUDED.description,
|
||||||
|
severity = EXCLUDED.severity, enabled = EXCLUDED.enabled,
|
||||||
|
condition_kind = EXCLUDED.condition_kind, condition = EXCLUDED.condition,
|
||||||
|
evaluation_interval_seconds = EXCLUDED.evaluation_interval_seconds,
|
||||||
|
for_duration_seconds = EXCLUDED.for_duration_seconds,
|
||||||
|
re_notify_minutes = EXCLUDED.re_notify_minutes,
|
||||||
|
notification_title_tmpl = EXCLUDED.notification_title_tmpl,
|
||||||
|
notification_message_tmpl = EXCLUDED.notification_message_tmpl,
|
||||||
|
webhooks = EXCLUDED.webhooks, eval_state = EXCLUDED.eval_state,
|
||||||
|
updated_at = EXCLUDED.updated_at, updated_by = EXCLUDED.updated_by
|
||||||
|
""";
|
||||||
|
jdbc.update(sql,
|
||||||
|
r.id(), r.environmentId(), r.name(), r.description(),
|
||||||
|
r.severity().name(), r.enabled(), r.conditionKind().name(),
|
||||||
|
writeJson(r.condition()),
|
||||||
|
r.evaluationIntervalSeconds(), r.forDurationSeconds(), r.reNotifyMinutes(),
|
||||||
|
r.notificationTitleTmpl(), r.notificationMessageTmpl(),
|
||||||
|
writeJson(r.webhooks()),
|
||||||
|
Timestamp.from(r.nextEvaluationAt()),
|
||||||
|
r.claimedBy(),
|
||||||
|
r.claimedUntil() == null ? null : Timestamp.from(r.claimedUntil()),
|
||||||
|
writeJson(r.evalState()),
|
||||||
|
Timestamp.from(r.createdAt()), r.createdBy(),
|
||||||
|
Timestamp.from(r.updatedAt()), r.updatedBy());
|
||||||
|
saveTargets(r.id(), r.targets());
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveTargets(UUID ruleId, List<AlertRuleTarget> targets) {
|
||||||
|
jdbc.update("DELETE FROM alert_rule_targets WHERE rule_id = ?", ruleId);
|
||||||
|
if (targets == null || targets.isEmpty()) return;
|
||||||
|
jdbc.batchUpdate(
|
||||||
|
"INSERT INTO alert_rule_targets (id, rule_id, target_kind, target_id) VALUES (?, ?, ?::target_kind_enum, ?)",
|
||||||
|
targets, targets.size(), (ps, t) -> {
|
||||||
|
ps.setObject(1, t.id() != null ? t.id() : UUID.randomUUID());
|
||||||
|
ps.setObject(2, ruleId);
|
||||||
|
ps.setString(3, t.kind().name());
|
||||||
|
ps.setString(4, t.targetId());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<AlertRule> findById(UUID id) {
|
||||||
|
var list = jdbc.query("SELECT * FROM alert_rules WHERE id = ?", rowMapper(), id);
|
||||||
|
if (list.isEmpty()) return Optional.empty();
|
||||||
|
return Optional.of(withTargets(list).get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<AlertRule> listByEnvironment(UUID environmentId) {
|
||||||
|
var list = jdbc.query(
|
||||||
|
"SELECT * FROM alert_rules WHERE environment_id = ? ORDER BY created_at DESC",
|
||||||
|
rowMapper(), environmentId);
|
||||||
|
return withTargets(list);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<AlertRule> findAllByOutboundConnectionId(UUID connectionId) {
|
||||||
|
String sql = """
|
||||||
|
SELECT * FROM alert_rules
|
||||||
|
WHERE webhooks @> ?::jsonb
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
""";
|
||||||
|
String predicate = "[{\"outboundConnectionId\":\"" + connectionId + "\"}]";
|
||||||
|
return jdbc.query(sql, rowMapper(), predicate);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<UUID> findRuleIdsByOutboundConnectionId(UUID connectionId) {
|
||||||
|
String sql = """
|
||||||
|
SELECT id FROM alert_rules
|
||||||
|
WHERE webhooks @> ?::jsonb
|
||||||
|
""";
|
||||||
|
String predicate = "[{\"outboundConnectionId\":\"" + connectionId + "\"}]";
|
||||||
|
return jdbc.queryForList(sql, UUID.class, predicate);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void delete(UUID id) {
|
||||||
|
jdbc.update("DELETE FROM alert_rules WHERE id = ?", id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long count() {
|
||||||
|
Long n = jdbc.queryForObject("SELECT COUNT(*) FROM alert_rules", Long.class);
|
||||||
|
return n == null ? 0L : n;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<AlertRule> claimDueRules(String instanceId, int batchSize, int claimTtlSeconds) {
|
||||||
|
String sql = """
|
||||||
|
UPDATE alert_rules
|
||||||
|
SET claimed_by = ?, claimed_until = now() + (? || ' seconds')::interval
|
||||||
|
WHERE id IN (
|
||||||
|
SELECT id FROM alert_rules
|
||||||
|
WHERE enabled = true
|
||||||
|
AND next_evaluation_at <= now()
|
||||||
|
AND (claimed_until IS NULL OR claimed_until < now())
|
||||||
|
ORDER BY next_evaluation_at
|
||||||
|
LIMIT ?
|
||||||
|
FOR UPDATE SKIP LOCKED
|
||||||
|
)
|
||||||
|
RETURNING *
|
||||||
|
""";
|
||||||
|
List<AlertRule> rules = jdbc.query(sql, rowMapper(), instanceId, claimTtlSeconds, batchSize);
|
||||||
|
return withTargets(rules);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Batch-loads targets for the given rules and returns new rule instances with targets populated. */
|
||||||
|
private List<AlertRule> withTargets(List<AlertRule> rules) {
|
||||||
|
if (rules.isEmpty()) return rules;
|
||||||
|
// Build IN clause
|
||||||
|
String inClause = rules.stream()
|
||||||
|
.map(r -> "'" + r.id() + "'")
|
||||||
|
.collect(java.util.stream.Collectors.joining(","));
|
||||||
|
String sql = "SELECT * FROM alert_rule_targets WHERE rule_id IN (" + inClause + ")";
|
||||||
|
Map<UUID, List<AlertRuleTarget>> byRuleId = new HashMap<>();
|
||||||
|
jdbc.query(sql, rs -> {
|
||||||
|
UUID ruleId = (UUID) rs.getObject("rule_id");
|
||||||
|
AlertRuleTarget t = new AlertRuleTarget(
|
||||||
|
(UUID) rs.getObject("id"),
|
||||||
|
ruleId,
|
||||||
|
TargetKind.valueOf(rs.getString("target_kind")),
|
||||||
|
rs.getString("target_id"));
|
||||||
|
byRuleId.computeIfAbsent(ruleId, k -> new ArrayList<>()).add(t);
|
||||||
|
});
|
||||||
|
return rules.stream()
|
||||||
|
.map(r -> new AlertRule(
|
||||||
|
r.id(), r.environmentId(), r.name(), r.description(),
|
||||||
|
r.severity(), r.enabled(), r.conditionKind(), r.condition(),
|
||||||
|
r.evaluationIntervalSeconds(), r.forDurationSeconds(), r.reNotifyMinutes(),
|
||||||
|
r.notificationTitleTmpl(), r.notificationMessageTmpl(),
|
||||||
|
r.webhooks(), byRuleId.getOrDefault(r.id(), List.of()),
|
||||||
|
r.nextEvaluationAt(), r.claimedBy(), r.claimedUntil(), r.evalState(),
|
||||||
|
r.createdAt(), r.createdBy(), r.updatedAt(), r.updatedBy()))
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void releaseClaim(UUID ruleId, Instant nextEvaluationAt, Map<String, Object> evalState) {
|
||||||
|
jdbc.update("""
|
||||||
|
UPDATE alert_rules
|
||||||
|
SET claimed_by = NULL, claimed_until = NULL,
|
||||||
|
next_evaluation_at = ?, eval_state = ?::jsonb
|
||||||
|
WHERE id = ?
|
||||||
|
""",
|
||||||
|
Timestamp.from(nextEvaluationAt), writeJson(evalState), ruleId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private RowMapper<AlertRule> rowMapper() {
|
||||||
|
return (rs, i) -> {
|
||||||
|
try {
|
||||||
|
ConditionKind kind = ConditionKind.valueOf(rs.getString("condition_kind"));
|
||||||
|
AlertCondition cond = om.readValue(rs.getString("condition"), AlertCondition.class);
|
||||||
|
List<WebhookBinding> webhooks = om.readValue(
|
||||||
|
rs.getString("webhooks"), new TypeReference<>() {});
|
||||||
|
Map<String, Object> evalState = om.readValue(
|
||||||
|
rs.getString("eval_state"), new TypeReference<>() {});
|
||||||
|
|
||||||
|
Timestamp cu = rs.getTimestamp("claimed_until");
|
||||||
|
return new AlertRule(
|
||||||
|
(UUID) rs.getObject("id"),
|
||||||
|
(UUID) rs.getObject("environment_id"),
|
||||||
|
rs.getString("name"),
|
||||||
|
rs.getString("description"),
|
||||||
|
AlertSeverity.valueOf(rs.getString("severity")),
|
||||||
|
rs.getBoolean("enabled"),
|
||||||
|
kind, cond,
|
||||||
|
rs.getInt("evaluation_interval_seconds"),
|
||||||
|
rs.getInt("for_duration_seconds"),
|
||||||
|
rs.getInt("re_notify_minutes"),
|
||||||
|
rs.getString("notification_title_tmpl"),
|
||||||
|
rs.getString("notification_message_tmpl"),
|
||||||
|
webhooks, List.of(),
|
||||||
|
rs.getTimestamp("next_evaluation_at").toInstant(),
|
||||||
|
rs.getString("claimed_by"),
|
||||||
|
cu == null ? null : cu.toInstant(),
|
||||||
|
evalState,
|
||||||
|
rs.getTimestamp("created_at").toInstant(),
|
||||||
|
rs.getString("created_by"),
|
||||||
|
rs.getTimestamp("updated_at").toInstant(),
|
||||||
|
rs.getString("updated_by"));
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IllegalStateException("Failed to map alert_rules row", e);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private String writeJson(Object o) {
|
||||||
|
try {
|
||||||
|
return om.writeValueAsString(o);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IllegalStateException("Failed to serialize to JSON", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,101 @@
|
|||||||
|
package io.cameleer.server.app.alerting.storage;
|
||||||
|
|
||||||
|
import io.cameleer.server.core.alerting.AlertSilence;
|
||||||
|
import io.cameleer.server.core.alerting.AlertSilenceRepository;
|
||||||
|
import io.cameleer.server.core.alerting.AlertSeverity;
|
||||||
|
import io.cameleer.server.core.alerting.SilenceMatcher;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||||||
|
import org.springframework.jdbc.core.RowMapper;
|
||||||
|
|
||||||
|
import java.sql.Timestamp;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
public class PostgresAlertSilenceRepository implements AlertSilenceRepository {
|
||||||
|
|
||||||
|
private final JdbcTemplate jdbc;
|
||||||
|
private final ObjectMapper om;
|
||||||
|
|
||||||
|
public PostgresAlertSilenceRepository(JdbcTemplate jdbc, ObjectMapper om) {
|
||||||
|
this.jdbc = jdbc;
|
||||||
|
this.om = om;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AlertSilence save(AlertSilence s) {
|
||||||
|
jdbc.update("""
|
||||||
|
INSERT INTO alert_silences (id, environment_id, matcher, reason, starts_at, ends_at, created_by, created_at)
|
||||||
|
VALUES (?, ?, ?::jsonb, ?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT (id) DO UPDATE SET
|
||||||
|
matcher = EXCLUDED.matcher,
|
||||||
|
reason = EXCLUDED.reason,
|
||||||
|
starts_at = EXCLUDED.starts_at,
|
||||||
|
ends_at = EXCLUDED.ends_at
|
||||||
|
""",
|
||||||
|
s.id(), s.environmentId(), writeJson(s.matcher()),
|
||||||
|
s.reason(),
|
||||||
|
Timestamp.from(s.startsAt()), Timestamp.from(s.endsAt()),
|
||||||
|
s.createdBy(), Timestamp.from(s.createdAt()));
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<AlertSilence> findById(UUID id) {
|
||||||
|
var list = jdbc.query("SELECT * FROM alert_silences WHERE id = ?", rowMapper(), id);
|
||||||
|
return list.isEmpty() ? Optional.empty() : Optional.of(list.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<AlertSilence> listActive(UUID environmentId, Instant when) {
|
||||||
|
Timestamp t = Timestamp.from(when);
|
||||||
|
return jdbc.query("""
|
||||||
|
SELECT * FROM alert_silences
|
||||||
|
WHERE environment_id = ?
|
||||||
|
AND starts_at <= ? AND ends_at >= ?
|
||||||
|
ORDER BY starts_at
|
||||||
|
""", rowMapper(), environmentId, t, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<AlertSilence> listByEnvironment(UUID environmentId) {
|
||||||
|
return jdbc.query("""
|
||||||
|
SELECT * FROM alert_silences
|
||||||
|
WHERE environment_id = ?
|
||||||
|
ORDER BY starts_at DESC
|
||||||
|
""", rowMapper(), environmentId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void delete(UUID id) {
|
||||||
|
jdbc.update("DELETE FROM alert_silences WHERE id = ?", id);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private RowMapper<AlertSilence> rowMapper() {
|
||||||
|
return (rs, i) -> {
|
||||||
|
try {
|
||||||
|
SilenceMatcher matcher = om.readValue(rs.getString("matcher"), SilenceMatcher.class);
|
||||||
|
return new AlertSilence(
|
||||||
|
(UUID) rs.getObject("id"),
|
||||||
|
(UUID) rs.getObject("environment_id"),
|
||||||
|
matcher,
|
||||||
|
rs.getString("reason"),
|
||||||
|
rs.getTimestamp("starts_at").toInstant(),
|
||||||
|
rs.getTimestamp("ends_at").toInstant(),
|
||||||
|
rs.getString("created_by"),
|
||||||
|
rs.getTimestamp("created_at").toInstant());
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IllegalStateException("Failed to map alert_silences row", e);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private String writeJson(Object o) {
|
||||||
|
try { return om.writeValueAsString(o); }
|
||||||
|
catch (Exception e) { throw new IllegalStateException("Failed to serialize JSON", e); }
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
package com.cameleer.server.app.analytics;
|
package io.cameleer.server.app.analytics;
|
||||||
|
|
||||||
import com.cameleer.server.app.storage.ClickHouseUsageTracker;
|
import io.cameleer.server.app.storage.ClickHouseUsageTracker;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.scheduling.annotation.Scheduled;
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
package com.cameleer.server.app.analytics;
|
package io.cameleer.server.app.analytics;
|
||||||
|
|
||||||
import com.cameleer.server.core.analytics.UsageEvent;
|
import io.cameleer.server.core.analytics.UsageEvent;
|
||||||
import com.cameleer.server.core.analytics.UsageTracker;
|
import io.cameleer.server.core.analytics.UsageTracker;
|
||||||
import jakarta.servlet.http.HttpServletRequest;
|
import jakarta.servlet.http.HttpServletRequest;
|
||||||
import jakarta.servlet.http.HttpServletResponse;
|
import jakarta.servlet.http.HttpServletResponse;
|
||||||
import org.springframework.security.core.Authentication;
|
import org.springframework.security.core.Authentication;
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
package com.cameleer.server.app.config;
|
package io.cameleer.server.app.config;
|
||||||
|
|
||||||
import com.cameleer.server.core.agent.AgentEventRepository;
|
import io.cameleer.server.core.agent.AgentEventRepository;
|
||||||
import com.cameleer.server.core.agent.AgentEventService;
|
import io.cameleer.server.core.agent.AgentEventService;
|
||||||
import com.cameleer.server.core.agent.AgentRegistryService;
|
import io.cameleer.server.core.agent.AgentRegistryService;
|
||||||
import com.cameleer.server.core.agent.RouteStateRegistry;
|
import io.cameleer.server.core.agent.RouteStateRegistry;
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
|
||||||
@@ -17,11 +17,13 @@ import org.springframework.context.annotation.Configuration;
|
|||||||
public class AgentRegistryBeanConfig {
|
public class AgentRegistryBeanConfig {
|
||||||
|
|
||||||
@Bean
|
@Bean
|
||||||
public AgentRegistryService agentRegistryService(AgentRegistryConfig config) {
|
public AgentRegistryService agentRegistryService(AgentRegistryConfig config,
|
||||||
|
io.cameleer.server.app.license.LicenseEnforcer enforcer) {
|
||||||
return new AgentRegistryService(
|
return new AgentRegistryService(
|
||||||
config.getStaleThresholdMs(),
|
config.getStaleThresholdMs(),
|
||||||
config.getDeadThresholdMs(),
|
config.getDeadThresholdMs(),
|
||||||
config.getCommandExpiryMs()
|
config.getCommandExpiryMs(),
|
||||||
|
current -> enforcer.assertWithinCap("max_agents", current, 1)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.cameleer.server.app.config;
|
package io.cameleer.server.app.config;
|
||||||
|
|
||||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.cameleer.server.app.config;
|
package io.cameleer.server.app.config;
|
||||||
|
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import org.springframework.beans.factory.annotation.Qualifier;
|
import org.springframework.beans.factory.annotation.Qualifier;
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.cameleer.server.app.config;
|
package io.cameleer.server.app.config;
|
||||||
|
|
||||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.cameleer.server.app.config;
|
package io.cameleer.server.app.config;
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@@ -26,9 +26,14 @@ public class ClickHouseSchemaInitializer {
|
|||||||
|
|
||||||
@EventListener(ApplicationReadyEvent.class)
|
@EventListener(ApplicationReadyEvent.class)
|
||||||
public void initializeSchema() {
|
public void initializeSchema() {
|
||||||
|
runScript("clickhouse/init.sql");
|
||||||
|
runScript("clickhouse/alerting_projections.sql");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void runScript(String classpathResource) {
|
||||||
try {
|
try {
|
||||||
PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
|
PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
|
||||||
Resource script = resolver.getResource("classpath:clickhouse/init.sql");
|
Resource script = resolver.getResource("classpath:" + classpathResource);
|
||||||
|
|
||||||
String sql = script.getContentAsString(StandardCharsets.UTF_8);
|
String sql = script.getContentAsString(StandardCharsets.UTF_8);
|
||||||
log.info("Executing ClickHouse schema: {}", script.getFilename());
|
log.info("Executing ClickHouse schema: {}", script.getFilename());
|
||||||
@@ -41,13 +46,28 @@ public class ClickHouseSchemaInitializer {
|
|||||||
.filter(line -> !line.isEmpty())
|
.filter(line -> !line.isEmpty())
|
||||||
.reduce("", (a, b) -> a + b);
|
.reduce("", (a, b) -> a + b);
|
||||||
if (!withoutComments.isEmpty()) {
|
if (!withoutComments.isEmpty()) {
|
||||||
|
String upper = withoutComments.toUpperCase();
|
||||||
|
boolean isBestEffort = upper.contains("MATERIALIZE PROJECTION")
|
||||||
|
|| upper.contains("ADD PROJECTION");
|
||||||
|
try {
|
||||||
clickHouseJdbc.execute(trimmed);
|
clickHouseJdbc.execute(trimmed);
|
||||||
|
} catch (Exception e) {
|
||||||
|
if (isBestEffort) {
|
||||||
|
// ADD PROJECTION on ReplacingMergeTree requires a session setting not available
|
||||||
|
// via JDBC pool; MATERIALIZE can fail on empty tables — both are non-fatal.
|
||||||
|
log.warn("Projection DDL step skipped (non-fatal): {} — {}",
|
||||||
|
trimmed.substring(0, Math.min(trimmed.length(), 120)), e.getMessage());
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info("ClickHouse schema initialization complete");
|
log.info("ClickHouse schema script complete: {}", script.getFilename());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("ClickHouse schema initialization failed — server will continue but ClickHouse features may not work", e);
|
log.error("ClickHouse schema script failed [{}] — server will continue but ClickHouse features may not work",
|
||||||
|
classpathResource, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
package com.cameleer.server.app.config;
|
package io.cameleer.server.app.config;
|
||||||
|
|
||||||
import com.cameleer.server.app.diagram.ElkDiagramRenderer;
|
import io.cameleer.server.app.diagram.ElkDiagramRenderer;
|
||||||
import com.cameleer.server.core.diagram.DiagramRenderer;
|
import io.cameleer.server.core.diagram.DiagramRenderer;
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
package com.cameleer.server.app.config;
|
package io.cameleer.server.app.config;
|
||||||
|
|
||||||
import com.cameleer.server.core.ingestion.BufferedLogEntry;
|
import io.cameleer.server.core.ingestion.BufferedLogEntry;
|
||||||
import com.cameleer.server.core.ingestion.ChunkAccumulator;
|
import io.cameleer.server.core.ingestion.ChunkAccumulator;
|
||||||
import com.cameleer.server.core.ingestion.MergedExecution;
|
import io.cameleer.server.core.ingestion.MergedExecution;
|
||||||
import com.cameleer.server.core.ingestion.WriteBuffer;
|
import io.cameleer.server.core.ingestion.WriteBuffer;
|
||||||
import com.cameleer.server.core.storage.model.MetricsSnapshot;
|
import io.cameleer.server.core.storage.model.MetricsSnapshot;
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
|
||||||
@@ -12,7 +12,7 @@ import org.springframework.context.annotation.Configuration;
|
|||||||
* Creates write buffer beans for the ingestion pipeline.
|
* Creates write buffer beans for the ingestion pipeline.
|
||||||
* <p>
|
* <p>
|
||||||
* Each {@link WriteBuffer} instance is shared between the
|
* Each {@link WriteBuffer} instance is shared between the
|
||||||
* {@link com.cameleer.server.core.ingestion.IngestionService} (producer side)
|
* {@link io.cameleer.server.core.ingestion.IngestionService} (producer side)
|
||||||
* and the flush scheduler (consumer side).
|
* and the flush scheduler (consumer side).
|
||||||
*/
|
*/
|
||||||
@Configuration
|
@Configuration
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user