Skip to content

Commit de277d7

Browse files
committed
[Lib][LogRef] Use LogFormat to isolate body when extracting vars
In order to match a variable with the log line a regex is used. Previously this was using \w, which isn't correct since if there was punctuation, then the capture would stop at it. This tries to improve the variable extraction by using the log format to isolate the body and then using the more permissive .+ to capture the variables. There's some additional refactoring I'd like to do to combine LogRef and LogFormat into a singular struct now that there is more there.
1 parent 2ad6a94 commit de277d7

File tree

4 files changed

+132
-40
lines changed

4 files changed

+132
-40
lines changed

src/lib.rs

Lines changed: 118 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -98,20 +98,50 @@ pub struct LogMapping<'a> {
9898
#[derive(Copy, Clone, Debug, PartialEq)]
9999
pub struct LogRef<'a> {
100100
pub line: &'a str,
101+
details: Option<LogDetails<'a>>,
102+
}
103+
104+
#[derive(Copy, Clone, Debug, PartialEq)]
105+
struct LogDetails<'a> {
106+
file: Option<&'a str>,
107+
lineno: Option<u32>,
108+
body: Option<&'a str>,
109+
}
110+
111+
impl<'a> LogRef<'a> {
112+
pub fn new(line: &'a str) -> Self {
113+
Self {
114+
line,
115+
details: None,
116+
}
117+
}
118+
119+
pub fn with_format(line: &'a str, log_format: LogFormat) -> Self {
120+
let captures = log_format.captures(line);
121+
let file = captures.name("file").map(|file_match| file_match.as_str());
122+
let lineno = captures
123+
.name("line")
124+
.and_then(|lineno| lineno.as_str().parse::<u32>().ok());
125+
let body = captures.name("body").map(|body| body.as_str());
126+
Self {
127+
line,
128+
details: Some(LogDetails { file, lineno, body }),
129+
}
130+
}
101131
}
102132

103133
pub fn link_to_source<'a>(log_ref: &LogRef, src_refs: &'a [SourceRef]) -> Option<&'a SourceRef> {
104134
src_refs
105135
.iter()
106-
.find(|&source_ref| source_ref.captures(log_ref).is_some())
136+
.find(|&source_ref| source_ref.captures(log_ref.line).is_some())
107137
}
108138

109139
pub fn lookup_source<'a>(
110140
log_ref: &LogRef,
111141
log_format: &LogFormat,
112142
src_refs: &'a [SourceRef],
113143
) -> Option<&'a SourceRef> {
114-
let captures = log_format.captures(log_ref);
144+
let captures = log_format.captures(log_ref.line);
115145
let file_name = captures.name("file").map_or("", |m| m.as_str());
116146
let line_no: usize = captures
117147
.name("line")
@@ -125,12 +155,16 @@ pub fn lookup_source<'a>(
125155
}
126156

127157
pub fn extract_variables<'a>(
128-
log_line: LogRef<'a>,
158+
log_ref: LogRef<'a>,
129159
src_ref: &'a SourceRef,
130160
) -> HashMap<String, String> {
131161
let mut variables = HashMap::new();
162+
let line = match log_ref.details {
163+
Some(details) => details.body.unwrap_or(log_ref.line),
164+
None => log_ref.line,
165+
};
132166
if !src_ref.vars.is_empty() {
133-
if let Some(captures) = src_ref.captures(&log_line) {
167+
if let Some(captures) = src_ref.captures(line) {
134168
for i in 0..captures.len() - 1 {
135169
variables.insert(
136170
src_ref.vars[i].to_string(),
@@ -143,19 +177,22 @@ pub fn extract_variables<'a>(
143177
variables
144178
}
145179

146-
pub fn filter_log(buffer: &str, filter: Filter) -> Vec<LogRef> {
147-
let results = buffer
180+
pub fn filter_log(buffer: &str, filter: Filter, log_format: Option<String>) -> Vec<LogRef> {
181+
let log_format = LogFormat::new(log_format);
182+
buffer
148183
.lines()
149184
.enumerate()
150185
.filter_map(|(line_no, line)| {
151186
if filter.start <= line_no && line_no < filter.end {
152-
Some(LogRef { line })
187+
match &log_format {
188+
Some(format) => Some(LogRef::with_format(line, format.clone())),
189+
None => Some(LogRef::new(line)),
190+
}
153191
} else {
154192
None
155193
}
156194
})
157-
.collect();
158-
results
195+
.collect()
159196
}
160197

161198
pub fn do_mappings<'a>(
@@ -168,7 +205,7 @@ pub fn do_mappings<'a>(
168205
let mut sources = CodeSource::find_code(sources, source_filter);
169206
let src_logs = extract_logging(&mut sources);
170207
let call_graph = CallGraph::new(&mut sources);
171-
let use_hints = log_format.clone().is_some_and(|f| f.has_hints());
208+
let use_hints = log_format.clone().is_some_and(|f| f.has_src_hint());
172209

173210
log_refs
174211
.into_iter()
@@ -283,23 +320,47 @@ mod tests {
283320
#[test]
284321
fn test_filter_log_defaults() {
285322
let buffer = String::from("hello\nwarning\nerror\nboom");
286-
let result = filter_log(&buffer, Filter::default());
323+
let result = filter_log(&buffer, Filter::default(), None);
287324
assert_eq!(
288325
result,
289326
vec![
290-
LogRef { line: "hello" },
291-
LogRef { line: "warning" },
292-
LogRef { line: "error" },
293-
LogRef { line: "boom" }
327+
LogRef::new("hello"),
328+
LogRef::new("warning"),
329+
LogRef::new("error"),
330+
LogRef::new("boom"),
294331
]
295332
);
296333
}
297334

298335
#[test]
299336
fn test_filter_log_with_filter() {
300337
let buffer = String::from("hello\nwarning\nerror\nboom");
301-
let result = filter_log(&buffer, Filter { start: 1, end: 2 });
302-
assert_eq!(result, vec![LogRef { line: "warning" }]);
338+
let result = filter_log(&buffer, Filter { start: 1, end: 2 }, None);
339+
assert_eq!(result, vec![LogRef::new("warning")]);
340+
}
341+
342+
#[test]
343+
fn test_filter_log_with_format() {
344+
let buffer = String::from(
345+
"2025-04-10 22:12:52 INFO JvmPauseMonitor:146 - JvmPauseMonitor-n0: Started",
346+
);
347+
let regex = String::from(
348+
r"^(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) (?<level>\w+)\s+ (?<file>[\w$.]+):(?<line>\d+) - (?<body>.*)$",
349+
);
350+
let log_format = Some(regex);
351+
let result = filter_log(&buffer, Filter::default(), log_format);
352+
let details = Some(LogDetails {
353+
file: Some("JvmPauseMonitor"),
354+
lineno: Some(146),
355+
body: Some("JvmPauseMonitor-n0: Started"),
356+
});
357+
assert_eq!(
358+
result,
359+
vec![LogRef {
360+
line: "2025-04-10 22:12:52 INFO JvmPauseMonitor:146 - JvmPauseMonitor-n0: Started",
361+
details
362+
}]
363+
);
303364
}
304365

305366
const TEST_SOURCE: &str = r#"
@@ -345,9 +406,8 @@ fn nope(i: u32) {
345406

346407
#[test]
347408
fn test_link_to_source() {
348-
let log_ref = LogRef {
349-
line: "[2024-02-15T03:46:44Z DEBUG stack] you're only as funky as your last cut",
350-
};
409+
let log_ref =
410+
LogRef::new("[2024-02-15T03:46:44Z DEBUG stack] you're only as funky as your last cut");
351411
let code = CodeSource::new(PathBuf::from("in-mem.rs"), Box::new(TEST_SOURCE.as_bytes()));
352412
let src_refs = extract_logging(&mut [code]);
353413
assert_eq!(src_refs.len(), 2);
@@ -357,10 +417,7 @@ fn nope(i: u32) {
357417

358418
#[test]
359419
fn test_link_to_source_no_matches() {
360-
let log_ref = LogRef {
361-
line: "[2024-02-26T03:44:40Z DEBUG stack] nope!",
362-
};
363-
420+
let log_ref = LogRef::new("[2024-02-26T03:44:40Z DEBUG stack] nope!");
364421
let code = CodeSource::new(PathBuf::from("in-mem.rs"), Box::new(TEST_SOURCE.as_bytes()));
365422
let src_refs = extract_logging(&mut [code]);
366423
assert_eq!(src_refs.len(), 2);
@@ -370,16 +427,50 @@ fn nope(i: u32) {
370427

371428
#[test]
372429
fn test_extract_variables() {
373-
let log_ref = LogRef {
374-
line: "[2024-02-15T03:46:44Z DEBUG nope] this won't match i=1",
375-
};
430+
let log_ref = LogRef::new("[2024-02-15T03:46:44Z DEBUG nope] this won't match i=1");
376431
let code = CodeSource::new(PathBuf::from("in-mem.rs"), Box::new(TEST_SOURCE.as_bytes()));
377432
let src_refs = extract_logging(&mut [code]);
378433
assert_eq!(src_refs.len(), 2);
379434
let vars = extract_variables(log_ref, &src_refs[1]);
380435
assert_eq!(vars.get("i").map(|val| val.as_str()), Some("1"));
381436
}
382437

438+
const TEST_PUNC_SRC: &str = r#"""
439+
private void run() {
440+
LOG.info("{}: Started", this);
441+
try {
442+
for (; Thread.currentThread().equals(threadRef.get()); ) {
443+
detectPause();
444+
}
445+
} finally {
446+
LOG.info("{}: Stopped", this);
447+
}
448+
}
449+
"""#;
450+
#[test]
451+
fn test_extract_var_punctuation() {
452+
let regex = String::from(
453+
r"^(?<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) (?<level>\w+)\s+ (?<file>[\w$.]+):(?<line>\d+) - (?<body>.*)$",
454+
);
455+
let log_format = Some(regex);
456+
let log_format = LogFormat::new(log_format).unwrap();
457+
let log_ref = LogRef::with_format(
458+
"2025-04-10 22:12:52 INFO JvmPauseMonitor:146 - JvmPauseMonitor-n0: Started",
459+
log_format,
460+
);
461+
let code = CodeSource::new(
462+
PathBuf::from("in-mem.java"),
463+
Box::new(TEST_PUNC_SRC.as_bytes()),
464+
);
465+
let src_refs = extract_logging(&mut [code]);
466+
assert_eq!(src_refs.len(), 2);
467+
let vars = extract_variables(log_ref, &src_refs[0]);
468+
assert_eq!(
469+
vars.get("this").map(|val| val.as_str()),
470+
Some("JvmPauseMonitor-n0")
471+
);
472+
}
473+
383474
#[test]
384475
fn test_find_possible_paths() {
385476
let code = CodeSource::new(PathBuf::from("in-mem.rs"), Box::new(TEST_SOURCE.as_bytes()));

src/log_format.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,26 @@ impl LogFormat {
1515
})
1616
}
1717

18-
pub fn has_hints(self: LogFormat) -> bool {
18+
pub fn has_src_hint(self: LogFormat) -> bool {
1919
let mut flatten = self.regex.capture_names().flatten();
2020
flatten.any(|name| name == "line") && flatten.any(|name| name == "file")
2121
}
2222

2323
pub fn build_src_filter(&self, log_refs: &Vec<LogRef>) -> Vec<String> {
2424
let mut results = Vec::new();
2525
for log_ref in log_refs {
26-
let captures = self.captures(log_ref);
26+
let captures = self.captures(log_ref.line);
2727
if let Some(file_match) = captures.name("file") {
2828
results.push(file_match.as_str().to_string());
2929
}
3030
}
3131
results
3232
}
3333

34-
pub fn captures<'a>(&self, log_ref: &LogRef<'a>) -> Captures<'a> {
34+
pub fn captures<'a>(&self, line: &'a str) -> Captures<'a> {
3535
self.regex
36-
.captures(log_ref.line)
37-
.unwrap_or_else(|| panic!("Couldn't match `{}` with `{:?}`", log_ref.line, self.regex))
36+
.captures(line)
37+
.unwrap_or_else(|| panic!("Couldn't match `{}` with `{:?}`", line, self.regex))
3838
}
3939
}
4040

src/main.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,9 @@ fn main() -> Result<(), Box<dyn Error>> {
4343
end: args.end.unwrap_or(usize::MAX),
4444
};
4545

46-
let filtered = filter_log(&buffer, filter);
47-
let log_mappings = do_mappings(filtered, &args.sources, args.format);
46+
let filtered = filter_log(&buffer, filter, args.format.clone());
47+
// TODO: try and remove log format here
48+
let log_mappings = do_mappings(filtered, &args.sources, args.format.clone());
4849

4950
for mapping in log_mappings {
5051
let serialized = serde_json::to_string(&mapping).unwrap();

src/source_ref.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use core::fmt;
33
use regex::{Captures, Regex};
44
use serde::Serialize;
55

6-
use crate::{CodeSource, LogRef, QueryResult};
6+
use crate::{CodeSource, QueryResult};
77

88
// TODO: get rid of this clone?
99
#[derive(Clone, Debug, Serialize)]
@@ -48,8 +48,8 @@ impl SourceRef {
4848
}
4949
}
5050

51-
pub fn captures<'a>(&self, log_ref: &LogRef<'a>) -> Option<Captures<'a>> {
52-
self.matcher.captures(log_ref.line)
51+
pub fn captures<'a>(&self, line: &'a str) -> Option<Captures<'a>> {
52+
self.matcher.captures(line)
5353
}
5454
}
5555

@@ -84,7 +84,7 @@ fn build_matcher(text: &str) -> Regex {
8484
.split(text)
8585
.map(regex::escape)
8686
.collect::<Vec<String>>()
87-
.join(r#"(\w+)"#);
87+
.join(r#"(.+)"#);
8888
// println!("escaped = {}", Regex::new(&escaped).unwrap().as_str());
8989
Regex::new(&escaped).unwrap()
9090
}
@@ -98,7 +98,7 @@ mod tests {
9898
fn test_build_matcher_needs_escape() {
9999
let matcher = build_matcher("{}) {}, {}");
100100
assert_eq!(
101-
Regex::new(r#"(\w+)\) (\w+), (\w+)"#).unwrap().as_str(),
101+
Regex::new(r#"(.+)\) (.+), (.+)"#).unwrap().as_str(),
102102
matcher.as_str()
103103
);
104104
}
@@ -107,7 +107,7 @@ mod tests {
107107
fn test_build_matcher_mix() {
108108
let matcher = build_matcher("{}) {:?}, {foo.bar}");
109109
assert_eq!(
110-
Regex::new(r#"(\w+)\) (\w+), (\w+)"#).unwrap().as_str(),
110+
Regex::new(r#"(.+)\) (.+), (.+)"#).unwrap().as_str(),
111111
matcher.as_str()
112112
);
113113
}

0 commit comments

Comments
 (0)