Skip to content

Commit b73cfdf

Browse files
committed
Eager caching with parse hints (native-layer only)
1 parent f0934fd commit b73cfdf

4 files changed

Lines changed: 114 additions & 163 deletions

File tree

krabs/krabs/parser.hpp

Lines changed: 103 additions & 82 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,15 @@ namespace krabs {
7878
template <typename T>
7979
bool try_parse(std::wstring_view name, T &out);
8080

81+
/**
82+
* <summary>
83+
* Attempts to retrieve the given property by name and type,
84+
* starting the name scan at the given hint index.
85+
* </summary>
86+
*/
87+
template <typename T>
88+
bool try_parse(std::wstring_view name, T &out, ULONG hint);
89+
8190
/**
8291
* <summary>
8392
* Attempts to parse the given property by name and type. If the
@@ -87,22 +96,36 @@ namespace krabs {
8796
template <typename T>
8897
T parse(std::wstring_view name);
8998

99+
/**
100+
* <summary>
101+
* Attempts to parse the given property by name and type,
102+
* starting the name scan at the given hint index.
103+
* </summary>
104+
*/
105+
template <typename T>
106+
T parse(std::wstring_view name, ULONG hint);
107+
90108
template <typename Adapter>
91109
auto view_of(std::wstring_view name, Adapter &adapter) -> collection_view<typename Adapter::const_iterator>;
92110

111+
template <typename Adapter>
112+
auto view_of(std::wstring_view name, ULONG hint, Adapter &adapter) -> collection_view<typename Adapter::const_iterator>;
113+
93114
private:
94115
property_info find_property(std::wstring_view name);
95-
void cache_property(ULONG index, property_info info);
116+
property_info find_property(std::wstring_view name, ULONG hint);
117+
void ensure_cache_populated();
96118

97119
private:
98120
const schema &schema_;
99121
const BYTE *pEndBuffer_;
100-
BYTE *pBufferIndex_;
101-
ULONG lastPropertyIndex_;
102-
// Persistent name to index map shared across all events of the same type.
103-
const property_name_map *pPropertyNames_;
104-
// Maintain a mapping from property index to blob data location.
122+
123+
// Fully populated on first access -- maps property index to its
124+
// location and size in the event's user-data blob.
105125
std::vector<property_info> propertyCache_;
126+
127+
// Hint for name scan -- start from here on the next lookup.
128+
ULONG nextHint_;
106129
};
107130

108131
// Implementation
@@ -111,112 +134,88 @@ namespace krabs {
111134
inline parser::parser(const schema &s)
112135
: schema_(s)
113136
, pEndBuffer_((BYTE*)s.record_.UserData + s.record_.UserDataLength)
114-
, pBufferIndex_((BYTE*)s.record_.UserData)
115-
, lastPropertyIndex_(0)
116-
, pPropertyNames_(s.pPropertyNames_)
117-
, propertyCache_(s.pSchema_->PropertyCount)
137+
, nextHint_(0)
118138
{}
119139

120140
inline property_iterator parser::properties() const
121141
{
122142
return property_iterator(schema_);
123143
}
124144

125-
inline property_info parser::find_property(std::wstring_view name)
145+
inline void parser::ensure_cache_populated()
126146
{
127-
// A schema contains a collection of properties that are keyed by name.
128-
// These properties are stored in a blob of bytes that needs to be
129-
// interpreted according to information that is packaged up in the
130-
// schema and that can be retrieved using the Tdh* APIs. This format
131-
// requires a linear traversal over the blob, incrementing according to
132-
// the contents within it. This is janky, so our strategy is to
133-
// minimize this as much as possible via caching.
134-
135-
const ULONG totalPropCount = schema_.pSchema_->PropertyCount;
136-
137-
// Resolve property name to index.
138-
ULONG index = totalPropCount; // sentinel = not found
139-
if (pPropertyNames_) {
140-
// Fast path: use the persistent name to index map shared across
141-
// all events of the same type.
142-
auto it = pPropertyNames_->find(name);
143-
if (it != pPropertyNames_->end()) {
144-
index = it->second;
145-
}
146-
} else {
147-
// Fallback: linear scan of property names in the schema.
148-
for (ULONG i = 0; i < totalPropCount; ++i) {
149-
auto &propInfo = schema_.pSchema_->EventPropertyInfoArray[i];
150-
const wchar_t *pName = reinterpret_cast<const wchar_t*>(
151-
reinterpret_cast<const BYTE*>(schema_.pSchema_) +
152-
propInfo.NameOffset);
153-
if (name == pName) {
154-
index = i;
155-
break;
156-
}
157-
}
158-
}
159-
160-
if (index >= totalPropCount) {
161-
return property_info();
147+
if (!propertyCache_.empty()) {
148+
return;
162149
}
163150

164-
// The first step is to use our cache for the property to see if we've
165-
// discovered it already.
166-
if (index < lastPropertyIndex_) {
167-
return propertyCache_[index];
151+
const ULONG totalPropCount = schema_.pSchema_->PropertyCount;
152+
if (totalPropCount == 0) {
153+
return;
168154
}
169155

170-
assert((pBufferIndex_ <= pEndBuffer_ && pBufferIndex_ >= schema_.record_.UserData) &&
171-
"invariant: we should've already thrown for falling off the edge");
172-
173-
// accept that last property can be omitted from buffer. this happens if last property
174-
// is string but empty and the provider strips the null terminator
175-
assert((pBufferIndex_ == pEndBuffer_ ? ((totalPropCount - lastPropertyIndex_) <= 1)
176-
: true)
177-
&& "invariant: if we've exhausted our buffer, then we must've"
178-
"exhausted the properties as well");
156+
propertyCache_.reserve(totalPropCount);
157+
BYTE *pBuffer = (BYTE*)schema_.record_.UserData;
179158

180-
// We've not looked up this property before, so we have to do the work
181-
// to find it. While we're going through the blob to find it, we'll
182-
// remember what we've seen to save time later.
183-
//
184-
// Note: The name-to-index map is built once per schema type (cheap
185-
// metadata scan). But the blob walk below is lazy per-event -- we
186-
// only walk forward to the requested index, avoiding overhead when
187-
// only a subset of properties are needed.
188-
while (lastPropertyIndex_ <= index) {
189-
190-
auto &currentPropInfo = schema_.pSchema_->EventPropertyInfoArray[lastPropertyIndex_];
159+
for (ULONG i = 0; i < totalPropCount; ++i) {
160+
auto &currentPropInfo = schema_.pSchema_->EventPropertyInfoArray[i];
191161
const wchar_t *pName = reinterpret_cast<const wchar_t*>(
192162
reinterpret_cast<const BYTE*>(schema_.pSchema_) +
193163
currentPropInfo.NameOffset);
194164

195165
ULONG propertyLength = size_provider::get_property_size(
196-
pBufferIndex_,
166+
pBuffer,
197167
pName,
198168
schema_.record_,
199169
currentPropInfo);
200170

201-
// verify that the length of the property doesn't exceed the buffer
202-
if (pBufferIndex_ + propertyLength > pEndBuffer_) {
171+
if (pBuffer + propertyLength > pEndBuffer_) {
203172
throw std::out_of_range("Property length past end of property buffer");
204173
}
205174

206-
property_info propInfo(pBufferIndex_, currentPropInfo, propertyLength);
207-
cache_property(lastPropertyIndex_, propInfo);
208-
209-
// advance the buffer index since we've already processed this property
210-
pBufferIndex_ += propertyLength;
211-
lastPropertyIndex_++;
175+
propertyCache_.emplace_back(pBuffer, currentPropInfo, propertyLength);
176+
pBuffer += propertyLength;
212177
}
178+
}
213179

214-
return propertyCache_[index];
180+
inline property_info parser::find_property(std::wstring_view name)
181+
{
182+
return find_property(name, nextHint_);
215183
}
216184

217-
inline void parser::cache_property(ULONG index, property_info info)
185+
inline property_info parser::find_property(std::wstring_view name, ULONG hint)
218186
{
219-
propertyCache_[index] = info;
187+
ensure_cache_populated();
188+
189+
const ULONG totalPropCount = schema_.pSchema_->PropertyCount;
190+
if (totalPropCount == 0) {
191+
return property_info();
192+
}
193+
194+
// Hinted linear scan. In the common case (sequential access
195+
// or caller-provided index) this hits on the first comparison.
196+
if (hint >= totalPropCount) {
197+
hint = 0;
198+
}
199+
200+
ULONG index = totalPropCount; // sentinel = not found
201+
for (ULONG n = 0; n < totalPropCount; ++n) {
202+
ULONG i = (hint + n) % totalPropCount;
203+
auto &propInfo = schema_.pSchema_->EventPropertyInfoArray[i];
204+
const wchar_t *pName = reinterpret_cast<const wchar_t*>(
205+
reinterpret_cast<const BYTE*>(schema_.pSchema_) +
206+
propInfo.NameOffset);
207+
if (name == pName) {
208+
index = i;
209+
break;
210+
}
211+
}
212+
213+
if (index >= totalPropCount) {
214+
return property_info();
215+
}
216+
217+
nextHint_ = (index + 1) % totalPropCount;
218+
return propertyCache_[index];
220219
}
221220

222221
inline void throw_if_property_not_found(const property_info &propInfo)
@@ -244,6 +243,13 @@ namespace krabs {
244243
// try_parse
245244
// ------------------------------------------------------------------------
246245

246+
template <typename T>
247+
bool parser::try_parse(std::wstring_view name, T &out, ULONG hint)
248+
{
249+
nextHint_ = hint;
250+
return try_parse(name, out);
251+
}
252+
247253
template <typename T>
248254
bool parser::try_parse(std::wstring_view name, T &out)
249255
{
@@ -269,6 +275,13 @@ namespace krabs {
269275
// parse
270276
// ------------------------------------------------------------------------
271277

278+
template <typename T>
279+
T parser::parse(std::wstring_view name, ULONG hint)
280+
{
281+
nextHint_ = hint;
282+
return parse<T>(name);
283+
}
284+
272285
template <typename T>
273286
T parser::parse(std::wstring_view name)
274287
{
@@ -435,6 +448,14 @@ namespace krabs {
435448
// view_of
436449
// ------------------------------------------------------------------------
437450

451+
template <typename Adapter>
452+
auto parser::view_of(std::wstring_view name, ULONG hint, Adapter &adapter)
453+
-> collection_view<typename Adapter::const_iterator>
454+
{
455+
nextHint_ = hint;
456+
return view_of(name, adapter);
457+
}
458+
438459
template <typename Adapter>
439460
auto parser::view_of(std::wstring_view name, Adapter &adapter)
440461
-> collection_view<typename Adapter::const_iterator>

krabs/krabs/schema.hpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,6 @@ namespace krabs {
308308
private:
309309
const EVENT_RECORD &record_;
310310
const TRACE_EVENT_INFO *pSchema_;
311-
// Persistent name to index map, owned by schema_locator. May be nullptr.
312-
const property_name_map *pPropertyNames_;
313311

314312
private:
315313
friend std::wstring event_name(const schema &);
@@ -339,13 +337,11 @@ namespace krabs {
339337
inline schema::schema(const EVENT_RECORD &record, const krabs::schema_locator &schema_locator)
340338
: record_(record)
341339
, pSchema_(schema_locator.get_event_schema(record))
342-
, pPropertyNames_(schema_locator.get_property_names(pSchema_))
343340
{ }
344341

345342
inline schema::schema(const EVENT_RECORD &record, const PTRACE_EVENT_INFO pSchema)
346343
: record_(record)
347344
, pSchema_(pSchema)
348-
, pPropertyNames_(nullptr)
349345
{ }
350346

351347
inline bool schema::operator==(const schema &other) const

krabs/krabs/schema_locator.hpp

Lines changed: 2 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -173,14 +173,6 @@ namespace krabs {
173173
*/
174174
std::string_view get_trace_logger_event_name(const EVENT_RECORD &);
175175

176-
/**
177-
* <summary>
178-
* Maps property names to their index in the schema.
179-
* Keys are wstring_views pointing into stable TRACE_EVENT_INFO memory.
180-
* </summary>
181-
*/
182-
using property_name_map = std::unordered_map<std::wstring_view, ULONG>;
183-
184176
/**
185177
* <summary>
186178
* Fetches and caches schemas from TDH.
@@ -215,21 +207,8 @@ namespace krabs {
215207
*/
216208
bool has_event_schema(const EVENT_RECORD& record) const;
217209

218-
/**
219-
* <summary>
220-
* Returns the persistent property name to index map for a schema.
221-
* The map is built when the schema is first cached.
222-
* Returns nullptr if pSchema is null or not in the cache.
223-
* </summary>
224-
*/
225-
const property_name_map* get_property_names(const TRACE_EVENT_INFO* pSchema) const;
226-
227210
private:
228-
void build_property_names(const TRACE_EVENT_INFO* pSchema) const;
229-
230211
mutable std::unordered_map<schema_key, std::variant<std::unique_ptr<char[]>, TDHSTATUS>> cache_;
231-
// Persistent property name to index maps, keyed by schema pointer.
232-
mutable std::unordered_map<const TRACE_EVENT_INFO*, property_name_map> property_name_cache_;
233212
};
234213

235214
// Implementation
@@ -332,10 +311,9 @@ namespace krabs {
332311

333312
// Add the new instance to the cache.
334313
// NB: key's 'internalize_name' gets called by the cctor here.
335-
if (status == ERROR_SUCCESS) {
314+
if (status == ERROR_SUCCESS)
336315
cache_.emplace(key, std::move(buffer));
337-
build_property_names(returnVal);
338-
} else
316+
else
339317
cache_.emplace(key, status);
340318

341319
return returnVal;
@@ -348,29 +326,6 @@ namespace krabs {
348326
return status == ERROR_SUCCESS;
349327
}
350328

351-
inline void schema_locator::build_property_names(const TRACE_EVENT_INFO* pSchema) const
352-
{
353-
property_name_map names;
354-
for (ULONG i = 0; i < pSchema->PropertyCount; ++i) {
355-
const wchar_t* pName = reinterpret_cast<const wchar_t*>(
356-
reinterpret_cast<const BYTE*>(pSchema) +
357-
pSchema->EventPropertyInfoArray[i].NameOffset);
358-
names.emplace(std::wstring_view(pName), i);
359-
}
360-
property_name_cache_.emplace(pSchema, std::move(names));
361-
}
362-
363-
inline const property_name_map* schema_locator::get_property_names(const TRACE_EVENT_INFO* pSchema) const
364-
{
365-
if (!pSchema) return nullptr;
366-
367-
auto it = property_name_cache_.find(pSchema);
368-
if (it != property_name_cache_.end()) {
369-
return &it->second;
370-
}
371-
return nullptr;
372-
}
373-
374329
inline std::unique_ptr<char[]> get_event_schema_from_tdh(const EVENT_RECORD &record)
375330
{
376331
TDHSTATUS status = ERROR_SUCCESS;

0 commit comments

Comments (0)