src/detail/normalize.cpp

99.3% Lines (427/430) 100.0% List of functions (21/21)
normalize.cpp
f(x) Functions (21)
Function Calls Lines Blocks
boost::urls::detail::pop_encoded_front(boost::core::basic_string_view<char>&, char&, unsigned long&) :28 7772x 100.0% 100.0% boost::urls::detail::compare_encoded(boost::core::basic_string_view<char>, boost::core::basic_string_view<char>) :50 64x 100.0% 100.0% boost::urls::detail::compare_encoded_query(boost::core::basic_string_view<char>, boost::core::basic_string_view<char>) :79 28x 100.0% 100.0% boost::urls::detail::digest_encoded(boost::core::basic_string_view<char>, boost::urls::detail::fnv_1a&) :133 1216x 100.0% 100.0% boost::urls::detail::ci_compare_encoded(boost::core::basic_string_view<char>, boost::core::basic_string_view<char>) :147 180x 100.0% 100.0% boost::urls::detail::ci_digest_encoded(boost::core::basic_string_view<char>, boost::urls::detail::fnv_1a&) :178 304x 100.0% 100.0% boost::urls::detail::compare(boost::core::basic_string_view<char>, boost::core::basic_string_view<char>) :193 46x 100.0% 100.0% boost::urls::detail::ci_compare(boost::core::basic_string_view<char>, boost::core::basic_string_view<char>) :215 220x 100.0% 100.0% boost::urls::detail::ci_digest(boost::core::basic_string_view<char>, boost::urls::detail::fnv_1a&) :237 304x 100.0% 100.0% boost::urls::detail::path_ends_with(boost::core::basic_string_view<char>, boost::core::basic_string_view<char>) :258 2136x 100.0% 88.0% boost::urls::detail::path_ends_with(boost::core::basic_string_view<char>, boost::core::basic_string_view<char>)::{lambda(char const*&, char const*&, char&)#1}::operator()(char const*&, char const*&, char&) const :266 5848x 100.0% 93.0% boost::urls::detail::remove_dot_segments(char*, char const*, boost::core::basic_string_view<char>) :317 1065x 100.0% 95.0% boost::urls::detail::remove_dot_segments(char*, char const*, boost::core::basic_string_view<char>)::{lambda(char*&, char const*, boost::core::basic_string_view<char>)#1}::operator()(char*&, char const*, boost::core::basic_string_view<char>) const :358 1879x 100.0% 88.0% boost::urls::detail::remove_dot_segments(char*, 
char const*, boost::core::basic_string_view<char>)::{lambda(boost::core::basic_string_view<char>, boost::core::basic_string_view<char>, unsigned long&)#1}::operator()(boost::core::basic_string_view<char>, boost::core::basic_string_view<char>, unsigned long&) const :367 12011x 100.0% 100.0% boost::urls::detail::remove_dot_segments(char*, char const*, boost::core::basic_string_view<char>)::{lambda(boost::core::basic_string_view<char>, boost::core::basic_string_view<char>)#1}::operator()(boost::core::basic_string_view<char>, boost::core::basic_string_view<char>) const :411 6016x 97.6% 100.0% boost::urls::detail::path_pop_back(boost::core::basic_string_view<char>&) :616 1154x 100.0% 97.0% boost::urls::detail::pop_last_segment(boost::core::basic_string_view<char>&, boost::core::basic_string_view<char>&, unsigned long&, bool) :639 538x 98.0% 97.0% boost::urls::detail::normalized_path_digest(boost::core::basic_string_view<char>, bool, boost::urls::detail::fnv_1a&) :748 304x 100.0% 100.0% boost::urls::detail::segments_compare(boost::urls::segments_encoded_view, boost::urls::segments_encoded_view) :770 239x 100.0% 100.0% boost::urls::detail::segments_compare(boost::urls::segments_encoded_view, boost::urls::segments_encoded_view)::{lambda(boost::urls::segments_encoded_view)#1}::operator()(boost::urls::segments_encoded_view) const :776 478x 100.0% 100.0% boost::urls::detail::segments_compare(boost::urls::segments_encoded_view, boost::urls::segments_encoded_view)::{lambda(unsigned long&, boost::urls::decode_view&, boost::urls::segments_encoded_base::iterator&, boost::urls::segments_encoded_base::iterator&, boost::urls::decode_view::iterator&, unsigned long&, bool&)#1}::operator()(unsigned long&, boost::urls::decode_view&, boost::urls::segments_encoded_base::iterator&, boost::urls::segments_encoded_base::iterator&, boost::urls::decode_view::iterator&, unsigned long&, bool&) const :812 2064x 100.0% 100.0%
Line TLA Hits Source Code
1 //
2 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
3 // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com)
4 //
5 // Distributed under the Boost Software License, Version 1.0. (See accompanying
6 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // Official repository: https://github.com/boostorg/url
9 //
10
11
12 #include <boost/url/detail/config.hpp>
13 #include <boost/url/decode_view.hpp>
14 #include <boost/url/detail/decode.hpp>
15 #include <boost/url/segments_encoded_view.hpp>
16 #include <boost/url/grammar/ci_string.hpp>
17 #include <boost/url/grammar/lut_chars.hpp>
18 #include <boost/assert.hpp>
19 #include <boost/core/ignore_unused.hpp>
20 #include <cstring>
21 #include <boost/url/detail/normalize.hpp>
22
23 namespace boost {
24 namespace urls {
25 namespace detail {
26
27 void
28 7772x pop_encoded_front(
29 core::string_view& s,
30 char& c,
31 std::size_t& n) noexcept
32 {
33 7772x if(s.front() != '%')
34 {
35 7620x c = s.front();
36 7620x s.remove_prefix(1);
37 }
38 else
39 {
40 152x detail::decode_unsafe(
41 &c,
42 &c + 1,
43 s.substr(0, 3));
44 152x s.remove_prefix(3);
45 }
46 7772x ++n;
47 7772x }
48
49 int
50 64x compare_encoded(
51 core::string_view lhs,
52 core::string_view rhs) noexcept
53 {
54 64x std::size_t n0 = 0;
55 64x std::size_t n1 = 0;
56 64x char c0 = 0;
57 64x char c1 = 0;
58 64x while(
59 486x !lhs.empty() &&
60 228x !rhs.empty())
61 {
62 215x pop_encoded_front(lhs, c0, n0);
63 215x pop_encoded_front(rhs, c1, n1);
64 215x if (c0 < c1)
65 18x return -1;
66 197x if (c1 < c0)
67 3x return 1;
68 }
69 43x n0 += detail::decode_bytes_unsafe(lhs);
70 43x n1 += detail::decode_bytes_unsafe(rhs);
71 43x if (n0 == n1)
72 22x return 0;
73 21x if (n0 < n1)
74 8x return -1;
75 13x return 1;
76 }
77
78 int
79 28x compare_encoded_query(
80 core::string_view lhs,
81 core::string_view rhs) noexcept
82 {
83 static constexpr
84 grammar::lut_chars
85 query_compare_exception_lut = "&=+";
86
87 28x std::size_t n0 = 0;
88 28x std::size_t n1 = 0;
89 28x char c0 = 0;
90 28x char c1 = 0;
91 28x while(
92 254x !lhs.empty() &&
93 122x !rhs.empty())
94 {
95 121x bool const lhs_was_decoded = lhs.front() != '%';
96 121x bool const rhs_was_decoded = rhs.front() != '%';
97 121x pop_encoded_front(lhs, c0, n0);
98 121x pop_encoded_front(rhs, c1, n1);
99 121x if (c0 < c1)
100 2x return -1;
101 119x if (c1 < c0)
102 12x return 1;
103 // The decoded chars are the same, but
104 // are these query exceptions that have
105 // different meanings when decoded?
106 107x if (query_compare_exception_lut(c0))
107 {
108 // If so, we only continue if both
109 // chars were decoded or encoded
110 // the same way.
111 40x if (lhs_was_decoded == rhs_was_decoded)
112 37x continue;
113 // Otherwise, we return a value != 0
114 // because these chars are not equal.
115 // If rhs was the decoded one, it contains
116 // an ascii char higher than '%'
117 3x if (rhs_was_decoded)
118 2x return -1;
119 else
120 1x return 1;
121 }
122 }
123 11x n0 += detail::decode_bytes_unsafe(lhs);
124 11x n1 += detail::decode_bytes_unsafe(rhs);
125 11x if (n0 == n1)
126 9x return 0;
127 2x if (n0 < n1)
128 1x return -1;
129 1x return 1;
130 }
131
132 void
133 1216x digest_encoded(
134 core::string_view s,
135 fnv_1a& hasher) noexcept
136 {
137 1216x char c = 0;
138 1216x std::size_t n = 0;
139 1724x while(!s.empty())
140 {
141 508x pop_encoded_front(s, c, n);
142 508x hasher.put(c);
143 }
144 1216x }
145
146 int
147 180x ci_compare_encoded(
148 core::string_view lhs,
149 core::string_view rhs) noexcept
150 {
151 180x std::size_t n0 = 0;
152 180x std::size_t n1 = 0;
153 180x char c0 = 0;
154 180x char c1 = 0;
155 180x while (
156 4704x !lhs.empty() &&
157 2271x !rhs.empty())
158 {
159 2265x pop_encoded_front(lhs, c0, n0);
160 2265x pop_encoded_front(rhs, c1, n1);
161 2265x c0 = grammar::to_lower(c0);
162 2265x c1 = grammar::to_lower(c1);
163 2265x if (c0 < c1)
164 10x return -1;
165 2255x if (c1 < c0)
166 2x return 1;
167 }
168 168x n0 += detail::decode_bytes_unsafe(lhs);
169 168x n1 += detail::decode_bytes_unsafe(rhs);
170 168x if (n0 == n1)
171 161x return 0;
172 7x if (n0 < n1)
173 1x return -1;
174 6x return 1;
175 }
176
177 void
178 304x ci_digest_encoded(
179 core::string_view s,
180 fnv_1a& hasher) noexcept
181 {
182 304x char c = 0;
183 304x std::size_t n = 0;
184 2366x while(!s.empty())
185 {
186 2062x pop_encoded_front(s, c, n);
187 2062x c = grammar::to_lower(c);
188 2062x hasher.put(c);
189 }
190 304x }
191
192 int
193 46x compare(
194 core::string_view lhs,
195 core::string_view rhs) noexcept
196 {
197 46x auto rlen = (std::min)(lhs.size(), rhs.size());
198 104x for (std::size_t i = 0; i < rlen; ++i)
199 {
200 79x char c0 = lhs[i];
201 79x char c1 = rhs[i];
202 79x if (c0 < c1)
203 13x return -1;
204 66x if (c1 < c0)
205 8x return 1;
206 }
207 25x if ( lhs.size() == rhs.size() )
208 4x return 0;
209 21x if ( lhs.size() < rhs.size() )
210 8x return -1;
211 13x return 1;
212 }
213
214 int
215 220x ci_compare(
216 core::string_view lhs,
217 core::string_view rhs) noexcept
218 {
219 220x auto rlen = (std::min)(lhs.size(), rhs.size());
220 1125x for (std::size_t i = 0; i < rlen; ++i)
221 {
222 912x char c0 = grammar::to_lower(lhs[i]);
223 912x char c1 = grammar::to_lower(rhs[i]);
224 912x if (c0 < c1)
225 6x return -1;
226 906x if (c1 < c0)
227 1x return 1;
228 }
229 213x if ( lhs.size() == rhs.size() )
230 205x return 0;
231 8x if ( lhs.size() < rhs.size() )
232 6x return -1;
233 2x return 1;
234 }
235
236 void
237 304x ci_digest(
238 core::string_view s,
239 fnv_1a& hasher) noexcept
240 {
241 1034x for (char c: s)
242 {
243 730x c = grammar::to_lower(c);
244 730x hasher.put(c);
245 }
246 304x }
247
248 /* Check if a string ends with the specified suffix (decoded comparison)
249
250 This function determines if a string ends with the specified suffix
251 when the string and suffix are compared after percent-decoding.
252
253 @param str The string to check (percent-encoded)
254 @param suffix The suffix to check for (percent-decoded)
255 @return The number of encoded chars consumed in the string
256 */
257 std::size_t
258 2136x path_ends_with(
259 core::string_view str,
260 core::string_view suffix) noexcept
261 {
262 2136x BOOST_ASSERT(!str.empty());
263 2136x BOOST_ASSERT(!suffix.empty());
264 2136x BOOST_ASSERT(!suffix.contains("%2F"));
265 2136x BOOST_ASSERT(!suffix.contains("%2f"));
266 5848x auto consume_last = [](
267 core::string_view::iterator& it,
268 core::string_view::iterator& end,
269 char& c)
270 {
271 5848x BOOST_ASSERT(end > it);
272 5848x BOOST_ASSERT(it != end);
273 9808x if ((end - it) < 3 ||
274 7920x *(std::prev(end, 3)) != '%')
275 {
276 5800x c = *--end;
277 5800x return false;
278 }
279 96x detail::decode_unsafe(
280 &c,
281 &c + 1,
282 core::string_view(std::prev(
283 end, 3), 3));
284 48x end -= 3;
285 48x return true;
286 };
287
288 2136x auto it0 = str.begin();
289 2136x auto end0 = str.end();
290 2136x auto it1 = suffix.begin();
291 2136x auto end1 = suffix.end();
292 2136x char c0 = 0;
293 2136x char c1 = 0;
294 2136x while(
295 3248x it0 < end0 &&
296 3006x it1 < end1)
297 {
298 2932x bool const is_encoded = consume_last(it0, end0, c0);
299 // The suffix never contains an encoded slash (%2F), and a decoded
300 // slash is not equivalent to an encoded slash
301 2932x if (is_encoded && c0 == '/')
302 16x return 0;
303 2916x consume_last(it1, end1, c1);
304 2916x if (c0 != c1)
305 1804x return 0;
306 }
307 316x bool const consumed_suffix = it1 == end1;
308 316x if (consumed_suffix)
309 {
310 110x std::size_t const consumed_encoded = str.end() - end0;
311 110x return consumed_encoded;
312 }
313 206x return 0;
314 }
315
// Remove "." and ".." segments from a path, following the production
// rules of RFC 3986 section 5.2.4 (with errata 4547 for leading "..").
//
// dest0: start of the output buffer
// end:   one past the end of the output buffer; only used to assert
//        that each append fits
// input: the path to process
//
// Returns the number of chars written to the output buffer.
316 std::size_t
317 1065x remove_dot_segments(
318 char* dest0,
319 char const* end,
320 core::string_view input) noexcept
321 {
322 // 1. The input buffer `s` is initialized with
323 // the now-appended path components and the
324 // output buffer `dest0` is initialized to
325 // the empty string.
326 1065x char* dest = dest0;
327 1065x bool const is_absolute = input.starts_with('/');
328
329 // Step 2 is a loop through 5 production rules:
330 // https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4
331 //
332 // There are no transitions between all rules,
333 // which enables some optimizations.
334 //
335 // Initial:
336 // - Rule A: handle initial dots
337 // If the input buffer begins with a
338 // prefix of "../" or "./", then remove
339 // that prefix from the input buffer.
340 // Rule A can only happen at the beginning.
341 // Errata 4547: Keep "../" in the beginning
342 // https://www.rfc-editor.org/errata/eid4547
343 //
344 // Then:
345 // - Rule D: ignore a final ".." or "."
346 // if the input buffer consists only of "."
347 // or "..", then remove that from the input
348 // buffer.
349 // Rule D can only happen after Rule A because:
350 // - B and C write "/" to the input
351 // - E writes "/" to input or returns
352 //
353 // Then:
354 // - Rule B: ignore ".": write "/" to the input
355 // - Rule C: apply "..": remove seg and write "/"
356 // - Rule E: copy complete segment
357 auto append =
358 1879x [](char*& first, char const* last, core::string_view in)
359 {
360 // append `in` to `dest`
// `first` is advanced past the copied chars; `last` is only
// used by the assertion below
361 1879x BOOST_ASSERT(in.size() <= std::size_t(last - first));
// NOTE(review): memmove rather than memcpy, presumably because
// `in` may overlap the output buffer - confirm with callers
362 1879x std::memmove(first, in.data(), in.size());
363 1879x first += in.size();
364 ignore_unused(last);
365 1879x };
366
367 12011x auto dot_starts_with = [](
368 core::string_view str, core::string_view dots, std::size_t& n)
369 {
370 // starts_with for encoded/decoded dots
371 // or decoded otherwise. return how many
372 // chars in str match the dots
// On a mismatch `n` is reset to 0 and false is returned
373 12011x n = 0;
374 21036x for (char c: dots)
375 {
376 20431x if (str.starts_with(c))
377 {
378 9025x str.remove_prefix(1);
379 9025x ++n;
380 9025x continue;
381 }
382
383 // In the general case, we would need to
384 // check if the next char is an encoded
385 // dot.
386 // However, an encoded dot in `str`
387 // would have already been decoded in
388 // url_base::normalize_path().
389 // This needs to be undone if
390 // `remove_dot_segments` is used in a
391 // different context.
392 // if (str.size() > 2 &&
393 // c == '.'
394 // &&
395 // str[0] == '%' &&
396 // str[1] == '2' &&
397 // (str[2] == 'e' ||
398 // str[2] == 'E'))
399 // {
400 // str.remove_prefix(3);
401 // n += 3;
402 // continue;
403 // }
404
405 11406x n = 0;
406 11406x return false;
407 }
408 605x return true;
409 };
410
// true when `str` is exactly `dots`: the matched prefix length
// must equal the whole length of `str`
411 6016x auto dot_equal = [&dot_starts_with](
412 core::string_view str, core::string_view dots)
413 {
414 6016x std::size_t n = 0;
415 6016x dot_starts_with(str, dots, n);
416 6016x return n == str.size();
417 1065x };
418
419 // Rule A
// `n` is set by every dot_starts_with() call before it is read
420 std::size_t n;
421 1086x while (!input.empty())
422 {
423 960x if (dot_starts_with(input, "../", n))
424 {
425 // Errata 4547
426 4x append(dest, end, "../");
427 4x input.remove_prefix(n);
428 4x continue;
429 }
430 956x else if (!dot_starts_with(input, "./", n))
431 {
432 939x break;
433 }
// leading "./" is dropped without writing anything
434 17x input.remove_prefix(n);
435 }
436
437 // Rule D
438 1065x if( dot_equal(input, "."))
439 {
440 127x input = {};
441 }
442 938x else if( dot_equal(input, "..") )
443 {
444 // Errata 4547
445 3x append(dest, end, "..");
446 3x input = {};
447 }
448
449 // 2. While the input buffer is not empty,
450 // loop as follows:
451 3088x while (!input.empty())
452 {
453 // Rule B
454 2062x bool const is_dot_seg = dot_starts_with(input, "/./", n);
455 2062x if (is_dot_seg)
456 {
// drop "/." but keep the trailing '/' so that it starts
// the next segment
457 37x input.remove_prefix(n - 1);
458 37x continue;
459 }
460
461 2025x bool const is_final_dot_seg = dot_equal(input, "/.");
462 2025x if (is_final_dot_seg)
463 {
464 // We can't remove "." from a core::string_view
465 // So what we do here is equivalent to
466 // replacing s with '/' as required
467 // in Rule B and executing the next
468 // iteration, which would append this
469 // '/' to the output, as required by
470 // Rule E
471 8x append(dest, end, input.substr(0, 1));
472 8x input = {};
473 8x break;
474 }
475
476 // Rule C
477 2017x bool const is_dotdot_seg = dot_starts_with(input, "/../", n);
478 2017x if (is_dotdot_seg)
479 {
// look at what has been written so far to find the
// segment that this ".." cancels
480 215x core::string_view cur_out(dest0, dest - dest0);
481 215x std::size_t p = cur_out.find_last_of('/');
482 215x bool const has_multiple_segs = p != core::string_view::npos;
483 215x if (has_multiple_segs)
484 {
485 // output has multiple segments
486 // "erase" [p, end] if not "/.."
487 144x core::string_view last_seg(dest0 + p, dest - (dest0 + p));
488 144x bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");
489 144x if (!prev_is_dotdot_seg)
490 {
491 133x dest = dest0 + p;
492 }
493 else
494 {
// a kept ".." cannot be cancelled: stack another
495 11x append(dest, end, "/..");
496 }
497 }
498 71x else if (dest0 != dest)
499 {
500 // Only one segment in the output: remove it
501 21x core::string_view last_seg(dest0, dest - dest0);
502 21x bool const prev_is_dotdot_seg = dot_equal(last_seg, "..");
503 21x if (!prev_is_dotdot_seg)
504 {
505 19x dest = dest0;
506 19x if (!is_absolute)
507 {
// the output is empty again: also drop the
// leading '/' of the input so that a relative
// path does not turn into an absolute one
508 19x input.remove_prefix(1);
509 }
510 }
511 else
512 {
513 2x append(dest, end, "/..");
514 }
515 }
516 else
517 {
518 // Output is empty
519 50x if (is_absolute)
520 {
521 50x append(dest, end, "/..");
522 }
523 else
524 {
525 // AFREITAS: Although we have no formal proof
526 // for that, the output can't be relative
527 // and empty at this point because relative
528 // paths will fall in the `dest0 != dest`
529 // case above of this rule C and then the
530 // general case of rule E for "..".
531 append(dest, end, "..");
532 }
533 }
// drop "/.." but keep the trailing '/' for the next segment
534 215x input.remove_prefix(n - 1);
535 215x continue;
536 215x }
537
// Rule C for a ".." that is the last segment of the input
538 1802x bool const is_final_dotdot_seg = dot_equal(input, "/..");
539 1802x if (is_final_dotdot_seg)
540 {
541 31x core::string_view cur_out(dest0, dest - dest0);
542 31x std::size_t p = cur_out.find_last_of('/');
543 31x bool const has_multiple_segs = p != core::string_view::npos;
544 31x if (has_multiple_segs)
545 {
546 // output has multiple segments
547 // "erase" [p, end] if not "/.."
548 18x core::string_view last_seg(dest0 + p, dest - (dest0 + p));
549 18x bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");
550 18x if (!prev_is_dotdot_seg)
551 {
// the removed segment leaves a trailing '/' behind
552 14x dest = dest0 + p;
553 14x append(dest, end, "/");
554 }
555 else
556 {
557 4x append(dest, end, "/..");
558 }
559 }
560 13x else if (dest0 != dest)
561 {
562 // Only one segment in the output: remove it
563 3x core::string_view last_seg(dest0, dest - dest0);
564 3x bool const prev_is_dotdot_seg = dot_equal(last_seg, "..");
565 3x if (!prev_is_dotdot_seg) {
566 1x dest = dest0;
567 }
568 else
569 {
570 2x append(dest, end, "/..");
571 }
572 }
573 else
574 {
575 // Output is empty: append dotdot
576 10x if (is_absolute)
577 {
578 10x append(dest, end, "/..");
579 }
580 else
581 {
582 // AFREITAS: Although we have no formal proof
583 // for that, the output can't be relative
584 // and empty at this point because relative
585 // paths will fall in the `dest0 != dest`
586 // case above of this rule C and then the
587 // general case of rule E for "..".
588 append(dest, end, "..");
589 }
590 }
591 31x input = {};
592 31x break;
593 }
594
595 // Rule E
// search from index 1 so that a leading '/' stays attached
// to the segment being copied
596 1771x std::size_t p = input.find_first_of('/', 1);
597 1771x if (p != core::string_view::npos)
598 {
599 875x append(dest, end, input.substr(0, p));
600 875x input.remove_prefix(p);
601 }
602 else
603 {
// last segment: copy everything that is left
604 896x append(dest, end, input);
605 896x input = {};
606 }
607 }
608
609 // 3. Finally, the output buffer is set
610 // as the result of remove_dot_segments,
611 // and we return its size
612 1065x return dest - dest0;
613 }
614
615 char
616 1154x path_pop_back( core::string_view& s )
617 {
618 1676x if (s.size() < 3 ||
619 1044x *std::prev(s.end(), 3) != '%')
620 {
621 1102x char c = s.back();
622 1102x s.remove_suffix(1);
623 1102x return c;
624 }
625 52x char c = 0;
626 104x detail::decode_unsafe(
627 104x &c, &c + 1, s.substr(s.size() - 3));
628 52x if (c != '/')
629 {
630 44x s.remove_suffix(3);
631 44x return c;
632 }
633 8x c = s.back();
634 8x s.remove_suffix(1);
635 8x return c;
636 };
637
638 void
639 538x pop_last_segment(
640 core::string_view& str,
641 core::string_view& seg,
642 std::size_t& level,
643 bool remove_unmatched) noexcept
644 {
645 538x seg = {};
646 538x std::size_t n = 0;
647 700x while (!str.empty())
648 {
649 // B. if the input buffer begins with a
650 // prefix of "/./" or "/.", where "." is
651 // a complete path segment, then replace
652 // that prefix with "/" in the input
653 // buffer; otherwise,
654 558x n = detail::path_ends_with(str, "/./");
655 558x if (n)
656 {
657 10x seg = str.substr(str.size() - n);
658 10x str.remove_suffix(n);
659 10x continue;
660 }
661 548x n = detail::path_ends_with(str, "/.");
662 548x if (n)
663 {
664 12x seg = str.substr(str.size() - n, 1);
665 12x str.remove_suffix(n);
666 12x continue;
667 }
668
669 // C. if the input buffer begins with a
670 // prefix of "/../" or "/..", where ".."
671 // is a complete path segment, then
672 // replace that prefix with "/" in the
673 // input buffer and remove the last
674 // segment and its preceding "/"
675 // (if any) from the output buffer
676 // otherwise,
677 536x n = detail::path_ends_with(str, "/../");
678 536x if (n)
679 {
680 42x seg = str.substr(str.size() - n);
681 42x str.remove_suffix(n);
682 42x ++level;
683 42x continue;
684 }
685 494x n = detail::path_ends_with(str, "/..");
686 494x if (n)
687 {
688 46x seg = str.substr(str.size() - n);
689 46x str.remove_suffix(n);
690 46x ++level;
691 46x continue;
692 }
693
694 // E. move the first path segment in the
695 // input buffer to the end of the output
696 // buffer, including the initial "/"
697 // character (if any) and any subsequent
698 // characters up to, but not including,
699 // the next "/" character or the end of
700 // the input buffer.
701 448x std::size_t p = str.size() > 1
702 448x ? str.find_last_of('/', str.size() - 2)
703 448x : core::string_view::npos;
704 448x if (p != core::string_view::npos)
705 {
706 276x seg = str.substr(p + 1);
707 276x str.remove_suffix(seg.size());
708 }
709 else
710 {
711 172x seg = str;
712 172x str = {};
713 }
714
715 448x if (level == 0)
716 396x return;
717 52x if (!str.empty())
718 42x --level;
719 }
720 // we still need to skip n_skip + 1
721 // but the string is empty
722 142x if (remove_unmatched && level)
723 {
724 34x seg = "/";
725 34x level = 0;
726 34x return;
727 }
728 108x else if (level)
729 {
730 4x if (!seg.empty())
731 {
732 4x seg = "/../";
733 }
734 else
735 {
736 // AFREITAS: this condition
737 // is correct, but it might
738 // unreachable.
739 seg = "/..";
740 }
741 4x --level;
742 4x return;
743 }
744 104x seg = {};
745 }
746
747 void
748 304x normalized_path_digest(
749 core::string_view str,
750 bool remove_unmatched,
751 fnv_1a& hasher) noexcept
752 {
753 304x core::string_view seg;
754 304x std::size_t level = 0;
755 do
756 {
757 538x pop_last_segment(
758 str, seg, level, remove_unmatched);
759 1692x while (!seg.empty())
760 {
761 1154x char c = path_pop_back(seg);
762 1154x hasher.put(c);
763 }
764 }
765 538x while (!str.empty());
766 304x }
767
768 // compare segments as if they were normalized
//
// Three-way comparison (-1, 0 or 1) of two segment ranges. Both ranges
// are read from the back, one decoded char at a time, while "." segments
// and segments cancelled by ".." are skipped. No normalized copy of
// either path is built.
769 int
770 239x segments_compare(
771 segments_encoded_view seg0,
772 segments_encoded_view seg1) noexcept
773 {
774 // calculate path size as if it were normalized
775 auto normalized_size =
776 478x [](segments_encoded_view seg) -> std::size_t
777 {
778 478x if (seg.empty())
// bool converted to size: 1 if absolute, else 0
779 144x return seg.is_absolute();
780
781 334x std::size_t n = 0;
782 334x std::size_t skip = 0;
783 334x auto begin = seg.begin();
784 334x auto it = seg.end();
785 1096x while (it != begin)
786 {
787 762x --it;
788 762x decode_view dseg = **it;
789 762x if (dseg == "..")
790 167x ++skip;
791 595x else if (dseg != ".")
792 {
793 557x if (skip)
794 85x --skip;
795 else
// +1 accounts for the '/' in front of the segment
796 472x n += dseg.size() + 1;
797 }
798 }
// each ".." left unmatched counts as the three chars "/.."
799 334x n += skip * 3;
// a relative path has no leading '/'
800 334x n -= !seg.is_absolute();
801 334x return n;
802 };
803
804 // find the normalized size for the comparison
805 239x std::size_t n0 = normalized_size(seg0);
806 239x std::size_t n1 = normalized_size(seg1);
// keep the original sizes: n0 and n1 are counted down below
807 239x std::size_t n00 = n0;
808 239x std::size_t n10 = n1;
809
810 // consume the last char from a segment range
// n:        normalized chars still to be consumed, decremented here
// dseg:     segment currently being read
// begin/it: bounds of the segments not yet visited
// cit:      read position inside dseg, moving towards its begin
// skip:     number of ".." seen that still have to cancel a segment
// at_slash: whether the previously returned char was a separator
811 auto consume_last =
812 2064x [](
813 std::size_t& n,
814 decode_view& dseg,
815 segments_encoded_view::iterator& begin,
816 segments_encoded_view::iterator& it,
817 decode_view::iterator& cit,
818 std::size_t& skip,
819 bool& at_slash) -> char
820 {
821 2064x if (cit != dseg.begin())
822 {
823 // return last char from current segment
824 1387x at_slash = false;
825 1387x --cit;
826 1387x --n;
827 1387x return *cit;
828 }
829
830 677x if (!at_slash)
831 {
832 // current segment dseg is over and
833 // previous char was not a slash
834 // so we output one
835 403x at_slash = true;
836 403x --n;
837 403x return '/';
838 }
839
840 // current segment dseg is over and
841 // last char was already the slash
842 // between segments, so take the
843 // next final segment to consume
844 274x at_slash = false;
845 512x while (cit == dseg.begin())
846 {
847 // take next segment
848 512x if (it != begin)
849 380x --it;
850 else
851 132x break;
852 380x if (**it == "..")
853 {
854 // skip next if this is ".."
855 140x ++skip;
856 }
857 240x else if (**it != ".")
858 {
859 212x if (skip)
860 {
861 // discount skips
862 70x --skip;
863 }
864 else
865 {
866 // or update current seg
867 142x dseg = **it;
868 142x cit = dseg.end();
869 142x break;
870 }
871 }
872 }
873 // consume from the new current
874 // segment
875 274x --n;
876 274x if (cit != dseg.begin())
877 {
878 // in the general case, we consume
879 // one more character from the end
880 127x --cit;
881 127x return *cit;
882 }
883
884 // nothing left to consume in the
885 // current and new segment
886 147x if (it == begin)
887 {
888 // if this is the first
889 // segment, the segments are
890 // over and there can only
891 // be repetitions of "../" to
892 // output
// the remaining count selects the char of the repeating
// pattern
893 138x return "/.."[n % 3];
894 }
895 // at other segments, we need
896 // a slash to transition to the
897 // next segment
898 9x at_slash = true;
899 9x return '/';
900 };
901
902 // consume final segments from seg0 that
903 // should not influence the comparison
904 239x auto begin0 = seg0.begin();
905 239x auto it0 = seg0.end();
906 239x decode_view dseg0;
907 239x if (it0 != seg0.begin())
908 {
909 166x --it0;
910 166x dseg0 = **it0;
911 }
912 239x decode_view::iterator cit0 = dseg0.end();
913 239x std::size_t skip0 = 0;
914 239x bool at_slash0 = true;
// drop the extra tail of the longer path so that both sides
// have the same number of chars left
915 377x while (n0 > n1)
916 {
917 138x consume_last(n0, dseg0, begin0, it0, cit0, skip0, at_slash0);
918 }
919
920 // consume final segments from seg1 that
921 // should not influence the comparison
922 239x auto begin1 = seg1.begin();
923 239x auto it1 = seg1.end();
924 239x decode_view dseg1;
925 239x if (it1 != seg1.begin())
926 {
927 168x --it1;
928 168x dseg1 = **it1;
929 }
930 239x decode_view::iterator cit1 = dseg1.end();
931 239x std::size_t skip1 = 0;
932 239x bool at_slash1 = true;
933 285x while (n1 > n0)
934 {
935 46x consume_last(n1, dseg1, begin1, it1, cit1, skip1, at_slash1);
936 }
937
// n0 == n1 here. The chars are visited from the back and the loop
// does not stop at the first difference, so `cmp` ends up holding
// the difference closest to the front of the paths.
938 239x int cmp = 0;
939 1179x while (n0)
940 {
941 940x char c0 = consume_last(
942 n0, dseg0, begin0, it0, cit0, skip0, at_slash0);
943 940x char c1 = consume_last(
944 n1, dseg1, begin1, it1, cit1, skip1, at_slash1);
945 940x if (c0 < c1)
946 40x cmp = -1;
947 900x else if (c1 < c0)
948 44x cmp = +1;
949 }
950
951 239x if (cmp != 0)
952 48x return cmp;
// common prefix is equal: the shorter normalized path orders first
953 191x if ( n00 == n10 )
954 185x return 0;
955 6x if ( n00 < n10 )
956 4x return -1;
957 2x return 1;
958 }
959
960 } // detail
961 } // urls
962 } // boost
963
964