使用 boost 解析日期时间字符串:使用一位数的小时格式

2021-12-24 00:00:00 android-ndk c++ boost boost-date-time

我正在处理需要在 NDK 工具链上编译的代码.不幸的是,最新版本只支持直到 gcc4.9 不支持 C++11 日期时间解析.我有一个日期时间字符串,我需要通过两到三种格式发送它来找出解析方法.

所以我尝试了 Linux API strptime,但它有时会按错误的格式解析出值.我不得不放弃它,转而使用 Boost.

至于 Boost,我使用的是 1_64 版本.根据这里的文档,

我找不到解析单个数字小时格式的方法.

bool getepochtime(const std::string &str, const std::string &format, unsigned long &epoch){纪元 = 0;命名空间 bt = boost::posix_time;std::locale lformat = std::locale(std::locale::classic(), new bt::time_input_facet(format));bt::ptime pt;std::istringstream is(str);is.imbue(lformat);是 >>点;如果 (pt == bt::ptime()) {//时代= 0;返回假;}bt::ptime timet_start(boost::gregorian::date(1970, 1, 1));bt::time_duration diff = pt - timet_start;纪元 = (1000 * diff.ticks()/bt::time_duration::rep_type::ticks_per_second);返回真;}int main() {无符号长评估;//这有效.getepochtime("1 月 28 日晚上 11:50", "%dth %B %H:%M %p", eval);//这不起作用.getepochtime("1 月 28 日下午 1:50", "%dth %B %I:%M %p", eval);//也不是这个.getepochtime("1 月 28 日下午 1:50", "%dth %B %H:%M %p", eval);返回0;}

任何帮助将不胜感激.

解决方案

如何解释没有年份的日期,留给您自行决定.不过,这里有一个只使用 strptime 的快速入门示例.

我在更大的代码库中使用了它,我们需要一些非常通用的日期识别.看:自适应日期时间解析器:

#pragma once#include <字符串>#include <chrono>#include #include <列表>命名空间 mylib { 命名空间日期时间 {/** 支持多格式的日期时间解析器** 旨在提供支持的格式列表,按顺序排列* 偏爱.默认情况下,解析器不是自适应的(模式为固定").** 在自适应模式下,格式可能需要** - 粘性(始终重用第一个匹配的格式)* - ban_failed(从列表中删除失败的模式;禁止只发生* 成功解析以避免禁止无效输入的所有模式)* - mru(保留列表但重新排序以提高性能)** 注意:* 如果格式不明确(例如 mm-dd-yyyy 与 dd-mm-yyyy),则允许* 重新排序会导致不可预测的结果.* =>仅在没有模糊格式时才使用 `mru`** 笔记:* 函数对象是有状态的.在算法中,通过引用传递* (`std::ref(obj)`) 避免复制模式并确保正确* 适应性行为** 笔记:* - 在 %Z 之前使用 %z 以正确处理 [-+]hhmm POSIX TZ 指示* -adaptive_parser 是线程安全的,只要它不在任何自适应* 模式(唯一允许的标志是 `full_match`)*/类自适应解析器{上市:typedef std::listlist_t;枚举 mode_t {fixed = 0,//不适应;继续以相同的顺序尝试相同的格式sticky = 1,//一致地重复使用第一个成功的格式ban_failed = 2,//忘记失败的格式mru = 4,//通过把最后一个已知的好东西放在前面来优化full_match = 8,//要求接受完全匹配};自适应解析器(mode_t m = full_match);自适应解析器(mode_t m,list_t 格式);//返回自纪元以来的秒数std::chrono::seconds operator()(std::string);私人的:mode_t _mode;list_t _formats;};静态内联adaptive_parser::mode_t operator|(adaptive_parser::mode_t lhs,adaptive_parser::mode_t rhs) {return static_cast(static_cast(lhs) | static_cast(rhs));}} }

你可以这样使用它:

在 Wandbox 上直播

#include "adaptive_parser.h"#include <字符串>#include int main() {使用命名空间 mylib::datetime;自适应解析器解析器 { 自适应解析器::全匹配,{"%Y %dth %B %H:%M %p","%dth %B %H:%M %p","%Y %dth %B %I:%M %p","%dth %B %I:%M %p",} };for (std::string const input : {"2017 年 1 月 28 日晚上 11:50",1 月 28 日晚上 11:50","2017 年 1 月 28 日下午 1:50",1 月 28 日下午 1:50",})尝试 {std::cout <<解析"<<输入<<"'
";std::cout <<" -> 纪元 " <<解析器(输入).计数()<<"
";} catch(std::exception const& e) {std::cout <<例外:"<

印刷:

解析 '2017 年 1 月 28 日晚上 11:50'->时代 1485604200解析1 月 28 日晚上 11:50"->纪元-2206613400解析2017 年 1 月 28 日下午 1:50"->纪元1485568200解析1 月 28 日下午 1:50"->纪元-2206649400

<块引用>

请注意,纪元 -2206613400 对应于 1900 年 1 月 28 日

实施

该实现附带了一组经过精心调整、互不歧义的日期模式.我们的项目还使用了一些"hacks"来规范化奇怪的输入格式,这些在此都被省略了(可以从被注释掉的 detail::normalize_... 函数调用中了解思路):

#include "adaptive_parser.h"#include "time.h"#include <向量>#include <算法>#include <cassert>#include #include 命名空间{枚举级别 { LOG_DEBUG };静态 std::ostream s_devnull { nullptr };结构{std::ostream&日志(整数)const {#ifdef NDEBUG返回 s_devnull;#别的返回 std::cerr;#万一};s_trace;}命名空间 mylib { 命名空间日期时间 {自适应解析器::自适应解析器(mode_t m): _mode(m), _formats {//当您怀疑有歧义或部分匹配时,使用 EOL_MARK 调试模式#define EOL_MARK ""//" EOL_MARK"//在 %Z 之前使用 %z 来正确处理 [-+]hhmm POSIX 时区偏移#if __GLIBC__ == 2 &&__GLIBC_MINOR__ <= 15//ubuntu 12.04 使用 eglibc 并且不解析所有花里胡哨#define WITH_TZ(prefix, suffix) 前缀" %z" 后缀, 前缀" %Z" 后缀, 前缀" Z" 后缀, 前缀" (UTC)" 后缀, 前缀后缀#别的#define WITH_TZ(prefix, suffix) 前缀"%z"后缀,前缀"%Z"后缀,前缀后缀#万一WITH_TZ("%Y-%m-%dT%H:%M:%S.%f", EOL_MARK),WITH_TZ("%Y-%m-%dT%H:%M:%S", EOL_MARK),WITH_TZ("%Y-%m-%dT%H:%M", EOL_MARK),//WITH_TZ("%Y-%m-%dT%I:%M:%S.%f %p", EOL_MARK),WITH_TZ("%Y-%m-%dT%I:%M:%S %p", EOL_MARK),WITH_TZ("%Y-%m-%dT%I:%M %p", EOL_MARK),//WITH_TZ("%Y-%m-%d%n%H:%M:%S", EOL_MARK),WITH_TZ("%Y-%m-%d%n%I:%M:%S %p", EOL_MARK),//WITH_TZ("%a %b %d %H:%M:%S %Y", EOL_MARK),WITH_TZ("%a %b %d %I:%M:%S %p %Y", EOL_MARK),//WITH_TZ("%a %d %b %H:%M:%S %Y", EOL_MARK),WITH_TZ("%a %d %b %I:%M:%S %p %Y", EOL_MARK),//WITH_TZ("%a, %b %d %H:%M:%S %Y", EOL_MARK),WITH_TZ("%a, %b %d %I:%M:%S %p %Y", EOL_MARK),//WITH_TZ("%a, %d %b %H:%M:%S %Y", EOL_MARK),WITH_TZ("%a, %d %b %I:%M:%S %p %Y", EOL_MARK),//////WITH_TZ("%a %d %b %Y %H:%M:%S", EOL_MARK),WITH_TZ("%a %d %b %Y %I:%M:%S %p", EOL_MARK),//WITH_TZ("%a, %d %b %Y %H:%M:%S", EOL_MARK),WITH_TZ("%a, %d %b %Y %I:%M:%S %p", EOL_MARK),#undef WITH_TZ/** 人类日期:** 这种模式会使 "%s" 产生歧义(遗憾的是,因为它* 导致明显虚假的结果,例如将1110871987"解析为* "2063-04-24 16:25:59" (因为 "1110-8-7T19:8:7" 匹配*%Y-%m-%dT%H:%M:%S %Z"不知何故......).** 我们通过规范化检测来解决这个问题* 'yyyyMMddhhmmss' 人类日期转换成 ISO 格式作为预处理* 步.*///"%Y %m %d %H %M %S" EOL_MARK,//纪元秒"@%s" EOL_MARK,%s"EOL_MARK,}{ }自适应解析器::自适应解析器(mode_t m, list_t 格式): _mode(m), _formats(std::move(formats)){ }std::chrono::secondsadaptive_parser::operator()(std::string input) {if (_formats.empty()) throw 
std::invalid_argument("datetime::adaptive_parser 中没有候选模式");if (input.empty()) throw std::invalid_argument("空输入不能被解析为日期时间");//detail::normalize_tz(input);//detail::normalize_tz_utc_w_offset_re(input);//detail::normalize_date_sep(input);//detail::normalize_human_date(input);//detail::normalize_redundant_timezone_description(input);输入 += EOL_MARK;std::vector失败的;布尔匹配 = 假;结构 tm 时间结构;自动模式 = _formats.begin();for (; !matched && 模式 != _formats.end(); ++pattern) {memset(&time_struct, 0, sizeof(time_struct));auto tail = ::strptime(input.c_str(), pattern->c_str(), &time_struct);匹配 = 尾;//如果(匹配)s_trace.log(LOG_DEBUG)<<输入'"<<输入<<"'成功匹配模式'" <<*模式<<'离开'"<<尾<<"'
";如果(_mode & full_match){while (tail && *tail && std::isspace(*tail))++尾巴;//跳过尾随空格匹配 &= 尾 &&!*尾巴;}如果(匹配)休息;if (_mode & ban_failed)失败.push_back(模式);}如果(匹配){for (auto to_ban : 失败) {s_trace.log(LOG_DEBUG)<<禁止失败的日期时间模式:"<<*to_ban <<"
";_formats.erase(to_ban);}如果(_mode & 粘性){s_trace.log(LOG_DEBUG)<<使后续的日期时间模式变得粘稠:" <<*模式<<"
";_formats = { *pattern };}if ((_mode & mru) && 模式!= _formats.begin()) {断言(模式!= _formats.end());//与 `matched==true` 不一致s_trace.log(LOG_DEBUG)<<"将后续的日期时间模式提升到顶部:" <<*模式<<"
";std::rotate(_formats.begin(), 模式, std::next(pattern));}#ifdef __FreeBSD__自动原始 = (time_struct.tm_gmtoff)?mktime(&time_struct) : timegm(&time_struct);返回 std::chrono::seconds(raw);#别的长偏移= time_struct.tm_gmtoff;返回 std::chrono::seconds(timegm (&time_struct) - 偏移量);#万一}s_trace.log(LOG_DEBUG)<<无法解析日期时间输入"<<输入<<"' 与 " <<_formats.size() <<" 模式
";throw std::runtime_error("输入不能被解析为日期时间");}} }

I am working on code which needs to compile with the NDK toolchain. Unfortunately, the latest version only supports up to gcc 4.9, which does not support C++11 date-time parsing. I have a date-time string which I need to run through two or three formats to figure out which one parses it.

So I tried the Linux API strptime, which sometimes returns values even when the wrong format is used. I had to abandon it and move to Boost.

As for Boost, I am using version 1_64. According to the documentation (linked here), I could not find a way to parse a single-digit hour format.

/**
 * Parse a date-time string with a Boost.Date_Time input format and report it
 * as milliseconds since 1970-01-01 00:00:00.
 *
 * @param str    text to parse
 * @param format format string handed to boost::posix_time::time_input_facet
 * @param epoch  out: milliseconds since the epoch; always reset to 0 first and
 *               left at 0 when parsing fails
 * @return true when a date-time was extracted from `str`, false otherwise
 *
 * NOTE(review): the result is converted to `unsigned long`; instants before
 * 1970 (e.g. year-less inputs) and targets with a 32-bit `unsigned long`
 * cannot be represented faithfully.
 */
bool getepochtime(const std::string &str, const std::string &format, unsigned long &epoch){
    namespace bt = boost::posix_time;
    epoch = 0;

    // The std::locale takes ownership of the facet, so the raw `new` does not leak.
    std::istringstream is(str);
    is.imbue(std::locale(std::locale::classic(), new bt::time_input_facet(format)));

    // A default-constructed ptime is not_a_date_time; it stays that way when
    // extraction fails.
    bt::ptime pt;
    is >> pt;
    if (pt.is_not_a_date_time()) {
        return false;
    }

    const bt::ptime timet_start(boost::gregorian::date(1970, 1, 1));
    const bt::time_duration diff = pt - timet_start;
    // total_milliseconds() scales the tick count down directly. The previous
    // `1000 * ticks() / ticks_per_second` multiplied first and overflowed the
    // 64-bit tick type when Boost is built with nanosecond resolution.
    epoch = static_cast<unsigned long>(diff.total_milliseconds());
    return true;
}

int main() {
    // Each sample pairs an input string with the format it is parsed against.
    struct sample {
        const char* text;
        const char* format;
    };
    static const sample samples[] = {
        // this works.
        { "28th january 11:50 PM", "%dth %B %H:%M %p" },
        // this does not work.
        { "28th january 1:50 PM", "%dth %B %I:%M %p" },
        // nor this.
        { "28th january 1:50 PM", "%dth %B %H:%M %p" },
    };

    // Receives the result of the most recent call; the return value is ignored.
    unsigned long eval;
    for (const sample& s : samples) {
        getepochtime(s.text, s.format, eval);
    }
    return 0;
}

Any help will be appreciated.

解决方案

I'll leave it to you to sort out how you want dates without years to be interpreted. However, here's a quick start using /just/ strptime.

I used it in a larger codebase, and we needed some pretty versatile date recognition. Behold: the adaptive datetime parser:

#pragma once

#include <string>
#include <chrono>
#include <cstdint>
#include <list>

namespace mylib { namespace datetime {

/*
 * adaptive_parser: a date/time parser that tries several formats in turn.
 *
 * The parser holds an ordered list of candidate formats (most preferred
 * first) and, for every input, walks that list until one of them matches.
 * The default mode is `full_match` on its own, which is not adaptive: the
 * same formats are tried in the same order on every call.
 *
 * Adaptive behaviour is opted into by OR-ing mode flags together:
 *
 *  - sticky:     once a format has matched, keep using only that format
 *  - ban_failed: drop formats that failed before the winning one; this only
 *                happens when some format did match, so that a single bad
 *                input cannot wipe out the whole list
 *  - mru:        keep all formats but move the winning one to the front
 *
 * CAUTION:
 *   With ambiguous formats (e.g. mm-dd-yyyy vs dd-mm-yyyy) any re-ordering
 *   makes the outcome depend on earlier inputs.
 *   => Only combine `mru` with formats that cannot be confused.
 *
 * NOTE:
 *   The function object carries state. Hand it to algorithms through
 *   `std::ref(obj)` so the format list is not copied and adaptations made
 *   during the algorithm are kept.
 *
 * NOTE:
 *   - list %z before %Z so [-+]hhmm POSIX TZ indications are handled
 *   - when no adaptive flag is set (at most `full_match`), calls do not
 *     modify the format list, which is what makes sharing a parser between
 *     threads acceptable
 */
class adaptive_parser {
  public:
    using list_t = std::list<std::string>;

    // Bit flags; combine them with operator| below.
    enum mode_t {
        fixed      = 0, // no adaptation: same formats, same order, every time
        sticky     = 1, // lock onto the first format that succeeds
        ban_failed = 2, // discard formats that were tried and failed
        mru        = 4, // move the most recent winner to the front
        full_match = 8, // accept a match only if it consumes the whole input
    };

    // Uses the built-in list of formats.
    adaptive_parser(mode_t m = full_match);
    // Uses the caller's formats, tried in the given order.
    adaptive_parser(mode_t m, list_t formats);

    // Parses the argument and returns seconds since the epoch.
    std::chrono::seconds operator()(std::string);

  private:
    mode_t _mode;
    list_t _formats;
};

// Combines two mode flags while keeping the result typed as mode_t
// (a plain `|` on the unscoped enum would yield int).
static inline adaptive_parser::mode_t operator|(adaptive_parser::mode_t lhs, adaptive_parser::mode_t rhs) {
    const int combined = static_cast<int>(lhs) | static_cast<int>(rhs);
    return static_cast<adaptive_parser::mode_t>(combined);
}

} }

You can use it as such:

Live On Wandbox

#include "adaptive_parser.h"
#include <string>
#include <iostream>

int main() {    
    using namespace mylib::datetime;

    adaptive_parser parser { adaptive_parser::full_match, {
            "%Y %dth %B %H:%M %p",
               "%dth %B %H:%M %p",
            "%Y %dth %B %I:%M %p",
               "%dth %B %I:%M %p",
        } };

    for (std::string const input : {
            "2017 28th january 11:50 PM",
            "28th january 11:50 PM",
            "2017 28th january 1:50 PM",
            "28th january 1:50 PM",
        })
    try {
        std::cout << "Parsing '" << input << "'
";
        std::cout << " -> epoch " << parser(input).count() << "
";
    } catch(std::exception const& e) {
        std::cout << "Exception: " << e.what() << "
";
    }
}

Printing:

Parsing '2017 28th january 11:50 PM'
 -> epoch 1485604200
Parsing '28th january 11:50 PM'
 -> epoch -2206613400
Parsing '2017 28th january 1:50 PM'
 -> epoch 1485568200
Parsing '28th january 1:50 PM'
 -> epoch -2206649400

Note that epoch -2206613400 corresponds to 28 jan 1900

Implementation

The implementation comes with a bunch of pretty well-tuned, unambiguous date patterns. Our project used a number of "hacks" to normalize strange input formats; these have been omitted (see the commented-out calls to the detail::normalize_... functions for ideas):

#include "adaptive_parser.h"
#include "time.h"
#include <vector>
#include <algorithm>
#include <cassert>
#include <cstring>
#include <iostream>

namespace {
    // Log levels accepted by the trace helper; only one is defined.
    enum level { LOG_DEBUG };

    // Stream built without a stream buffer; it serves as the sink when tracing
    // is compiled out.
    std::ostream s_devnull(nullptr);

    // Picks the stream trace output goes to. The level argument is accepted
    // but not inspected.
    struct trace_sink {
        std::ostream& log(int /*level*/) const {
#ifdef NDEBUG
            // Release builds: route traces to the buffer-less stream.
            return s_devnull;
#else
            // Debug builds: traces go to standard error.
            return std::cerr;
#endif
        }
    };

    trace_sink s_trace;
}

namespace mylib { namespace datetime { 

    /*
     * Construct a parser with mode `m` and the built-in format list.
     *
     * The list is assembled at compile time from string-literal concatenation:
     * every WITH_TZ(prefix, suffix) entry expands to several list elements,
     * one per supported time-zone tail (see the two definitions below),
     * followed by the plain `prefix suffix` form without a zone.
     *
     * EOL_MARK is intentionally NOT #undef'ed at the end: operator() appends
     * the same marker to its input, so both must see the same definition.
     *
     * NOTE(review): %f and %s are not POSIX strptime conversions; whether they
     * are honoured depends on the C library in use - confirm on the target.
     */
    adaptive_parser::adaptive_parser(mode_t m) 
        : _mode(m), _formats {
// use EOL_MARK to debug patterns when you suspect ambiguity or partial matches
// (it is appended to every pattern here and to every input in operator())
#define EOL_MARK "" // " EOL_MARK"
// use %z before %Z to correctly handle [-+]hhmm POSIX time zone offsets
#if __GLIBC__ == 2 && __GLIBC_MINOR__ <= 15
    // ubuntu 12.04 used eglibc and doesn't parse all bells and whistles,
    // so literal " Z" and " (UTC)" tails are listed explicitly as well
#define WITH_TZ(prefix, suffix) prefix " %z" suffix, prefix " %Z" suffix, prefix " Z" suffix, prefix " (UTC)" suffix, prefix suffix
#else
#define WITH_TZ(prefix, suffix) prefix " %z" suffix, prefix " %Z" suffix, prefix suffix
#endif
            // ISO-8601 style with 'T' separator, 24-hour clock
            WITH_TZ("%Y-%m-%dT%H:%M:%S.%f", EOL_MARK),
            WITH_TZ("%Y-%m-%dT%H:%M:%S", EOL_MARK),
            WITH_TZ("%Y-%m-%dT%H:%M", EOL_MARK),
            // same, 12-hour clock with AM/PM
            WITH_TZ("%Y-%m-%dT%I:%M:%S.%f %p", EOL_MARK),
            WITH_TZ("%Y-%m-%dT%I:%M:%S %p", EOL_MARK),
            WITH_TZ("%Y-%m-%dT%I:%M %p", EOL_MARK),
            // date and time separated by %n instead of 'T'
            WITH_TZ("%Y-%m-%d%n%H:%M:%S", EOL_MARK),
            WITH_TZ("%Y-%m-%d%n%I:%M:%S %p", EOL_MARK),
            // weekday, month name, day, time, year
            WITH_TZ("%a %b %d %H:%M:%S %Y", EOL_MARK),
            WITH_TZ("%a %b %d %I:%M:%S %p %Y", EOL_MARK),
            // weekday, day, month name, time, year
            WITH_TZ("%a %d %b %H:%M:%S %Y", EOL_MARK),
            WITH_TZ("%a %d %b %I:%M:%S %p %Y", EOL_MARK),
            // as above, with a comma after the weekday
            WITH_TZ("%a, %b %d %H:%M:%S %Y", EOL_MARK),
            WITH_TZ("%a, %b %d %I:%M:%S %p %Y", EOL_MARK),
            //
            WITH_TZ("%a, %d %b %H:%M:%S %Y", EOL_MARK),
            WITH_TZ("%a, %d %b %I:%M:%S %p %Y", EOL_MARK),
            ////// year placed before the time
            WITH_TZ("%a %d %b %Y %H:%M:%S", EOL_MARK),
            WITH_TZ("%a %d %b %Y %I:%M:%S %p", EOL_MARK),
            //
            WITH_TZ("%a, %d %b %Y %H:%M:%S", EOL_MARK),
            WITH_TZ("%a, %d %b %Y %I:%M:%S %p", EOL_MARK),
#undef WITH_TZ
            /*
             * HUMAN DATE:
             *
             * This pattern would ambiguate the "%s" one (sadly, because it
             * leads to obviously bogus results like parsing "1110871987" into
             * "2063-04-24 16:25:59" (because "1110-8-7T19:8:7" matches
             * "%Y-%m-%dT%H:%M:%S %Z" somehow...).
             *
             * We work around this issue by normalizing detected
             * 'yyyyMMddhhmmss' human dates into iso format as a preprocessing
             * step.
             */
            //"%Y %m %d %H %M %S" EOL_MARK,

            // epoch seconds (optionally prefixed with '@'); listed last
            "@%s" EOL_MARK,
            "%s" EOL_MARK,
           }
    { }

    // Construct a parser with mode `m` that tries exactly the caller-supplied
    // formats, in the order given. The list arrives by value and is moved into
    // the member, so passing a temporary does not copy the strings.
    adaptive_parser::adaptive_parser(mode_t m, list_t formats)
        : _mode(m)
        , _formats(std::move(formats))
    {
    }

    /*
     * Parse `input` as a date-time and return it as seconds since the epoch.
     *
     * The candidate formats are tried in list order with strptime until one
     * matches. With `full_match` set, a format only counts as a match when
     * nothing but whitespace is left after the parsed portion.
     *
     * After a successful match the format list may be adapted, depending on
     * the mode flags:
     *   - ban_failed: formats tried before the winner are removed
     *   - sticky:     all formats except the winner are removed
     *   - mru:        the winner is moved to the front of the list
     * Nothing is adapted when no format matches.
     *
     * The parsed broken-down time is converted with timegm and corrected by
     * the parsed UTC offset (tm_gmtoff); FreeBSD uses mktime when an offset
     * was parsed.
     *
     * Throws std::invalid_argument when there are no formats or `input` is
     * empty, and std::runtime_error when no format matches.
     */
    std::chrono::seconds adaptive_parser::operator()(std::string input) {
        if (_formats.empty()) throw std::invalid_argument("No candidate patterns in datetime::adaptive_parser");
        if (input.empty()) throw std::invalid_argument("Empty input cannot be parsed as a date time");

        //detail::normalize_tz(input);
        //detail::normalize_tz_utc_w_offset_re(input);
        //detail::normalize_date_sep(input);
        //detail::normalize_human_date(input);
        //detail::normalize_redundant_timezone_description(input);
        input += EOL_MARK; // same marker the built-in patterns end with

        // Formats rejected before the winner was found (ban_failed mode only).
        std::vector<list_t::iterator> failed;

        bool matched = false;
        struct tm time_struct;

        auto pattern = _formats.begin();
        for (; pattern != _formats.end(); ++pattern) {
            // Start from a clean slate so fields from a failed attempt don't leak.
            memset(&time_struct, 0, sizeof(time_struct));
            auto tail = ::strptime(input.c_str(), pattern->c_str(), &time_struct);

            matched = (tail != nullptr);
            //if (matched) s_trace.log(LOG_DEBUG) << "Input '" << input << "' successfully matched pattern '" << *pattern << "' leaving '" << tail << "'\n";

            if (matched && (_mode & full_match)) {
                // Skip trailing whitespace. The cast avoids undefined behaviour
                // in isspace for bytes that are negative as plain char.
                while (*tail && std::isspace(static_cast<unsigned char>(*tail)))
                    ++tail;
                matched = !*tail;
            }

            if (matched)
                break; // leave `pattern` pointing at the winning format

            if (_mode & ban_failed)
                failed.push_back(pattern);
        }

        if (matched) {
            // std::list::erase only invalidates the erased node, so `pattern`
            // and the remaining entries of `failed` stay usable.
            for (auto to_ban : failed) {
                s_trace.log(LOG_DEBUG) << "Banning failed datetime pattern: " << *to_ban << "\n";
                _formats.erase(to_ban);
            }

            if (_mode & sticky) {
                s_trace.log(LOG_DEBUG) << "Made succeeding datetime pattern sticky: " << *pattern << "\n";
                // Drop everything around the winner instead of reassigning the
                // list: reassignment would invalidate `pattern`, which the mru
                // check below still reads.
                _formats.erase(_formats.begin(), pattern);
                _formats.erase(std::next(pattern), _formats.end());
            }

            if ((_mode & mru) && pattern != _formats.begin()) {
                assert(pattern != _formats.end()); // inconsistent with `matched==true`

                s_trace.log(LOG_DEBUG) << "Promote succeeding datetime pattern to the top: " << *pattern << "\n";
                // Relink the winning node at the front; the relative order of
                // the other formats is preserved and no strings are moved.
                _formats.splice(_formats.begin(), _formats, pattern);
            }
#ifdef __FreeBSD__
            auto raw = (time_struct.tm_gmtoff)? mktime(&time_struct) : timegm(&time_struct);
            return std::chrono::seconds(raw);
#else
            // Read the offset first: timegm may normalise the struct in place.
            long offset = time_struct.tm_gmtoff;
            return std::chrono::seconds(timegm (&time_struct) - offset);
#endif
        }

        s_trace.log(LOG_DEBUG) << "Failed to parse datetime input '" << input << "' with " << _formats.size() << " patterns\n";
        throw std::runtime_error("Input cannot be parsed as a date time");
    }

} }

相关文章