blob: 9a5e88ed3b83556e313edc2ea81efafb842cb786 [file] [log] [blame]
// Copyright 2006-2008 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/string_tokenizer.h"
#include "testing/gtest/include/gtest/gtest.h"
using std::string;
namespace gurl_base {
namespace {
// Splitting on a single space is expected to produce each word in order, with
// none of the returned tokens flagged as a delimiter.
TEST(StringTokenizerTest, Simple) {
  const std::string input = "this is a test";
  StringTokenizer t(input, " ");

  // Before any token has been returned, the start of the string is treated as
  // a delimiter.
  EXPECT_TRUE(t.token_is_delim());

  const char* const kWords[] = {"this", "is", "a", "test"};
  for (const char* word : kWords) {
    EXPECT_TRUE(t.GetNext());
    EXPECT_FALSE(t.token_is_delim());
    EXPECT_EQ(word, t.token());
  }

  // After the last token, the end of the string is treated as a delimiter.
  EXPECT_FALSE(t.GetNext());
  EXPECT_TRUE(t.token_is_delim());
}
// Runs the same full tokenization twice on one tokenizer, calling Reset()
// after each pass; both passes are expected to observe identical results.
TEST(StringTokenizerTest, Reset) {
  const std::string input = "this is a test";
  StringTokenizer t(input, " ");
  const char* const kWords[] = {"this", "is", "a", "test"};

  for (int pass = 0; pass < 2; ++pass) {
    // At the start of each pass no token has been returned yet.
    EXPECT_TRUE(t.token_is_delim());

    for (const char* word : kWords) {
      EXPECT_TRUE(t.GetNext());
      EXPECT_FALSE(t.token_is_delim());
      EXPECT_EQ(word, t.token());
    }

    // Input exhausted.
    EXPECT_FALSE(t.GetNext());
    EXPECT_TRUE(t.token_is_delim());

    t.Reset();
  }
}
// With RETURN_DELIMS set, each separating space is expected to come back as
// its own token, flagged via token_is_delim().
TEST(StringTokenizerTest, RetDelims) {
  const std::string input = "this is a test";
  StringTokenizer t(input, " ");
  t.set_options(StringTokenizer::RETURN_DELIMS);

  EXPECT_TRUE(t.token_is_delim());

  // Expected sequence of tokens and whether each one is a delimiter.
  struct Step {
    const char* text;
    bool is_delim;
  };
  const Step kSteps[] = {
      {"this", false}, {" ", true}, {"is", false},   {" ", true},
      {"a", false},    {" ", true}, {"test", false},
  };
  for (const Step& step : kSteps) {
    EXPECT_TRUE(t.GetNext());
    EXPECT_EQ(step.is_delim, t.token_is_delim());
    EXPECT_EQ(step.text, t.token());
  }

  EXPECT_FALSE(t.GetNext());
  EXPECT_TRUE(t.token_is_delim());
}
// With RETURN_EMPTY_TOKENS set, adjacent commas are expected to yield an empty
// token between them, while the comma inside the quoted section does not split
// the first token.
TEST(StringTokenizerTest, RetEmptyTokens) {
  const std::string input = "foo='a, b',,bar,,baz,quux";
  StringTokenizer t(input, ",");
  t.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  t.set_quote_chars("'");

  const char* const kTokens[] = {"foo='a, b'", "", "bar", "", "baz", "quux"};
  for (const char* token : kTokens) {
    // Stop the test outright if the tokenizer runs dry early.
    ASSERT_TRUE(t.GetNext());
    EXPECT_EQ(token, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
// A leading delimiter is expected to produce an empty first token when
// RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_AtStart) {
  const std::string input = ",bar";
  StringTokenizer t(input, ",");
  t.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  t.set_quote_chars("'");

  const char* const kTokens[] = {"", "bar"};
  for (const char* token : kTokens) {
    ASSERT_TRUE(t.GetNext());
    EXPECT_EQ(token, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
// A trailing delimiter is expected to produce an empty final token when
// RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_AtEnd) {
  const std::string input = "bar,";
  StringTokenizer t(input, ",");
  t.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  t.set_quote_chars("'");

  const char* const kTokens[] = {"bar", ""};
  for (const char* token : kTokens) {
    ASSERT_TRUE(t.GetNext());
    EXPECT_EQ(token, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
// An input consisting of just one delimiter is expected to produce two empty
// tokens (one on each side of it) when RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_Both) {
  const std::string input = ",";
  StringTokenizer t(input, ",");
  t.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  t.set_quote_chars("'");

  const char* const kTokens[] = {"", ""};
  for (const char* token : kTokens) {
    ASSERT_TRUE(t.GetNext());
    EXPECT_EQ(token, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
// An empty input is expected to produce exactly one empty token when
// RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_Empty) {
  const std::string empty_input = "";
  StringTokenizer t(empty_input, ",");
  t.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);

  // The single (empty) token.
  ASSERT_TRUE(t.GetNext());
  EXPECT_EQ("", t.token());

  // Nothing further.
  EXPECT_FALSE(t.GetNext());
}
// With both RETURN_DELIMS and RETURN_EMPTY_TOKENS set, the expected stream
// interleaves every comma with the (possibly empty) tokens around it; the
// comma inside the quoted section stays part of the first token.
TEST(StringTokenizerTest, RetDelimsAndEmptyTokens) {
  const std::string input = "foo='a, b',,bar,,baz,quux";
  StringTokenizer t(input, ",");
  t.set_options(StringTokenizer::RETURN_DELIMS |
                StringTokenizer::RETURN_EMPTY_TOKENS);
  t.set_quote_chars("'");

  const char* const kTokens[] = {
      "foo='a, b'", ",", "", ",", "bar", ",", "", ",", "baz", ",", "quux",
  };
  for (const char* token : kTokens) {
    ASSERT_TRUE(t.GetNext());
    EXPECT_EQ(token, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
// Several different delimiter characters are supplied at once; runs of them
// between words are expected to be skipped without producing tokens.
TEST(StringTokenizerTest, ManyDelims) {
  const std::string input = "this: is, a-test";
  StringTokenizer t(input, ": ,-");

  const char* const kWords[] = {"this", "is", "a", "test"};
  for (const char* word : kWords) {
    EXPECT_TRUE(t.GetNext());
    EXPECT_EQ(word, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
// Tokenizes an HTTP-style header line with RETURN_DELIMS set. Every delimiter
// character is expected to come back as its own single-character token, even
// when several delimiters appear back to back.
TEST(StringTokenizerTest, ParseHeader) {
  const std::string input = "Content-Type: text/html ; charset=UTF-8";
  StringTokenizer t(input, ": ;=");
  t.set_options(StringTokenizer::RETURN_DELIMS);

  EXPECT_TRUE(t.token_is_delim());

  // Expected sequence of tokens and whether each one is a delimiter.
  struct Step {
    const char* text;
    bool is_delim;
  };
  const Step kSteps[] = {
      {"Content-Type", false},
      {":", true},
      {" ", true},
      {"text/html", false},
      {" ", true},
      {";", true},
      {" ", true},
      {"charset", false},
      {"=", true},
      {"UTF-8", false},
  };
  for (const Step& step : kSteps) {
    EXPECT_TRUE(t.GetNext());
    EXPECT_EQ(step.is_delim, t.token_is_delim());
    EXPECT_EQ(step.text, t.token());
  }

  EXPECT_FALSE(t.GetNext());
  EXPECT_TRUE(t.token_is_delim());
}
// With a quote character configured, the space inside the quoted section is
// expected not to split it; the token is returned with its quotes intact.
TEST(StringTokenizerTest, ParseQuotedString) {
  const std::string input = "foo bar 'hello world' baz";
  StringTokenizer t(input, " ");
  t.set_quote_chars("'");

  const char* const kTokens[] = {"foo", "bar", "'hello world'", "baz"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(t.GetNext());
    EXPECT_EQ(token, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
// An opening quote with no matching close: the rest of the input, starting at
// the quote, is expected to be returned as a single token.
TEST(StringTokenizerTest, ParseQuotedString_Malformed) {
  const std::string input = "bar 'hello wo";
  StringTokenizer t(input, " ");
  t.set_quote_chars("'");

  const char* const kTokens[] = {"bar", "'hello wo"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(t.GetNext());
    EXPECT_EQ(token, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
// Both ' and " are configured as quote characters. Double quotes nested inside
// a single-quoted section are expected to stay part of that token, and the
// unmatched trailing double quote is expected to stay attached to "baz".
TEST(StringTokenizerTest, ParseQuotedString_Multiple) {
  const std::string input = "bar 'hel\"lo\" wo' baz\"";
  StringTokenizer t(input, " ");
  t.set_quote_chars("'\"");

  const char* const kTokens[] = {"bar", "'hel\"lo\" wo'", "baz\""};
  for (const char* token : kTokens) {
    EXPECT_TRUE(t.GetNext());
    EXPECT_EQ(token, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
// A backslash-escaped quote inside a quoted section is expected not to end
// that section; the token is returned with the backslash left in place.
TEST(StringTokenizerTest, ParseQuotedString_EscapedQuotes) {
  const std::string input = "foo 'don\\'t do that'";
  StringTokenizer t(input, " ");
  t.set_quote_chars("'");

  const char* const kTokens[] = {"foo", "'don\\'t do that'"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(t.GetNext());
    EXPECT_EQ(token, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
// Comma and space are both delimiters here. The ", " inside the quoted value
// is expected not to split the first token, while the ", " after it separates
// the two tokens without yielding anything in between.
TEST(StringTokenizerTest, ParseQuotedString_EscapedQuotes2) {
  const std::string input = "foo='a, b', bar";
  StringTokenizer t(input, ", ");
  t.set_quote_chars("'");

  const char* const kTokens[] = {"foo='a, b'", "bar"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(t.GetNext());
    EXPECT_EQ(token, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
// With WhitespacePolicy::kSkipOver, the tabs, spaces and CR/LF surrounding
// each comma-separated item are expected to be left out of the tokens.
TEST(StringTokenizerTest, ParseWithWhitespace_NoQuotes) {
  const std::string input = "\t\t\t foo=a,\r\n b,\r\n\t\t\t bar\t ";
  StringTokenizer t(input, ",", StringTokenizer::WhitespacePolicy::kSkipOver);

  const char* const kTokens[] = {"foo=a", "b", "bar"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(t.GetNext());
    EXPECT_EQ(token, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
// Combines WhitespacePolicy::kSkipOver with a quote character: surrounding
// whitespace is expected to be dropped, while the ", " inside the quoted value
// is expected to be kept as part of the first token.
TEST(StringTokenizerTest, ParseWithWhitespace_Quotes) {
  const std::string input = "\t\t\t foo='a, b',\t\t\t bar\t ";
  StringTokenizer t(input, ",", StringTokenizer::WhitespacePolicy::kSkipOver);
  t.set_quote_chars("'");

  const char* const kTokens[] = {"foo='a, b'", "bar"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(t.GetNext());
    EXPECT_EQ(token, t.token());
  }

  EXPECT_FALSE(t.GetNext());
}
} // namespace
} // namespace gurl_base