티스토리 뷰

strings.pdf


Detect Matches

> library(data.table)

> library(stringr)


# 함수 실습을 위해 간단한 데이터테이블을 만든다.

> string <- data.table(member = c("mother", "father",

+                                 "grandmother", "grandfather",

+                                 "son 1", "daughter 1",

+                                 "son 2", "daughter 2","uncle"),

+                      old = c(50, 55, 80, 84, 14, 20, 15,18,40))


> string

        member old

1:      mother  50

2:      father  55

3: grandmother  80

4: grandfather  84

5:       son 1  14

6:  daughter 1  20

7:       son 2  15

8:  daughter 2  18

9:       uncle  40

 


# str_detect(string, pattern) : 문자열(string)에 pattern이 포함되어 있는지를 논리값(TRUE/FALSE)으로 알려준다.

> str_detect(string$member, "mo")

[1]  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE


# str_which(string, pattern) : 문자열(string)에서 pattern을 비교하여 TRUE인 인덱스를 알려준다

> str_which(string$member, "mo")

[1] 1 3


# str_detect, str_which 응용

> mother_index1 <- str_detect(string$member, "mo")

> mother_index2 <- str_which(string$member, "mo")


> string[mother_index1]

        member old

1:      mother  50

2: grandmother  80


> string[mother_index2]

        member old

1:      mother  50

2: grandmother  80



# str_count(string, pattern) : string에서 pattern이 일치하는 개수를 세어 준다.

> str_count(string$member, "a")

[1] 0 1 1 2 0 1 0 1 0


# str_locate(string, pattern) : string에서 pattern이 첫 번째로 일치하는 위치(start, end)를 알려준다. 일치하는 것이 없으면 NA.(행렬 반환)

> str_locate(string$member, "a")

      start end

 [1,]    NA  NA

 [2,]     2   2

 [3,]     3   3

 [4,]     3   3

 [5,]    NA  NA

 [6,]     2   2

 [7,]    NA  NA

 [8,]     2   2

 [9,]    NA  NA


# str_locate_all(string, pattern) : string에서 pattern이 일치하는 모든 위치(start, end)를 알려준다.(리스트 반환)

> str_locate_all(string$member, "a")

[[1]]

     start end


[[2]]

     start end

[1,]     2   2


[[3]]

     start end

[1,]     3   3


        .

        .

        .



Subset Strings

#str_sub(string, start = , end = ) : start 부터 end까지 나타낸다.

> str_sub(string$member, start = 1, end = 3)

[1] "mot" "fat" "gra" "gra" "son" "dau" "son" "dau" "unc"



#str_subset(string, pattern) : 문자열(string)에서 pattern을 포함하는 것을 나타낸다.

> str_subset(string$member, "fa")

[1] "father"      "grandfather"


#str_subset 응용

> father_subset <- str_subset(string$member, "fa")

> string[member %in% father_subset]

        member old

1:      father  55

2: grandfather  84



#str_extract(string, pattern) : 첫번째 pattern을 벡터 형식으로 나타낸다.

> str_extract(string$member, "[a-f]")

[1] "e" "f" "a" "a" NA  "d" NA  "d" "c"


#str_extract_all(string, pattern) : 일치하는 모든 pattern을 리스트 형식으로 나타낸다.

> str_extract_all(string$member, "[a-f]")

[[1]]

[1] "e"


[[2]]

[1] "f" "a" "e"

          .

          .

          .


#str_match(string, pattern) : 첫번째 pattern을 행렬 형식으로 나타낸다.

> str_match(string$member, "[a-f]")

      [,1]

 [1,] "e" 

 [2,] "f" 

 [3,] "a" 

 [4,] "a" 

 [5,] NA  

 [6,] "d" 

 [7,] NA  

 [8,] "d" 

 [9,] "c" 


#str_match_all(string, pattern) : 모든 pattern을 리스트 형식으로 나타낸다.

> str_match_all(string$member, "[a-f]")

[[1]]

     [,1]

[1,] "e" 


[[2]]

     [,1]

[1,] "f" 

[2,] "a" 

[3,] "e" 

            

       .

       .

       .




Manage Lengths


> str_length(string$member)

[1]  6  6 11 11  5 10  5 10  5


> str_pad(string$member, 15, side = "right")

[1] "mother         " "father         " "grandmother    " "grandfather    " "son 1          "

[6] "daughter 1     " "son 2          " "daughter 2     " "uncle          "


> str_pad(string$member, 15, side = "left")

[1] "         mother" "         father" "    grandmother" "    grandfather" "          son 1"

[6] "     daughter 1" "          son 2" "     daughter 2" "          uncle"


> str_pad(string$member, 15, side = "both")

[1] "    mother     " "    father     " "  grandmother  " "  grandfather  " "     son 1     "

[6] "  daughter 1   " "     son 2     " "  daughter 2   " "     uncle     "


> str_pad(string$member, 15, side = "both", pad = ".")

[1] "....mother....." "....father....." "..grandmother.." "..grandfather.." ".....son 1....."

[6] "..daughter 1..." ".....son 2....." "..daughter 2..." ".....uncle....."


> str_trunc(string$member, 5, side = "right")

[1] "mo..." "fa..." "gr..." "gr..." "son 1" "da..." "son 2" "da..." "uncle"


> str_trunc(string$member, 5, side = "left")

[1] "...er" "...er" "...er" "...er" "son 1" "... 1" "son 2" "... 2" "uncle"


> str_trunc(string$member, 5, side = "center")

[1] "m...r" "f...r" "g...r" "g...r" "son 1" "d...1" "son 2" "d...2" "uncle"


> str_trunc(string$member, 5, side = "center", ellipsis = ".")

[1] "mo.er" "fa.er" "gr.er" "gr.er" "son 1" "da. 1" "son 2" "da. 2" "uncle"





Mutate Strings

> string_sub <- string

> str_sub(string_sub$member, start = 1, end = 4) <- "xxxx"

> string_sub

        member old

1:      xxxxer  50

2:      xxxxer  55

3: xxxxdmother  80

4: xxxxdfather  84

5:       xxxx1  14

6:  xxxxhter 1  20

7:       xxxx2  15

8:  xxxxhter 2  18

9:       xxxxe  40


> string_sub <- string

> str_replace(string_sub$member, "a", "-")

[1] "mother"      "f-ther"      "gr-ndmother" "gr-ndfather" "son 1"       "d-ughter 1"  "son 2"      

[8] "d-ughter 2"  "uncle"      



> string_sub <- string

> str_replace_all(string_sub$member, "a", "-")

[1] "mother"      "f-ther"      "gr-ndmother" "gr-ndf-ther" "son 1"       "d-ughter 1"  "son 2"      

[8] "d-ughter 2"  "uncle"      


> string_sub <- string

> str_to_upper(string_sub$member, locale = "en")

[1] "MOTHER"      "FATHER"      "GRANDMOTHER" "GRANDFATHER" "SON 1"       "DAUGHTER 1"  "SON 2"      

[8] "DAUGHTER 2"  "UNCLE"      


> str_to_lower(string_sub$member, locale = "en")

[1] "mother"      "father"      "grandmother" "grandfather" "son 1"       "daughter 1"  "son 2"      

[8] "daughter 2"  "uncle"      


> str_to_title(string_sub$member, locale = "en")

[1] "Mother"      "Father"      "Grandmother" "Grandfather" "Son 1"       "Daughter 1"  "Son 2"      

[8] "Daughter 2"  "Uncle"      



> str_c(string_sub$member, collapse = "")

[1] "motherfathergrandmothergrandfatherson 1daughter 1son 2daughter 2uncle"

> string_sub

        member old

1:      mother  50

2:      father  55

3: grandmother  80

4: grandfather  84

5:       son 1  14

6:  daughter 1  20

7:       son 2  15

8:  daughter 2  18

9:       uncle  40



> str_dup(string_sub$member,times = 2)

[1] "mothermother"           "fatherfather"           "grandmothergrandmother" "grandfathergrandfather"

[5] "son 1son 1"             "daughter 1daughter 1"   "son 2son 2"             "daughter 2daughter 2"  

[9] "uncleuncle"            


> str_split_fixed(string_sub$member, " ", n = 2) #matrix 형태

      [,1]          [,2]

 [1,] "mother"      ""  

 [2,] "father"      ""  

 [3,] "grandmother" ""  

 [4,] "grandfather" ""  

 [5,] "son"         "1" 

 [6,] "daughter"    "1" 

 [7,] "son"         "2" 

 [8,] "daughter"    "2" 

 [9,] "uncle"       ""  



> str_split(string_sub$member, " ", n = 2) #list 형태

[[1]]

[1] "mother"


[[2]]

[1] "father"


[[3]]

[1] "grandmother"


[[4]]

[1] "grandfather"


[[5]]

[1] "son" "1"  


[[6]]

[1] "daughter" "1"       


[[7]]

[1] "son" "2"  


[[8]]

[1] "daughter" "2"       


[[9]]

[1] "uncle"



> str_order(string_sub$member, decreasing = FALSE)

[1] 6 8 2 4 3 1 5 7 9

> str_order(string_sub$member, decreasing = TRUE)

[1] 9 7 5 1 3 4 2 8 6


> member_order <- str_order(string_sub$member, decreasing = FALSE)

> string_sub$member[member_order]

[1] "daughter 1"  "daughter 2"  "father"      "grandfather" "grandmother" "mother"      "son 1"      

[8] "son 2"       "uncle"      


댓글
공지사항
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
«   2024/04   »
1 2 3 4 5 6
7 8 9 10 11 12 13
14 15 16 17 18 19 20
21 22 23 24 25 26 27
28 29 30
글 보관함