티스토리 뷰
Detect Matches
> library(data.table)
> library(stringr)
# 함수 실습을 위해 간단한 데이터테이블을 만든다.
> string <- data.table(member = c("mother", "father",
+ "grandmother", "grandfather",
+ "son 1", "daughter 1",
+ "son 2", "daughter 2","uncle"),
+ old = c(50, 55, 80, 84, 14, 20, 15,18,40))
> string
member old
1: mother 50
2: father 55
3: grandmother 80
4: grandfather 84
5: son 1 14
6: daughter 1 20
7: son 2 15
8: daughter 2 18
9: uncle 40
# str_detect(string, pattern) : 문자열(string)의 각 원소가 pattern을 포함하는지 여부를 진릿값(TRUE/FALSE)으로 알려준다.
> str_detect(string$member, "mo")
[1] TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
# str_which(string, pattern) : 문자열(string)에서 pattern을 비교하여 TRUE인 원소의 인덱스를 알려준다.
> str_which(string$member, "mo")
[1] 1 3
# str_detect, str_which 응용
> mother_index1 <- str_detect(string$member, "mo")
> mother_index2 <- str_which(string$member, "mo")
> string[mother_index1]
member old
1: mother 50
2: grandmother 80
> string[mother_index2]
member old
1: mother 50
2: grandmother 80
# str_count(string, pattern) : string의 각 원소에서 pattern이 나타나는 개수를 세어준다.
> str_count(string$member, "a")
[1] 0 1 1 2 0 1 0 1 0
# str_locate(string, pattern) : string에서 pattern이 처음 일치하는 위치(start, end)를 알려준다.(행렬 반환)
> str_locate(string$member, "a")
start end
[1,] NA NA
[2,] 2 2
[3,] 3 3
[4,] 3 3
[5,] NA NA
[6,] 2 2
[7,] NA NA
[8,] 2 2
[9,] NA NA
# str_locate_all(string, pattern) : string에서 pattern이 일치하는 모든 위치(start, end)를 알려준다.(리스트 반환)
> str_locate_all(string$member, "a")
[[1]]
start end
[[2]]
start end
[1,] 2 2
[[3]]
start end
[1,] 3 3
.
.
.
Subset Strings
#str_sub(string, start = , end = ) : 문자열의 start 위치부터 end 위치까지의 부분 문자열을 추출한다.
> str_sub(string$member, start = 1, end = 3)
[1] "mot" "fat" "gra" "gra" "son" "dau" "son" "dau" "unc"
#str_subset(string, pattern) : 문자열(string)에서 pattern을 포함하는 것을 나타낸다.
> str_subset(string$member, "fa")
[1] "father" "grandfather"
#str_subset 응용
> father_subset <- str_subset(string$member, "fa")
> string[member %in% father_subset]
member old
1: father 55
2: grandfather 84
#str_extract(string, pattern) : 첫번째 pattern을 벡터 형식으로 나타낸다.
> str_extract(string$member, "[a-f]")
[1] "e" "f" "a" "a" NA "d" NA "d" "c"
#str_extract_all(string, pattern) : 일치하는 모든 pattern을 리스트 형식으로 나타낸다.
> str_extract_all(string$member, "[a-f]")
[[1]]
[1] "e"
[[2]]
[1] "f" "a" "e"
.
.
.
#str_match(string, pattern) : 첫번째 pattern을 행렬 형식으로 나타낸다.
> str_match(string$member, "[a-f]")
[,1]
[1,] "e"
[2,] "f"
[3,] "a"
[4,] "a"
[5,] NA
[6,] "d"
[7,] NA
[8,] "d"
[9,] "c"
#str_match_all(string, pattern) : 모든 pattern을 리스트 형식으로 나타낸다.
> str_match_all(string$member, "[a-f]")
[[1]]
[,1]
[1,] "e"
[[2]]
[,1]
[1,] "f"
[2,] "a"
[3,] "e"
.
.
.
Manage Lengths
> str_length(string$member)
[1] 6 6 11 11 5 10 5 10 5
> str_pad(string$member, 15, side = "right")
[1] "mother " "father " "grandmother " "grandfather " "son 1 "
[6] "daughter 1 " "son 2 " "daughter 2 " "uncle "
> str_pad(string$member, 15, side = "left")
[1] " mother" " father" " grandmother" " grandfather" " son 1"
[6] " daughter 1" " son 2" " daughter 2" " uncle"
> str_pad(string$member, 15, side = "both")
[1] " mother " " father " " grandmother " " grandfather " " son 1 "
[6] " daughter 1 " " son 2 " " daughter 2 " " uncle "
> str_pad(string$member, 15, side = "both", pad = ".")
[1] "....mother....." "....father....." "..grandmother.." "..grandfather.." ".....son 1....."
[6] "..daughter 1..." ".....son 2....." "..daughter 2..." ".....uncle....."
> str_trunc(string$member, 5, side = "right")
[1] "mo..." "fa..." "gr..." "gr..." "son 1" "da..." "son 2" "da..." "uncle"
> str_trunc(string$member, 5, side = "left")
[1] "...er" "...er" "...er" "...er" "son 1" "... 1" "son 2" "... 2" "uncle"
> str_trunc(string$member, 5, side = "center")
[1] "m...r" "f...r" "g...r" "g...r" "son 1" "d...1" "son 2" "d...2" "uncle"
> str_trunc(string$member, 5, side = "center", ellipsis = ".")
[1] "mo.er" "fa.er" "gr.er" "gr.er" "son 1" "da. 1" "son 2" "da. 2" "uncle"
Mutate Strings
> string_sub <- string
> str_sub(string_sub$member, start = 1, end = 4) <- "xxxx"
> string_sub
member old
1: xxxxer 50
2: xxxxer 55
3: xxxxdmother 80
4: xxxxdfather 84
5: xxxx1 14
6: xxxxhter 1 20
7: xxxx2 15
8: xxxxhter 2 18
9: xxxxe 40
> string_sub <- string
> str_replace(string_sub$member, "a", "-")
[1] "mother" "f-ther" "gr-ndmother" "gr-ndfather" "son 1" "d-ughter 1" "son 2"
[8] "d-ughter 2" "uncle"
> string_sub <- string
> str_replace_all(string_sub$member, "a", "-")
[1] "mother" "f-ther" "gr-ndmother" "gr-ndf-ther" "son 1" "d-ughter 1" "son 2"
[8] "d-ughter 2" "uncle"
> string_sub <- string
> str_to_upper(string_sub$member, locale = "en")
[1] "MOTHER" "FATHER" "GRANDMOTHER" "GRANDFATHER" "SON 1" "DAUGHTER 1" "SON 2"
[8] "DAUGHTER 2" "UNCLE"
> str_to_lower(string_sub$member, locale = "en")
[1] "mother" "father" "grandmother" "grandfather" "son 1" "daughter 1" "son 2"
[8] "daughter 2" "uncle"
> str_to_title(string_sub$member, locale = "en")
[1] "Mother" "Father" "Grandmother" "Grandfather" "Son 1" "Daughter 1" "Son 2"
[8] "Daughter 2" "Uncle"
> str_c(string_sub$member, collapse = "")
[1] "motherfathergrandmothergrandfatherson 1daughter 1son 2daughter 2uncle"
> string_sub
member old
1: mother 50
2: father 55
3: grandmother 80
4: grandfather 84
5: son 1 14
6: daughter 1 20
7: son 2 15
8: daughter 2 18
9: uncle 40
> str_dup(string_sub$member,times = 2)
[1] "mothermother" "fatherfather" "grandmothergrandmother" "grandfathergrandfather"
[5] "son 1son 1" "daughter 1daughter 1" "son 2son 2" "daughter 2daughter 2"
[9] "uncleuncle"
> str_split_fixed(string_sub$member, " ", n = 2) #matrix 형태
[,1] [,2]
[1,] "mother" ""
[2,] "father" ""
[3,] "grandmother" ""
[4,] "grandfather" ""
[5,] "son" "1"
[6,] "daughter" "1"
[7,] "son" "2"
[8,] "daughter" "2"
[9,] "uncle" ""
> str_split(string_sub$member, " ", n = 2) #list 형태
[[1]]
[1] "mother"
[[2]]
[1] "father"
[[3]]
[1] "grandmother"
[[4]]
[1] "grandfather"
[[5]]
[1] "son" "1"
[[6]]
[1] "daughter" "1"
[[7]]
[1] "son" "2"
[[8]]
[1] "daughter" "2"
[[9]]
[1] "uncle"
> str_order(string_sub$member, decreasing = FALSE)
[1] 6 8 2 4 3 1 5 7 9
> str_order(string_sub$member, decreasing = TRUE)
[1] 9 7 5 1 3 4 2 8 6
> member_order <- str_order(string_sub$member, decreasing = FALSE)
> string_sub$member[member_order]
[1] "daughter 1" "daughter 2" "father" "grandfather" "grandmother" "mother" "son 1"
[8] "son 2" "uncle"
- Total
- Today
- Yesterday
- 빅데이터
- 빅데이터분석
- 데이터형태
- 기초
- 병합
- data.table
- Kaggle
- rbase
- 벡터
- 분석
- RStudio
- 우분투
- 실기
- Bigdata
- 리눅스
- 빅데이터분석전문가
- 타입
- ADSP
- 데이터구조
- 데이터분석
- ADP
- Jupyter notebook
- subset
- 실습
- 함수
- data.frame
- 16.04
- vector
- Titanic
- R
일 | 월 | 화 | 수 | 목 | 금 | 토 |
---|---|---|---|---|---|---|
1 | 2 | |||||
3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 11 | 12 | 13 | 14 | 15 | 16 |
17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 25 | 26 | 27 | 28 | 29 | 30 |