Back

Explore Courses Blog Tutorials Interview Questions
0 votes
2 views
in Data Science by (17.6k points)

I am using an SVM to classify my text, but instead of the expected class labels I get numeric labels together with probabilities.

Data frame (rows 1:20 are the training set, rows 21:50 are the test set)

Updated:

     # Literal definition of the example data frame `ou` (50 rows, 2 factor columns).
     #   text  - one comma-separated token string per document
     #   value - class label; levels are "", "Access" and "Report/Data"
     # Per the question, rows 1:20 are the training set and rows 21:50 the test set;
     # the test rows carry the empty label "".
     ou <- structure(list(text = structure(c(1L, 6L, 1L, 1L, 8L, 13L, 24L, 

5L, 11L, 12L, 33L, 36L, 20L, 25L, 4L, 19L, 9L, 29L, 22L, 3L, 

8L, 8L, 8L, 2L, 8L, 27L, 30L, 3L, 14L, 35L, 3L, 34L, 23L, 31L, 

22L, 6L, 6L, 7L, 17L, 3L, 8L, 32L, 18L, 15L, 21L, 26L, 3L, 16L, 

10L, 28L), .Label = c("access, access, access, access", "character(0)", 

"report", "report, access", "report, access, access", "report, access, access, access", 

"report, access, access, access, access, access, access", "report, access, access, access, access, access, access, access", 

"report, access, access, access, access, access, access, report", 

"report, access, access, access, access, access, report", "report, access, access, access, report", 

"report, access, access, access, report, access", "report, access, access, report, access, access, access, access, access, access", 

"report, data", "report, data, data", "report, data, data, data", 

"report, data, data, data, data", "report, data, data, data, data, data", 

"report, data, data, data, report, report, data, access,access", 

"report, data, data, report", "report, data, report", "report, report", 

"report, report, access, access, access", "report, report, access, access, report, report, report, report, report, report, data, data, report, access, report, report", 

"report, report, access, report, report, report, report, report, data, data, report, access, report, report", 

"report, report, access, report, report, report, report, report, report, data, data, report, access, report, report", 

"report, report, data", "report, report, data, report", "report, report, report, data, report, report, data, data, report, data, data", 

"report, report, report, report", "report, report, report, report, data, report, report, data, report, data, report", 

"report, report, report, report, report, data, report, data, data", 

"report, report, report, report, report, report, report", "report, report, report, report, report, report, report, access, access, access", 

"report, report, report, report, report, report, report, report, data, data, report, access, report, report", 

"report, report, report, report, report, report, report, report, report, report, data, report, report, report, report, report, report, report,report"

), class = "factor"), value = structure(c(2L, 2L, 2L, 2L, 2L, 

2L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 

1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", 

"Access", "Report/Data"), class = "factor")), .Names = c("text", 

"value"), class = "data.frame", row.names = c(NA, -50L))

Code used:

        library(RTextTools)

        doc_matrix <- create_matrix(ou$text, language="english", removeNumbers=TRUE, stemWords=TRUE, removeSparseTerms=.998)

        #container <- create_container(doc_matrix, ou$text, trainSize=1:20, testSize=21:50, virgin=FALSE)

        container <- create_container(doc_matrix, as.numeric(factor(ou$text)), trainSize=1:20, testSize=21:50, virgin=FALSE)

        #Training models

        SVM <- train_model(container,"SVM")

        MAXENT <- train_model(container,"MAXENT")

        BAGGING <- train_model(container,"BAGGING")

        TREE <- train_model(container,"TREE")

        #Classify data using trained models

        SVM_CLASSIFY <- classify_model(container, SVM)

        MAXENT_CLASSIFY <- classify_model(container, MAXENT)

        BAGGING_CLASSIFY <- classify_model(container, BAGGING)

        #Analytics

        analytics <- create_analytics(container,SVM_CLASSIFY)

        models <- train_models(container, algorithms=c("MAXENT","SVM"))

        results <- classify_models(container, models)

        analytics <- create_analytics(container, results)

        summary(analytics)

        SVM <- cross_validate(container, 5, "SVM")

        write.csv(analytics@document_summary, "DocumentSummary.csv")

expected result:

          text                                                          value

     21 report, access, access, access, access, access, access, access       Access

     22 report, access, access, access, access, access, access, access       Access

     23 report, access, access, access, access, access, access, access       Access

     24 character(0)                                                          NA

     25 report, access, access, access, access, access, access, access       Access

     26 report, report, data                                             Report/Data

     27 report, report, report, report                                   Report/Data

     28 report                                                          Report/Data

     29 report, data                                                    Report/Data

     30 report, report, report, report, report, report, report, report,

         data, data, report, access, report, report                      Report/Data

The actual result, with numeric labels and probabilities, is:

>   MAXENTROPY_LABEL    MAXENTROPY_PROB SVM_LABEL   SVM_PROB    MANUAL_CODE CONSENSUS_CODE  CONSENSUS_AGREE CONSENSUS_INCORRECT PROBABILITY_CODE    PROBABILITY_INCORRECT

> 1 8   0.999999066 22  0.070090645 8   8   1   0   8   0

> 2 8   0.999999066 22  0.070090645 8   8   1   0   8   0

> 3 8   0.999999066 22  0.070090645 8   8   1   0   8   0

> 4 1   0.055555556 12  0.071384112 2   12  1   1   12  1

> 5 8   0.999999066 22  0.070090645 8   8   1   0   8   0

> 6 25  1   12  0.074126949 27  25  1   1   25  1

> 7 33  0.627904676 13  0.068572857 30  33  1   1   33  1

> 8 33  0.406792176 12  0.074592181 3   33  1   1   33  1

> 9 20  1   12  0.074507793 14  20  1   1   20  1

EDIT 1: How can I get the label names instead of the numeric SVM labels?

1 Answer

0 votes
by (41.4k points)

You can do this:

# `results` has one row per classified (test) document, i.e. rows 21:50 of `ou`,
# so only that slice of the text can be bound to it. Binding all 50 texts
# fails because the row counts differ. Naming the column keeps it as `text`.
ou <- cbind(text = ou$text[21:50], results)

To print the labels:

# Default for rows whose predicted code has no known label name.
ou$value <- "NONE"

# Lookup from predicted class code to label name. The codes are the positions of
# the labels in levels(ou$value) from the question's data frame
# ("", "Access", "Report/Data"), so they apply when the container was built with
# as.numeric(factor(ou$value)) as its labels. The previous codes "1" and "-1"
# never occur for this data, which left every row as "NONE".
# Assumes `ou` has exactly one row per row of `results`.
label_names <- c("2" = "Access", "3" = "Report/Data")

svm_codes <- as.character(results$SVM_LABEL)
known <- svm_codes %in% names(label_names)
ou$value[known] <- unname(label_names[svm_codes[known]])

Browse Categories

...