Created By: demo
Created At: Tue, 31 Mar 2020 12:25:24 -0500
Input Dataset: 2020 Mar/COVID-19
Last Submitted At: Tue, 31 Mar 2020 12:25:24 -0500
Last Finished At: Tue, 31 Mar 2020 12:26:16 -0500 (52s)
Source Code
# What 100 words appear in the most paper abstracts?
#
# For every input paper, collect the distinct words of its abstract (a word is
# counted at most once per paper) and emit each one with weight 1, so the
# top(100) aggregator ranks words by the number of papers that use them.
#
# NOTE: any job output recorded alongside this script was produced by the
# earlier whitespace-only tokenizer and will differ from a fresh run.
p: Paper = input;
o: output top(100) of string weight int;
stopwords: set of string = stop_words();

# distinct words used in this paper's abstract
abstractWords: set of string;

foreach (i: int; def(p.abstract[i])) {
    # Split on runs of anything that is not a lowercase letter or digit rather
    # than on a single space. Splitting on " " left punctuation glued to the
    # words ("study," / "cells." / "results:"), which split one word's count
    # across several variants and let stop words such as "however," slip past
    # the stop-word filter. It also ignored tabs, newlines and repeated spaces.
    # Trade-off: hyphenated terms (e.g. "covid-19") are broken into their parts,
    # and non-ASCII letters act as separators.
    paragraphWords: array of string = splitall(lowercase(p.abstract[i].text), "[^a-z0-9]+");

    # Skip empty tokens (produced if the text starts with a separator) and stop
    # words. Assumes stop_words() returns lowercase entries - TODO confirm.
    foreach (j: int; paragraphWords[j] != "" && !contains(stopwords, paragraphWords[j]))
        add(abstractWords, paragraphWords[j]);
}

# one vote per distinct word per paper
words := values(abstractWords);
foreach (k: int; def(words[k]))
    o << words[k] weight 1;
Output
Job Output Size: 2.11k
o[] = virus, 7711.0
o[] = viral, 6080.0
o[] = study, 5857.0
o[] = infection, 5305.0
o[] = results, 5051.0
o[] = respiratory, 5048.0
o[] = may, 4757.0
o[] = used, 4635.0
o[] = human, 4633.0
o[] = disease, 4534.0
o[] = two, 4317.0
o[] = however,, 4272.0
o[] = protein, 3953.0
o[] = one, 3899.0
o[] = data, 3897.0
o[] = cell, 3740.0
o[] = different, 3740.0
o[] = viruses, 3697.0
o[] = clinical, 3680.0
o[] = cells, 3674.0
o[] = associated, 3526.0
o[] = including, 3483.0
o[] = found, 3474.0
o[] = high, 3459.0
o[] = analysis, 3451.0
o[] = potential, 3354.0
o[] = important, 3319.0
o[] = showed, 3314.0
o[] = new, 3266.0
o[] = infectious, 3177.0
o[] = acute, 3155.0
o[] = role, 3066.0
o[] = identified, 3061.0
o[] = studies, 3051.0
o[] = well, 3000.0
o[] = severe, 2981.0
o[] = could, 2925.0
o[] = health, 2909.0
o[] = 2, 2855.0
o[] = compared, 2841.0
o[] = among, 2813.0
o[] = first, 2801.0
o[] = patients, 2784.0
o[] = coronavirus, 2764.0
o[] = control, 2761.0
o[] = background:, 2718.0
o[] = based, 2691.0
o[] = results:, 2681.0
o[] = significant, 2671.0
o[] = novel, 2658.0
o[] = 1, 2615.0
o[] = development, 2611.0
o[] = infections, 2611.0
o[] = present, 2581.0
o[] = specific, 2574.0
o[] = use, 2574.0
o[] = infected, 2553.0
o[] = rna, 2509.0
o[] = three, 2497.0
o[] = infection., 2487.0
o[] = immune, 2466.0
o[] = expression, 2461.0
o[] = several, 2453.0
o[] = number, 2443.0
o[] = host, 2442.0
o[] = significantly, 2435.0
o[] = provide, 2385.0
o[] = syndrome, 2377.0
o[] = within, 2340.0
o[] = gene, 2328.0
o[] = major, 2325.0
o[] = increased, 2321.0
o[] = proteins, 2317.0
o[] = many, 2299.0
o[] = detected, 2288.0
o[] = response, 2279.0
o[] = model, 2213.0
o[] = treatment, 2211.0
o[] = study,, 2191.0
o[] = observed, 2180.0
o[] = influenza, 2161.0
o[] = developed, 2142.0
o[] = higher, 2106.0
o[] = system, 2096.0
o[] = highly, 2082.0
o[] = although, 2066.0
o[] = samples, 2064.0
o[] = research, 2048.0
o[] = molecular, 2035.0
o[] = 3, 1979.0
o[] = common, 1970.0
o[] = diseases, 1969.0
o[] = recent, 1964.0
o[] = effective, 1961.0
o[] = due, 1939.0
o[] = positive, 1935.0
o[] = activity, 1934.0
o[] = cause, 1930.0
o[] = time, 1926.0
o[] = cells., 1921.0
Compilation
Status: Finished
Started: Tue, 31 Mar 2020 12:25:24 -0500
Finished: Tue, 31 Mar 2020 12:25:32 -0500 (8s)
Execution
Status: Finished
Started: Tue, 31 Mar 2020 12:25:36 -0500
Finished: Tue, 31 Mar 2020 12:26:16 -0500 (40s)