% SIGIR '05 paper (pp. 645--646). The doi field holds the bare identifier, not a
% resolver URL, so styles and tools can prepend whichever resolver they prefer.
% The title is single-braced so the bibliography style controls its casing.
@inproceedings{olsson05cross,
  author    = {J. Scott Olsson and Douglas W. Oard and Jan Hajic},
  title     = {Cross-language text classification},
  booktitle = {{SIGIR '05: Proceedings of the 28th annual international ACM SIGIR conference on Research and development in information retrieval}},
  year      = {2005},
  pages     = {645--646},
  location  = {Salvador, Brazil},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  isbn      = {1-59593-034-5},
  doi       = {10.1145/1076034.1076170},
  ps        = {http://www.math.umd.edu/~olsson/papers/p031-olsson.ps.gz},
  pdf       = {http://www.math.umd.edu/~olsson/papers/p031-olsson.pdf}
}
% ICASSP'07 paper on indexing and searching conversational speech via phone
% n-gram counts in recognition lattices. The title is single-braced (not
% double-braced) so the bibliography style decides its capitalisation.
@inproceedings{olsson07fast,
  author    = {J. Scott Olsson and Jonathan Wintrode and Matthew Lee},
  title     = {Fast Unconstrained Audio Search in Numerous Human Languages},
  booktitle = {{ICASSP'07: Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing}},
  year      = {2007},
  ps        = {http://www.math.umd.edu/~olsson/papers/olsson07fast.ps.gz},
  pdf       = {http://www.math.umd.edu/~olsson/papers/olsson07fast.pdf},
  abstract  = {We present a system to index and search conversational speech
using a scoring heuristic on the expected posterior counts of
phone $n$-grams in recognition lattices. We report significant
improvements in retrieval effectiveness on five human languages over
a strong 1-best baseline. The method is shown to improve the utility (mean average
precision) of the retrieved lattices' rank order and to do so with a search cost negligible
compared to the fastest yet known methods for the linear scanning of phonetic lattices.}
}
% CIKM '06 paper (pp. 798--799). The doi field holds the bare identifier, not a
% resolver URL, so styles and tools can prepend whichever resolver they prefer.
@inproceedings{olsson06combining,
  author    = {J. Scott Olsson and Douglas W. Oard},
  title     = {Combining feature selectors for text classification},
  booktitle = {{CIKM '06: Proceedings of the 15th ACM international conference on Information and knowledge management}},
  year      = {2006},
  pages     = {798--799},
  location  = {Arlington, Virginia, USA},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  isbn      = {1-59593-433-2},
  doi       = {10.1145/1183614.1183736},
  ps        = {http://www.math.umd.edu/~olsson/papers/cikm645-olsson.ps.gz},
  pdf       = {http://www.math.umd.edu/~olsson/papers/cikm645-olsson.pdf},
  abstract  = {We introduce several methods of combining feature selectors for text classification.
Results from a large investigation of these combinations are summarized. Easily
constructed combinations of feature selectors are shown to improve peak $R$-precision
and $F_1$ at statistically significant levels.}
}
% SIGIR '06 paper on how training-set size interacts with k for the kNN
% classifier. Only the acronym is brace-protected in the title, so the style
% may recase the rest; the location gains the comma missing after the city.
@inproceedings{olsson06analysis,
  author    = {J. Scott Olsson},
  title     = {An Analysis of the Coupling between Training Set and Neighborhood Sizes for the {kNN} Classifier},
  booktitle = {{SIGIR '06: Proceedings of the 29th annual international ACM SIGIR conference on Research and development in information retrieval}},
  year      = {2006},
  location  = {Seattle, Washington, USA},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  ps        = {http://www.math.umd.edu/~olsson/papers/pp107-olsson.ps.gz},
  pdf       = {http://www.math.umd.edu/~olsson/papers/pp107-olsson.pdf},
  abstract  = {We consider the relationship between training set size and the parameter $k$ for the $k$-Nearest Neighbors ($k$NN) classifier.
When few examples are available, we observe that accuracy is sensitive to $k$ and that best $k$ tends to increase with training size.
We explore the subsequent risk that $k$ tuned on partitions will be suboptimal after aggregation and re-training. This risk is found to be
most severe when little data is available. For larger training sizes, accuracy becomes increasingly
stable with respect to $k$ and the risk decreases.}
}
% Technical report, so it uses the techreport entry type with the report
% numbers in the number field rather than a conference booktitle.
% NOTE(review): institution is expanded from the original "UMD" label; confirm
% the exact institution name wanted in the rendered bibliography.
@techreport{olsson06exploring,
  author      = {J. Scott Olsson},
  title       = {Exploring Feature Selection for Multi-Label Text Classification using Ranked Retrieval Measures},
  institution = {University of Maryland},
  number      = {LAMP-TR-134, UMIACS-TR-2006-41, CS-TR-4824},
  year        = {2006},
  ps          = {http://www.math.umd.edu/~olsson/papers/olsson06exploring.ps.gz},
  pdf         = {http://www.math.umd.edu/~olsson/papers/olsson06exploring.pdf},
  abstract    = {While most classifier studies have focused on set-based evaluation
measures, multi-label classification techniques that rank alternatives
and then apply a threshold to make binary decisions can also be
evaluated before thresholding. This can be done using well
understood measures from ranked retrieval ($R$-precision in this
case). Rank-based evaluation is first motivated by using a simple
simulation to show that thresholding can introduce effects which
obscure differences in the rank ordering of topics. The use of ranked retrieval
measures for formative evaluation of multi-label classification is
then demonstrated by exploring some techniques for combining evidence
of term utility to improve feature selection for $k$-Nearest-Neighbor
text classification. Because this ranked list evaluation framework
greatly reduces the computational cost per method variant explored, we are
able to investigate a large number of feature selection possibilities.
Easily constructed combinations
were found that proved to be more robust across a range of feature
set sizes and that yielded statistically significant improvements in
peak $R$-precision and microaveraged $F_1$ (a commonly reported
set-based measure).}
}
% NAACL-HLT '07 Doctoral Consortium and Poster Proceedings entry. The title is
% single-braced so the bibliography style decides its capitalisation.
@inproceedings{olsson07combining,
  author    = {J. Scott Olsson},
  title     = {Combining Evidence for Improved Speech Retrieval},
  booktitle = {{NAACL-HLT '07: Doctoral Consortium and Poster Proceedings}},
  year      = {2007},
  location  = {{Rochester, New York, USA}}
}
% SIGIR '07 paper on topic label assignment for oral history interviews, with
% links to the paper and its slides. The title is single-braced so the
% bibliography style decides its capitalisation.
@inproceedings{olsson07improving,
  author    = {J. Scott Olsson and Douglas W. Oard},
  title     = {Improving Text Classification for Oral History Archives with Temporal Domain Knowledge},
  booktitle = {{SIGIR '07: Proceedings of the 30th annual international ACM SIGIR conference on Research and development in information retrieval}},
  year      = {2007},
  location  = {{Amsterdam, Netherlands}},
  ps        = {http://www.math.umd.edu/~olsson/papers/olsson07improving.ps.gz},
  pdf       = {http://www.math.umd.edu/~olsson/papers/olsson07improving.pdf},
  slides    = {http://www.math.umd.edu/~olsson/papers/olsson07improving-slides.pdf},
  abstract  = {This paper describes two new techniques for increasing the accuracy of
topic label assignment to conversational speech from oral history
interviews using supervised machine learning in conjunction
with automatic speech recognition. The first, time-shifted
classification, leverages local sequence information from the order in
which the story is told. The second, temporal label weighting, takes
the complementary perspective by using the position within an
interview to bias label assignment probabilities. These methods, when
used in combination, yield between 6\% and 15\% relative improvements
in classification accuracy using a clipped R-precision measure that
models the utility of label sets as segment summaries in interactive
speech retrieval applications.}
}
% SIGIR '07 workshop paper on measures for predicting how useful recognition
% lattices are in ranked utterance retrieval; links to paper and slides.
@inproceedings{olsson07improved,
  author    = {J. Scott Olsson},
  title     = {Improved Measures for Predicting the Usefulness of Recognition Lattices in Ranked Utterance Retrieval},
  booktitle = {{SIGIR '07: Workshop on Searching Spontaneous Conversational Speech}},
  year      = {2007},
  location  = {{Amsterdam, Netherlands}},
  ps        = {http://www.math.umd.edu/~olsson/papers/olsson07improved.ps.gz},
  pdf       = {http://www.math.umd.edu/~olsson/papers/olsson07improved.pdf},
  slides    = {http://www.math.umd.edu/~olsson/papers/olsson07improved-slides.pdf},
  abstract  = {We consider the problem of evaluating automatic speech recognition
lattices to predict their usefulness in speech retrieval applications. In
particular, we focus on ranking utterances by our confidence that they contain a query
term. Our purpose is to close the gap between recognition efforts, which have traditionally
focused on producing one-best transcripts, and recent retrieval systems, which may
utilize multiple transcript hypotheses in indexing and search. We present a simple
framework for comparing the ability of two measures to predict how well a system can
retrieve a matching lattice. In a comparison with the traditional measure, simple accuracy
(or word error rate), we show with statistical significance that two new measures are
superior at predicting a vocabulary independent utterance retrieval system's rank
ordering of speech utterances.}
}
% ACL '08 paper on combining ranked speech-retrieval results with Generalized
% Additive Models. The conference city is Columbus, Ohio (not "Columbia").
@inproceedings{olsson08combining,
  author    = {J. Scott Olsson},
  title     = {Combining Speech Retrieval Results with Generalized Additive Models},
  booktitle = {{ACL} '08},
  year      = {2008},
  location  = {{Columbus, Ohio}},
  abstract  = {Rapid and inexpensive techniques for automatic transcription of speech
have the potential to dramatically expand the types of content to
which information retrieval techniques can be productively applied,
but limitations in accuracy and robustness must be overcome
before that promise can be fully realized. Combining retrieval results from
systems built on various errorful representations of the same collection
offers some potential to address these challenges. This paper
explores that potential by applying Generalized Additive Models to
optimize the combination of ranked retrieval results obtained
using transcripts produced automatically for the same spoken content
by substantially different recognition systems. Topic-averaged retrieval
effectiveness better than any previously reported for the same collection was obtained,
and even larger gains are apparent when using an alternative measure emphasizing results
on the most difficult topics.}
}
% Interspeech 2008 paper on a discriminative model for vocabulary independent
% term frequency estimation.
@inproceedings{olsson08vocabulary,
  author    = {J. Scott Olsson},
  title     = {Vocabulary Independent Discriminative Term Frequency Estimation},
  booktitle = {Interspeech 2008},
  year      = {2008},
  abstract  = {We introduce a discriminative approach to vocabulary independent term frequency estimation.
Using two separate corpora and recognition systems, we show that our model can perform significantly
better than a previously established generative model at this task.}
}
This file has been generated by bibtex2html 1.85.